# Inline parsing state: delimiter records, delimiter scan results and StateInline.
from __future__ import annotations
from collections import namedtuple
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Literal
from .._compat import DATACLASS_KWARGS
from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
from ..ruler import StateBase
from ..token import Token
from ..utils import EnvType
if TYPE_CHECKING:
from markdown_it import MarkdownIt
@dataclass(**DATACLASS_KWARGS)
class Delimiter:
    """Record describing one run of emphasis-like delimiter characters.

    Instances are collected in ``StateInline.delimiters``.
    """

    # Char code of the starting marker (number).
    marker: int

    # Total length of this series of delimiters.
    length: int

    # A position of the token this delimiter corresponds to.
    token: int

    # If this delimiter is matched as a valid opener, `end` will be
    # equal to its position, otherwise it's `-1`.
    end: int

    # Boolean flags that determine if this delimiter could open or close
    # an emphasis.
    open: bool
    close: bool

    # Optional field, `None` unless a caller sets it.
    # NOTE(review): annotated `bool | None`, but the name suggests a nesting
    # level (an int) — confirm the intended type against the code that uses it.
    level: bool | None = None
# Result of `StateInline.scanDelims`: whether the scanned marker run may open
# and/or close an emphasis sequence, and how many marker characters it spans.
Scanned = namedtuple("Scanned", "can_open can_close length")
class StateInline(StateBase):
    """Parsing state for inline content.

    Holds the source text, the scan cursor (``pos`` / ``posMax``), the output
    token stream with its per-token metadata, the buffered pending text, and
    the delimiter bookkeeping maintained by ``push``.
    """

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> None:
        self.src = src
        self.env = env
        self.md = md

        # Output stream; tokens already present each get an empty (None)
        # metadata slot.
        self.tokens = outTokens
        self.tokens_meta: list[dict[str, Any] | None] = [None for _ in outTokens]

        # Scan cursor and the exclusive upper bound for scanning.
        self.pos = 0
        self.posMax = len(self.src)

        # Current nesting level, plus the text buffered but not yet emitted
        # and the level that text will be emitted at.
        self.level = 0
        self.pending = ""
        self.pendingLevel = 0

        # {start: end} pairs; lets pair parsing (emphasis, strikes)
        # avoid rescanning when it backtracks.
        self.cache: dict[int, int] = {}

        # Emphasis-like delimiters collected for the current tag.
        self.delimiters: list[Delimiter] = []

        # Delimiter lists of the enclosing tags, innermost last.
        self._prev_delimiters: list[list[Delimiter]] = []

        # Backtick run length -> last position where it was seen.
        self.backticks: dict[int, int] = {}
        self.backticksScanned = False

        # Counter used to switch off inline linkify-it execution
        # inside <a> and markdown links.
        self.linkLevel = 0

    def __repr__(self) -> str:
        position = f"pos=[{self.pos} of {self.posMax}]"
        emitted = f"token={len(self.tokens)}"
        return f"{self.__class__.__name__}({position}, {emitted})"

    def pushPending(self) -> Token:
        """Emit the buffered pending text as a "text" token and clear the buffer.

        The token takes its level from ``pendingLevel``. No entry is added to
        ``tokens_meta`` here.

        :return: the text token appended to ``tokens``
        """
        text_token = Token("text", "", 0)
        text_token.content = self.pending
        text_token.level = self.pendingLevel
        self.pending = ""
        self.tokens.append(text_token)
        return text_token

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Append a new token to the stream, flushing pending text first.

        :param ttype: token type
        :param tag: tag name for the token
        :param nesting: 1 for an opening tag, -1 for a closing tag, 0 otherwise
        :return: the token appended to ``tokens``
        """
        if self.pending:
            self.pushPending()

        new_token = Token(ttype, tag, nesting)
        meta: dict[str, Any] | None = None

        if nesting > 0:
            # Opening tag: the opener is recorded at the current level, then
            # its content goes one level deeper with a fresh delimiter list.
            new_token.level = self.level
            self.level += 1
            self._prev_delimiters.append(self.delimiters)
            fresh: list[Delimiter] = []
            self.delimiters = fresh
            meta = {"delimiters": fresh}
        elif nesting < 0:
            # Closing tag: step back out before recording the level, so the
            # closer ends up at the same level as its opener, and restore the
            # enclosing tag's delimiter list.
            self.level -= 1
            self.delimiters = self._prev_delimiters.pop()
            new_token.level = self.level
        else:
            new_token.level = self.level

        self.pendingLevel = self.level
        self.tokens.append(new_token)
        self.tokens_meta.append(meta)
        return new_token

    def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
        """Measure a run of identical marker characters and classify it.

        :param start: position to scan from; it should point at a valid marker
        :param canSplitWord: whether these markers may be found inside a word
        :return: ``Scanned(can_open, can_close, length)`` where ``length`` is
            the number of consecutive marker characters from ``start``
        """
        src = self.src
        limit = self.posMax
        marker = src[start]

        # The start of the line counts as whitespace.
        prev_char = src[start - 1] if start > 0 else " "

        run_end = start
        while run_end < limit and src[run_end] == marker:
            run_end += 1

        # The end of the line counts as whitespace.
        next_char = src[run_end] if run_end < limit else " "

        prev_is_punct = isMdAsciiPunct(ord(prev_char)) or isPunctChar(prev_char)
        next_is_punct = isMdAsciiPunct(ord(next_char)) or isPunctChar(next_char)

        prev_is_space = isWhiteSpace(ord(prev_char))
        next_is_space = isWhiteSpace(ord(next_char))

        # Left-flanking: not followed by whitespace, and not followed by
        # punctuation unless preceded by whitespace or punctuation.
        left_flanking = (not next_is_space) and not (
            next_is_punct and not (prev_is_space or prev_is_punct)
        )
        # Right-flanking: the mirror image of the test above.
        right_flanking = (not prev_is_space) and not (
            prev_is_punct and not (next_is_space or next_is_punct)
        )

        if canSplitWord:
            can_open = left_flanking
            can_close = right_flanking
        else:
            # A run flanking on both sides only opens (closes) when
            # punctuation precedes (follows) it.
            can_open = left_flanking and ((not right_flanking) or prev_is_punct)
            can_close = right_flanking and ((not left_flanking) or next_is_punct)

        return Scanned(can_open=can_open, can_close=can_close, length=run_end - start)
|