"""Balance paired characters (*, _, etc) in inline tokens."""
from __future__ import annotations

from .state_inline import Delimiter, StateInline


def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
    """For each opening emphasis-like marker find a matching closing one."""
    if not delimiters:
        return
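
    # for each marker char, openersBottom stores the lowest index at which a
    # matching opener can still be found (see the "lower bounds" comment below)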
    openersBottom = {}
    maximum = len(delimiters)

    # headerIdx is the first delimiter of the current (where closer is) delimiter run
    headerIdx = 0
    lastTokenIdx = -2  # needs any value lower than -1
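    # jumps[i] holds how many delimiters can be skipped backwards from position i
    # when searching for an opener; this keeps the algorithm linear on
    # pathological input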
    jumps: list[int] = []
    closerIdx = 0
    while closerIdx < maximum:
        closer = delimiters[closerIdx]

        jumps.append(0)

        # markers belong to the same delimiter run if:
        #  - they have adjacent tokens
        #  - AND markers are the same
        #
        if (
            delimiters[headerIdx].marker != closer.marker
            or lastTokenIdx != closer.token - 1
        ):
            headerIdx = closerIdx
        lastTokenIdx = closer.token

        # Length is only used for the emphasis-specific "rule of 3";
        # if it's not defined (in strikethrough or 3rd party plugins),
        # we can default it to 0 to disable those checks.
        #
        closer.length = closer.length or 0

        if not closer.close:
            closerIdx += 1
            continue

        # Previously calculated lower bounds (previous fails)
        # for each marker, each delimiter length modulo 3,
        # and for whether this closer can be an opener;
        # https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
        if closer.marker not in openersBottom:
            openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]

        minOpenerIdx = openersBottom[closer.marker][
            (3 if closer.open else 0) + (closer.length % 3)
        ]
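
        # start the opener scan just before the current delimiter run,
        # skipping over any delimiters that have already been matched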
        openerIdx = headerIdx - jumps[headerIdx] - 1

        newMinOpenerIdx = openerIdx

        while openerIdx > minOpenerIdx:
            opener = delimiters[openerIdx]

            if opener.marker != closer.marker:
                openerIdx -= jumps[openerIdx] + 1
                continue

            if opener.open and opener.end < 0:
                isOddMatch = False

                # from spec:
                #
                # If one of the delimiters can both open and close emphasis, then the
                # sum of the lengths of the delimiter runs containing the opening and
                # closing delimiters must not be a multiple of 3 unless both lengths
                # are multiples of 3.
                #
                if (
                    (opener.close or closer.open)
                    and ((opener.length + closer.length) % 3 == 0)
                    and (opener.length % 3 != 0 or closer.length % 3 != 0)
                ):
                    isOddMatch = True

                if not isOddMatch:
                    # If the previous delimiter cannot be an opener, we can safely skip
                    # the entire sequence in future checks. This is required to make
                    # sure the algorithm has linear complexity (see the *_*_*_*_*_... case).
                    #
                    if openerIdx > 0 and not delimiters[openerIdx - 1].open:
                        lastJump = jumps[openerIdx - 1] + 1
                    else:
                        lastJump = 0

                    jumps[closerIdx] = closerIdx - openerIdx + lastJump
                    jumps[openerIdx] = lastJump

                    closer.open = False
                    opener.end = closerIdx
                    opener.close = False
                    newMinOpenerIdx = -1
                    # treat the next token as the start of a run; this optimizes
                    # skips in the **<...>**a**<...>** pathological case
                    lastTokenIdx = -2
                    break

            openerIdx -= jumps[openerIdx] + 1

        if newMinOpenerIdx != -1:
            # If the match for this delimiter run failed, we want to set a lower
            # bound for future lookups. This is required to make sure the
            # algorithm has linear complexity.
            #
            # See details here:
            # https://github.com/commonmark/cmark/issues/178#issuecomment-270417442
            #
            openersBottom[closer.marker][
                (3 if closer.open else 0) + ((closer.length or 0) % 3)
            ] = newMinOpenerIdx

        closerIdx += 1


def link_pairs(state: StateInline) -> None:
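    """Balance delimiters found at the top level and in each token's meta.

    ``processDelimiters`` runs first on ``state.delimiters`` and then on every
    ``"delimiters"`` list stored in ``state.tokens_meta``, so markers inside
    nested inline content are balanced as well.
    """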
    tokens_meta = state.tokens_meta
    maximum = len(state.tokens_meta)

    processDelimiters(state, state.delimiters)

    curr = 0
    while curr < maximum:
        curr_meta = tokens_meta[curr]
        if curr_meta and "delimiters" in curr_meta:
            processDelimiters(state, curr_meta["delimiters"])
        curr += 1