"""Balance paired characters (*, _, etc) in inline tokens."""
from __future__ import annotations

from .state_inline import Delimiter, StateInline


def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
    """For each opening emphasis-like marker find a matching closing one."""
    if not delimiters:
        return
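
    # for each marker char, openersBottom stores the lowest index at which a
    # matching opener can still be found (see the "lower bounds" comment below)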
    openersBottom = {}
    maximum = len(delimiters)

    # headerIdx is the first delimiter of the current (where closer is) delimiter run
    headerIdx = 0
    lastTokenIdx = -2  # needs any value lower than -1
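    # jumps[i] holds how many delimiters can be skipped backwards from position i
    # when searching for an opener; this keeps the algorithm linear on
    # pathological input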
    jumps: list[int] = []
    closerIdx = 0
    while closerIdx < maximum:
        closer = delimiters[closerIdx]

        jumps.append(0)

        # markers belong to the same delimiter run if:
        #  - they have adjacent tokens
        #  - AND markers are the same
        #
        if (
            delimiters[headerIdx].marker != closer.marker
            or lastTokenIdx != closer.token - 1
        ):
            headerIdx = closerIdx
        lastTokenIdx = closer.token

        # Length is only used for the emphasis-specific "rule of 3";
        # if it's not defined (in strikethrough or 3rd party plugins),
        # we can default it to 0 to disable those checks.
        #
        closer.length = closer.length or 0

        if not closer.close:
            closerIdx += 1
            continue

        # Previously calculated lower bounds (previous fails)
        # for each marker, each delimiter length modulo 3,
        # and for whether this closer can be an opener;
        # https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
        if closer.marker not in openersBottom:
            openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]

        minOpenerIdx = openersBottom[closer.marker][
            (3 if closer.open else 0) + (closer.length % 3)
        ]
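
        # start the opener scan just before the current delimiter run,
        # skipping over any delimiters that have already been matched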
        openerIdx = headerIdx - jumps[headerIdx] - 1

        newMinOpenerIdx = openerIdx

        while openerIdx > minOpenerIdx:
            opener = delimiters[openerIdx]

            if opener.marker != closer.marker:
                openerIdx -= jumps[openerIdx] + 1
                continue

            if opener.open and opener.end < 0:
                isOddMatch = False

                # from spec:
                #
                # If one of the delimiters can both open and close emphasis, then the
                # sum of the lengths of the delimiter runs containing the opening and
                # closing delimiters must not be a multiple of 3 unless both lengths
                # are multiples of 3.
                #
                if (
                    (opener.close or closer.open)
                    and ((opener.length + closer.length) % 3 == 0)
                    and (opener.length % 3 != 0 or closer.length % 3 != 0)
                ):
                    isOddMatch = True

                if not isOddMatch:
                    # If the previous delimiter cannot be an opener, we can safely skip
                    # the entire sequence in future checks. This is required to make
                    # sure the algorithm has linear complexity (see the *_*_*_*_*_... case).
                    #
                    if openerIdx > 0 and not delimiters[openerIdx - 1].open:
                        lastJump = jumps[openerIdx - 1] + 1
                    else:
                        lastJump = 0

                    jumps[closerIdx] = closerIdx - openerIdx + lastJump
                    jumps[openerIdx] = lastJump

                    closer.open = False
                    opener.end = closerIdx
                    opener.close = False
                    newMinOpenerIdx = -1
                    # treat the next token as the start of a run; this optimizes
                    # skips in the **<...>**a**<...>** pathological case
                    lastTokenIdx = -2
                    break

            openerIdx -= jumps[openerIdx] + 1

        if newMinOpenerIdx != -1:
            # If the match for this delimiter run failed, we want to set a lower
            # bound for future lookups. This is required to make sure the
            # algorithm has linear complexity.
            #
            # See details here:
            # https://github.com/commonmark/cmark/issues/178#issuecomment-270417442
            #
            openersBottom[closer.marker][
                (3 if closer.open else 0) + ((closer.length or 0) % 3)
            ] = newMinOpenerIdx

        closerIdx += 1


def link_pairs(state: StateInline) -> None:
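    """Balance delimiters found at the top level and in each token's meta.

    ``processDelimiters`` runs first on ``state.delimiters`` and then on every
    ``"delimiters"`` list stored in ``state.tokens_meta``, so markers inside
    nested inline content are balanced as well.
    """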
    tokens_meta = state.tokens_meta
    maximum = len(state.tokens_meta)

    processDelimiters(state, state.delimiters)

    curr = 0
    while curr < maximum:
        curr_meta = tokens_meta[curr]
        if curr_meta and "delimiters" in curr_meta:
            processDelimiters(state, curr_meta["delimiters"])
        curr += 1