File: index.py

package info (click to toggle)
mdit-py-plugins 0.4.2-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 672 kB
  • sloc: python: 3,595; sh: 8; makefile: 7
file content (274 lines) | stat: -rw-r--r-- 8,929 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
from __future__ import annotations

from functools import partial
from typing import Any, Sequence

from markdown_it import MarkdownIt
from markdown_it.rules_block import StateBlock
from markdown_it.rules_core import StateCore
from markdown_it.rules_inline import StateInline
from markdown_it.token import Token

from mdit_py_plugins.utils import is_code_block

from .parse import ParseError, parse


def attrs_plugin(
    md: MarkdownIt,
    *,
    after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"),
    spans: bool = False,
    span_after: str = "link",
    allowed: Sequence[str] | None = None,
) -> None:
    """Enable inline attributes written directly after selected inline elements::

        ![alt](https://image.com){#id .a b=c}

    The syntax is inspired by
    `Djot spans
    <https://htmlpreview.github.io/?https://github.com/jgm/djot/blob/master/doc/syntax.html#inline-attributes>`_.

    The following forms are recognised between the curly braces:

    - `.foo` adds ``foo`` as a class.
      Several classes may be given; they are combined.
    - `#foo` sets ``foo`` as the identifier.
      An element has a single identifier, so the last one given wins.
    - `key="value"` or `key=value` sets a key-value attribute.
      The quotes are optional when the value only contains
      ASCII alphanumeric characters, `_`, `:` or `-`.
      Backslash escapes may be used inside quoted values.
    - `%` starts a comment, which runs until the next `%`
      or the end of the attribute block (`}`).

    Multiple attribute blocks are merged.

    :param md: The MarkdownIt instance to modify.
    :param after: The names of inline elements after which attributes may be specified.
        Attributes after emphasis, strikethrough or text elements are not supported,
        since these all require post-parse processing.
        If empty, the inline attribute rule is not registered.
    :param spans: If True, also parse attributes after spans of text, encapsulated by `[]`.
        Note Markdown link references take precedence over this syntax.
    :param span_after: The name of the inline rule after which the span rule is inserted.
    :param allowed: A list of allowed attribute names.
        If not ``None``, any attributes not in this list will be removed
        and placed in the token's meta under the key "insecure_attrs".
    """
    if spans:
        md.inline.ruler.after(span_after, "span", _span_rule)

    if not after:
        return

    # membership is tested for every attribute, so store the names as a set
    allowed_names = set(allowed) if allowed is not None else None
    attr_rule = partial(_attr_inline_rule, after=after, allowed=allowed_names)
    md.inline.ruler.push("attr", attr_rule)


def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) -> None:
    """Enable block attributes.

    A block attribute is a line that holds attributes and no other content.
    Its attributes are attached to the block that follows it::

        {.a #b c=1}
        A paragraph, that will be assigned the class ``a`` and the identifier ``b``.

    Attribute lines can be stacked; classes accumulate,
    and for any other key the lower line overrides the higher one::

        {#a .a c=1}
        {#b .b c=2}
        A paragraph, that will be assigned the classes ``a b``,
        the identifier ``b`` and the attribute ``c=2``.

    This syntax is inspired by Djot block attributes.

    :param md: The MarkdownIt instance to modify.
    :param allowed: A list of allowed attribute names.
        If not ``None``, any attributes not in this list will be removed
        and placed in the token's meta under the key "insecure_attrs".
    """
    # block rule: turns attribute lines into ``attrs_block`` tokens
    md.block.ruler.before("fence", "attr", _attr_block_rule)

    # core rule: moves the collected attributes onto the following token
    allowed_names = set(allowed) if allowed is not None else None
    resolver = partial(_attr_resolve_block_rule, allowed=allowed_names)
    md.core.ruler.after("block", "attr", resolver)


def _find_opening(tokens: Sequence[Token], index: int) -> int | None:
    """Find the opening token index, if the token is closing."""
    if tokens[index].nesting != -1:
        return index
    level = 0
    while index >= 0:
        level += tokens[index].nesting
        if level == 0:
            return index
        index -= 1
    return None


def _span_rule(state: StateInline, silent: bool) -> bool:
    """Inline rule matching ``[text]{attributes}`` and emitting a span.

    On a match (and when not ``silent``), a ``span_open`` token carrying the
    parsed attributes is pushed, the bracketed text is tokenized as inline
    content, and a ``span_close`` token is pushed.

    :returns: True if a span was matched, in which case ``state.pos``
        is moved past the attribute block.
    """
    start = state.pos
    if state.src[start] != "[":
        return False

    outer_max = state.posMax
    label_end = state.md.helpers.parseLinkLabel(state, start, False)
    if label_end < 0:
        # no closing ']' was found, so this cannot be a span
        return False

    attrs_start = label_end + 1
    if attrs_start >= outer_max:
        # nothing follows the ']', so there is no room for attributes
        return False

    try:
        consumed, attrs = parse(state.src[attrs_start:])
    except ParseError:
        return False

    if not silent:
        # restrict the state to the bracketed text while tokenizing it
        state.pos = start + 1
        state.posMax = label_end
        opening = state.push("span_open", "span", 1)
        opening.attrs = attrs  # type: ignore[assignment]
        state.md.inline.tokenize(state)
        state.push("span_close", "span", -1)

    # continue after the attribute block, with the original limit restored
    state.pos = attrs_start + consumed + 1
    state.posMax = outer_max
    return True


def _attr_inline_rule(
    state: StateInline,
    silent: bool,
    after: Sequence[str],
    *,
    allowed: set[str] | None = None,
) -> bool:
    """Inline rule attaching an attribute block to the element just before it.

    The rule only matches when there is no pending text, the last token pushed
    has a type listed in ``after``, and the source at the current position
    parses as an attribute block. The attributes are added to the opening
    token of that element (the token itself, when it is not a closing token).

    :param state: The inline parser state.
    :param silent: If True, only validate and advance, without modifying tokens.
    :param after: The token types after which attributes are accepted.
    :param allowed: If not ``None``, the only attribute names that may be set;
        see ``_add_attrs`` for how other names are handled.
    :returns: True if an attribute block was consumed.
    """
    # attributes must directly follow the token, with no text in between
    if state.pending or not state.tokens:
        return False
    token = state.tokens[-1]
    if token.type not in after:
        return False
    try:
        new_pos, attrs = parse(state.src[state.pos :])
    except ParseError:
        return False
    token_index = _find_opening(state.tokens, len(state.tokens) - 1)
    if token_index is None:
        return False
    state.pos += new_pos + 1
    if not silent:
        attr_token = state.tokens[token_index]
        # Classes accumulate across attribute blocks. The existing classes must
        # be read from the token that receives the attributes: for links and
        # spans that is the opening token, not the closing one matched above.
        if "class" in attrs and "class" in attr_token.attrs:
            attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
        _add_attrs(attr_token, attrs, allowed)
    return True


def _attr_block_rule(
    state: StateBlock, startLine: int, endLine: int, silent: bool
) -> bool:
    """Block rule matching a line that consists solely of an attribute block.

    The line must not be an indented code block, must begin with a `{`,
    and the attribute block must end with the `}` that is the last character
    on the line, ignoring any trailing spaces or tabs.

    On a match (and when not ``silent``), an ``attrs_block`` token holding
    the parsed attributes is pushed and the line is consumed.

    :param state: The block parser state.
    :param startLine: The index of the line to test.
    :param endLine: The index of the last line available (unused).
    :param silent: If True, only validate, without pushing a token.
    :returns: True if the line is an attribute block.
    """
    if is_code_block(state, startLine):
        return False

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it doesn't start with a {, it's not an attribute block
    if state.src[pos] != "{":
        return False

    # find first non-space character from the right
    while maximum > pos and state.src[maximum - 1] in (" ", "\t"):
        maximum -= 1
    # if it doesn't end with a }, it's not an attribute block
    if maximum <= pos:
        return False
    if state.src[maximum - 1] != "}":
        return False

    try:
        new_pos, attrs = parse(state.src[pos:maximum])
    except ParseError:
        return False

    # The attributes must span the whole line; otherwise whatever follows the
    # first closing brace would be silently discarded. ``new_pos`` is the
    # offset of that brace within the slice given to ``parse``,
    # so it is shifted by ``pos`` before comparing with the end of the line.
    if pos + new_pos != maximum - 1:
        return False

    if silent:
        return True

    token = state.push("attrs_block", "", 0)
    token.attrs = attrs  # type: ignore[assignment]
    token.map = [startLine, startLine + 1]

    state.line = startLine + 1
    return True


def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> None:
    """Find attribute block then move its attributes to the next block."""
    i = 0
    len_tokens = len(state.tokens)
    while i < len_tokens:
        if state.tokens[i].type != "attrs_block":
            i += 1
            continue

        if i + 1 < len_tokens:
            next_token = state.tokens[i + 1]

            # classes are appended
            if "class" in state.tokens[i].attrs and "class" in next_token.attrs:
                state.tokens[i].attrs["class"] = (
                    f"{state.tokens[i].attrs['class']} {next_token.attrs['class']}"
                )

            if next_token.type == "attrs_block":
                # subsequent attribute blocks take precedence, when merging
                for key, value in state.tokens[i].attrs.items():
                    if key == "class" or key not in next_token.attrs:
                        next_token.attrs[key] = value
            else:
                _add_attrs(next_token, state.tokens[i].attrs, allowed)

        state.tokens.pop(i)
        len_tokens -= 1


def _add_attrs(
    token: Token,
    attrs: dict[str, Any],
    allowed: set[str] | None,
) -> None:
    """Add attributes to a token, skipping any disallowed attributes."""
    if allowed is not None and (
        disallowed := {k: v for k, v in attrs.items() if k not in allowed}
    ):
        token.meta["insecure_attrs"] = disallowed
        attrs = {k: v for k, v in attrs.items() if k in allowed}

    # attributes takes precedence over existing attributes
    token.attrs.update(attrs)