1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
|
from __future__ import annotations
from functools import partial
from typing import Any, Sequence
from markdown_it import MarkdownIt
from markdown_it.rules_block import StateBlock
from markdown_it.rules_core import StateCore
from markdown_it.rules_inline import StateInline
from markdown_it.token import Token
from mdit_py_plugins.utils import is_code_block
from .parse import ParseError, parse
def attrs_plugin(
    md: MarkdownIt,
    *,
    after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"),
    spans: bool = False,
    span_after: str = "link",
    allowed: Sequence[str] | None = None,
) -> None:
    """Enable inline attribute blocks written directly after selected inline elements::

        ![alt](https://image.com){#id .a b=c}

    The syntax is inspired by
    `Djot spans
    <https://htmlpreview.github.io/?https://github.com/jgm/djot/blob/master/doc/syntax.html#inline-attributes>`_.

    Within the curly braces the following forms are recognised:

    - `.foo` adds ``foo`` as a class.
      Several classes may be given; they are combined.
    - `#foo` sets ``foo`` as the identifier.
      An element has a single identifier;
      when several are given, the last one wins.
    - `key="value"` or `key=value` sets a key-value attribute.
      The quotes may be omitted when the value consists only of
      ASCII alphanumeric characters, `_`, `:` or `-`.
      Backslash escapes may be used inside quoted values.
    - `%` starts a comment, which runs until the next `%`
      or the end of the attribute block (`}`).

    Consecutive attribute blocks are merged.

    :param md: The MarkdownIt instance to modify.
    :param after: Names of the inline token types after which attributes may be given.
        Attributes after emphasis, strikethrough or text elements are not supported,
        since those all require post-parse processing.
        If empty, no attribute rule is registered.
    :param spans: If True, also parse attributes after spans of text wrapped in `[]`.
        Note that Markdown link references take precedence over this syntax.
    :param span_after: Name of the inline rule after which the span rule is inserted.
    :param allowed: Attribute names that may be applied.
        If not ``None``, every attribute not in this list is removed
        and stored in the token's meta under the key "insecure_attrs".
    """
    if spans:
        # The span rule is inserted right behind the rule named by ``span_after``.
        md.inline.ruler.after(span_after, "span", _span_rule)

    if not after:
        return

    # A set gives constant-time membership tests when filtering attributes.
    allowed_names = set(allowed) if allowed is not None else None
    attr_rule = partial(_attr_inline_rule, after=after, allowed=allowed_names)
    md.inline.ruler.push("attr", attr_rule)
def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) -> None:
    """Enable block attributes.

    A block attribute is an attribute block that sits alone on its own line.
    Its attributes are attached to the block that follows it::

        {.a #b c=1}
        A paragraph, that will be assigned the class ``a`` and the identifier ``b``.

    Attribute lines may be stacked; classes accumulate,
    while for other attributes the lower line overrides the higher one::

        {#a .a c=1}
        {#b .b c=2}
        A paragraph, that will be assigned the class ``a b``,
        the identifier ``b`` and the attribute ``c=2``.

    This syntax is inspired by Djot block attributes.

    :param md: The MarkdownIt instance to modify.
    :param allowed: Attribute names that may be applied.
        If not ``None``, every attribute not in this list is removed
        and stored in the token's meta under the key "insecure_attrs".
    """
    # Block rule: recognises attribute lines; registered ahead of "fence".
    md.block.ruler.before("fence", "attr", _attr_block_rule)

    # Core rule: runs after "block" and moves the collected attributes
    # onto the token that follows each attribute line.
    allowed_names = set(allowed) if allowed is not None else None
    resolver = partial(_attr_resolve_block_rule, allowed=allowed_names)
    md.core.ruler.after("block", "attr", resolver)
def _find_opening(tokens: Sequence[Token], index: int) -> int | None:
    """Return the index of the token that opens the element ending at ``index``.

    If the token at ``index`` is not a closing token (``nesting != -1``),
    ``index`` itself is returned. Otherwise the tokens are walked backwards,
    summing their ``nesting`` values, until the sum returns to zero.

    :returns: the index of the matching opening token,
        or ``None`` when the start of the sequence is reached without a match.
    """
    if tokens[index].nesting != -1:
        return index

    depth = 0
    for candidate in range(index, -1, -1):
        depth += tokens[candidate].nesting
        if depth == 0:
            return candidate
    return None
def _span_rule(state: StateInline, silent: bool) -> bool:
    """Inline rule parsing ``[text]{attrs}`` into ``span_open`` / ``span_close`` tokens.

    :param state: the inline parser state, positioned at the candidate ``[``.
    :param silent: if True, only validate and advance; do not emit tokens.
    :returns: True if a span with attributes was recognised
        (``state.pos`` is then moved past the closing ``}``), otherwise False.
    """
    if state.src[state.pos] != "[":
        return False

    maximum = state.posMax
    label_start = state.pos + 1
    label_end = state.md.helpers.parseLinkLabel(state, state.pos, False)

    # parser failed to find ']', so it's not a valid span
    if label_end < 0:
        return False

    pos = label_end + 1

    # check not at end of inline
    if pos >= maximum:
        return False

    try:
        # Only look at text up to ``maximum``: when this rule runs inside a nested,
        # bounded tokenize, the attribute block must not extend past that bound.
        new_pos, attrs = parse(state.src[pos:maximum])
    except ParseError:
        return False

    # ``new_pos`` is the offset of the closing ``}`` within the parsed slice.
    pos += new_pos + 1

    if not silent:
        # Temporarily narrow the state to the label so its content is
        # tokenized as the children of the span.
        state.pos = label_start
        state.posMax = label_end
        token = state.push("span_open", "span", 1)
        token.attrs = attrs  # type: ignore[assignment]
        state.md.inline.tokenize(state)
        state.push("span_close", "span", -1)

    state.pos = pos
    state.posMax = maximum
    return True
def _attr_inline_rule(
    state: StateInline,
    silent: bool,
    after: Sequence[str],
    *,
    allowed: set[str] | None = None,
) -> bool:
    """Inline rule attaching a ``{...}`` attribute block to the preceding element.

    The rule only matches when there is no pending text and the most recently
    pushed token has a type listed in ``after``. When that token is a closing
    token, the attributes are applied to its matching opening token.

    :param state: the inline parser state, positioned at the candidate ``{``.
    :param silent: if True, only validate and advance; do not modify tokens.
    :param after: token types after which an attribute block is accepted.
    :param allowed: if not ``None``, only these attribute names are applied;
        others are recorded by ``_add_attrs`` instead.
    :returns: True if an attribute block was consumed, otherwise False.
    """
    if state.pending or not state.tokens:
        return False

    last_token = state.tokens[-1]
    if last_token.type not in after:
        return False

    try:
        # Stay within ``posMax`` so a nested, bounded tokenize cannot consume
        # text that belongs to the enclosing element.
        new_pos, attrs = parse(state.src[state.pos : state.posMax])
    except ParseError:
        return False

    token_index = _find_opening(state.tokens, len(state.tokens) - 1)
    if token_index is None:
        return False

    # ``new_pos`` is the offset of the closing ``}`` within the parsed slice.
    state.pos += new_pos + 1

    if not silent:
        attr_token = state.tokens[token_index]
        # Classes accumulate. The existing class must be read from the token that
        # receives the attributes (the opening token), not from the last token,
        # which may be a closing token carrying no attributes at all.
        if "class" in attrs and "class" in attr_token.attrs:
            attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
        _add_attrs(attr_token, attrs, allowed)

    return True
def _attr_block_rule(
    state: StateBlock, startLine: int, endLine: int, silent: bool
) -> bool:
    """Find a block of attributes.

    The block must be a single line that begins with a `{`, after three or fewer spaces,
    and ends with a `}` followed by any number of spaces or tabs.
    The attribute block must span that whole line: a line whose attribute block
    closes before the final `}` is not treated as an attribute block.

    :param state: the block parser state.
    :param startLine: index of the line to test.
    :param endLine: unused; part of the block rule signature.
    :param silent: if True, only validate; do not emit a token.
    :returns: True if the line is an attribute block, otherwise False.
    """
    if is_code_block(state, startLine):
        return False

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it is empty or doesn't start with a {, it's not an attribute block
    if pos >= maximum or state.src[pos] != "{":
        return False

    # find first non-space character from the right
    # (cannot pass ``pos``, because the character there is a ``{``)
    while maximum > pos and state.src[maximum - 1] in (" ", "\t"):
        maximum -= 1

    # if it doesn't end with a }, it's not an attribute block
    if state.src[maximum - 1] != "}":
        return False

    try:
        new_pos, attrs = parse(state.src[pos:maximum])
    except ParseError:
        return False

    # ``new_pos`` is the offset of the closing ``}`` relative to ``pos``.
    # If the attributes were closed before the end of the line, the line has
    # trailing content and is not an attribute block.
    if pos + new_pos != maximum - 1:
        return False

    if silent:
        return True

    token = state.push("attrs_block", "", 0)
    token.attrs = attrs  # type: ignore[assignment]
    token.map = [startLine, startLine + 1]
    state.line = startLine + 1
    return True
def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> None:
    """Remove every ``attrs_block`` token, handing its attributes to the next token.

    When the following token is itself an ``attrs_block``, the attributes are
    merged into it, so that a stack of attribute lines collapses into the last one.
    Otherwise they are applied to the following token through ``_add_attrs``.
    An ``attrs_block`` with no following token is simply dropped.

    :param state: the core state whose ``tokens`` list is modified in place.
    :param allowed: if not ``None``, the attribute names ``_add_attrs`` may apply.
    """
    tokens = state.tokens
    idx = 0
    while idx < len(tokens):
        current = tokens[idx]
        if current.type != "attrs_block":
            idx += 1
            continue

        if idx + 1 < len(tokens):
            target = tokens[idx + 1]
            pending = current.attrs

            # classes are appended
            if "class" in pending and "class" in target.attrs:
                pending["class"] = f"{pending['class']} {target.attrs['class']}"

            if target.type != "attrs_block":
                _add_attrs(target, pending, allowed)
            else:
                # subsequent attribute blocks take precedence, when merging
                for name, value in pending.items():
                    if name == "class" or name not in target.attrs:
                        target.attrs[name] = value

        # Drop the attribute token; ``idx`` now points at its successor.
        del tokens[idx]
def _add_attrs(
    token: Token,
    attrs: dict[str, Any],
    allowed: set[str] | None,
) -> None:
    """Add attributes to a token, skipping any disallowed attributes.

    :param token: the token whose ``attrs`` (and possibly ``meta``) are updated.
    :param attrs: the attributes to apply; this mapping is not modified.
    :param allowed: if not ``None``, only these attribute names are applied.
        All others are stored in ``token.meta["insecure_attrs"]``, accumulating
        across calls so that several attribute blocks on one token are all recorded.
    """
    if allowed is not None:
        disallowed = {k: v for k, v in attrs.items() if k not in allowed}
        if disallowed:
            # Merge with anything recorded by an earlier attribute block rather
            # than overwriting it; later values win on a name clash.
            token.meta["insecure_attrs"] = {
                **token.meta.get("insecure_attrs", {}),
                **disallowed,
            }
            attrs = {k: v for k, v in attrs.items() if k in allowed}

    # attributes takes precedence over existing attributes
    token.attrs.update(attrs)
|