1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
|
"""Support for documenting Python's grammar."""
from __future__ import annotations
import re
from typing import TYPE_CHECKING
from docutils import nodes
from docutils.parsers.rst import directives
from sphinx import addnodes
from sphinx.domains.std import token_xrefs
from sphinx.util.docutils import SphinxDirective
from sphinx.util.nodes import make_id
if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Sequence
from typing import Any, Final
from docutils.nodes import Node
from sphinx.application import Sphinx
from sphinx.util.typing import ExtensionMetadata
class snippet_string_node(nodes.inline):  # noqa: N801 (snake_case is fine)
    """Inline node that holds a string literal found in a grammar snippet.

    It is constructed exactly like ``nodes.inline``; the only addition is
    that every instance also carries the ``sx`` class.
    """

    def __init__(
        self,
        rawsource: str = '',
        text: str = '',
        *children: Node,
        **attributes: Any,
    ) -> None:
        """Initialise the inline node, then tag it for string highlighting.

        All arguments are forwarded unchanged to the parent constructor.
        """
        super().__init__(rawsource, text, *children, **attributes)
        # 'sx' is the Pygments highlight class for `Literal.String.Other`.
        css_classes = self['classes']
        css_classes.append('sx')
class GrammarSnippetBase(SphinxDirective):
    """Shared machinery for GrammarSnippetDirective and CompatProductionList.

    Option and argument handling is left to the individual subclasses; they
    pass an options mapping and the content lines to `make_grammar_snippet`.
    """

    # Tokenizer applied to each content line. Every alternative contains
    # exactly one named group, and that group's name is used as the "kind"
    # tag of the matched part. The pattern text is kept verbatim (re.VERBOSE
    # ignores its layout), which is why it is not indented with the code.
    grammar_re: Final = re.compile(
        r"""
(?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
(?=:) # ... followed by a colon
|
(?P<rule_ref>`[^\s`]+`) # identifier in backquotes
|
(?P<single_quoted>'[^']*') # string in 'quotes'
|
(?P<double_quoted>"[^"]*") # string in "quotes"
""",
        re.VERBOSE,
    )

    def make_grammar_snippet(
        self, options: dict[str, Any], content: Sequence[str]
    ) -> list[addnodes.productionlist]:
        """Build the production list node for the given options and content.

        ``options['group']`` names the production group. *content* is the
        sequence of grammar lines. The result is a one-element list holding
        a ``productionlist`` node with one ``production`` child per
        production found in *content*.
        """
        group = options['group']
        location = self.get_location()
        productions = [
            self.make_production(
                source,
                parts,
                group_name=group,
                location=location,
            )
            for source, parts in self.production_definitions(content)
        ]
        snippet = addnodes.productionlist(
            '',
            *productions,
            support_smartquotes=False,
            classes=['highlight'],
        )
        self.set_source_info(snippet)
        return [snippet]

    def production_definitions(
        self, lines: Iterable[str], /
    ) -> Iterator[tuple[str, list[tuple[str, str]]]]:
        """Yield ``(rawsource, parts)`` pairs, one per production.

        A line whose first character is not whitespace starts a new
        production (an empty line counts too, as it has no leading
        whitespace); any other line continues the current production.

        ``rawsource`` is the production's lines joined with newlines.
        ``parts`` is a list of ``(kind, text)`` tuples, where ``kind`` is
        either ``'text'`` or the name of the `grammar_re` group that matched.
        """
        raw_lines: list[str] = []
        parts: list[tuple[str, str]] = []
        for line in lines:
            starts_new_rule = not line[:1].isspace()
            if starts_new_rule:
                # Text in column 1: flush whatever has been collected so far.
                finished_source = '\n'.join(raw_lines)
                raw_lines.clear()
                if parts:
                    yield finished_source, parts
                    parts = []

            # Keep the line itself for the raw source ...
            raw_lines.append(line)

            # ... and split it into plain text and recognised tokens.
            cursor = 0
            for hit in self.grammar_re.finditer(line):
                start, end = hit.span()
                if start > cursor:
                    # Plain text between the previous token and this one.
                    parts.append(('text', line[cursor:start]))
                cursor = end
                # Dropping the groups that did not take part in the match
                # should leave exactly one (kind, text) pair; the unpacking
                # below fails loudly otherwise.
                matched_groups = [
                    (kind, value)
                    for kind, value in hit.groupdict().items()
                    if value is not None
                ]
                [token] = matched_groups
                parts.append(token)
            parts.append(('text', line[cursor:] + '\n'))

        # Flush the last production, if there is one.
        if parts:
            yield '\n'.join(raw_lines), parts

    def make_production(
        self,
        rawsource: str,
        production_defs: list[tuple[str, str]],
        *,
        group_name: str,
        location: str,
    ) -> addnodes.production:
        """Assemble a ``production`` node from ``(kind, text)`` parts.

        Each part is converted according to its kind:

        - ``'rule_name'``: a link target, see `make_name_target`
        - ``'rule_ref'``: whatever ``token_xrefs`` returns for the text
        - ``'single_quoted'`` / ``'double_quoted'``: a `snippet_string_node`
        - ``'text'``: a plain ``Text`` node

        Raises ``ValueError`` for any other kind.
        """
        production = addnodes.production(rawsource)
        for re_group_name, content in production_defs:
            if re_group_name == 'rule_name':
                production += self.make_name_target(
                    name=content,
                    production_group=group_name,
                    location=location,
                )
            elif re_group_name == 'rule_ref':
                production += token_xrefs(content, group_name)
            elif re_group_name in ('single_quoted', 'double_quoted'):
                production += snippet_string_node('', content)
            elif re_group_name == 'text':
                production += nodes.Text(content)
            else:
                raise ValueError(f'unhandled match: {re_group_name!r}')
        return production

    def make_name_target(
        self,
        *,
        name: str,
        production_group: str,
        location: str,
    ) -> addnodes.literal_strong:
        """Create the node for a rule name and register it as a link target.

        The node gets an id built from the prefix
        ``grammar-token-<production_group>`` and *name*, is noted on the
        document as an implicit target, and is recorded in the standard
        domain as a ``token`` object. That object is named
        ``<production_group>:<name>``, or just *name* when the group is empty.
        """
        # Cargo-culted from Sphinx's own `production` handling. It has to
        # stay the same as what Sphinx does, or existing links would break.
        target = addnodes.literal_strong(name, name)
        document = self.state.document
        id_prefix = f'grammar-token-{production_group}'
        target_id = make_id(self.env, document, id_prefix, name)
        target['ids'].append(target_id)
        document.note_implicit_target(target, target)

        if production_group:
            qualified_name = f'{production_group}:{name}'
        else:
            qualified_name = name
        standard_domain = self.env.domains.standard_domain
        standard_domain.note_object(
            'token', qualified_name, target_id, location=location
        )
        return target
class GrammarSnippetDirective(GrammarSnippetBase):
    """Render a ``grammar-snippet`` directive as a production list.

    Input such as::

        .. grammar-snippet:: file
           :group: python-grammar

           file: (NEWLINE | statement)*

    becomes something similar to Sphinx's productionlist, but better suited
    to our needs:

    - A colon is used instead of ``::=``, as in ``Grammar/python.gram``.
    - The listing is shown almost as is, with no auto-alignment.

    Backticks mark references to other tokens. Quoted strings and a rule
    name at the start of a line (followed by a colon) are also recognised.

    Unlike Sphinx's productionlist, this directive supports options.
    The "group" must be given as a named option.
    The content must be preceded by a blank line (like with most reST
    directives).
    """

    # A single argument is accepted, but it is currently ignored.
    required_arguments = 0
    optional_arguments = 1
    final_argument_whitespace = True

    has_content = True
    option_spec = {
        'group': directives.unchanged_required,
    }

    def run(self) -> list[addnodes.productionlist]:
        """Build the snippet from this directive's options and content."""
        snippet_nodes = self.make_grammar_snippet(self.options, self.content)
        return snippet_nodes
class CompatProductionList(GrammarSnippetBase):
    """Create grammar snippets from reST productionlist syntax.

    This is intended to be a transitional directive, used while we switch
    from productionlist to grammar-snippet.
    It makes existing docs that use the reST syntax look like
    grammar-snippet, as much as possible.
    """

    # The whole body of a productionlist arrives as one multi-line argument,
    # so the directive takes no separate content and no options.
    has_content = False
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {}

    def run(self) -> list[addnodes.productionlist]:
        """Convert the productionlist argument into a grammar snippet.

        The first line of the argument is the group name; every following
        line is a rule line of the form ``name: definition``. A line with
        nothing before the colon continues the previous rule. Rule names are
        left-aligned and padded so that the definitions line up.

        A list that names a group but has no rule lines yields an empty
        production list.

        Raises a directive error (via ``self.error``) if a rule line
        contains no colon.
        """
        # The "content" of a productionlist is actually the first and only
        # argument. The first line is the group; the rest is the content.
        lines = self.arguments[0].splitlines()
        group = lines[0].strip()
        options = {'group': group}
        rule_lines = lines[1:]

        # Every rule line needs a colon; report a missing one as a directive
        # error instead of letting a bare ValueError escape.
        colon_columns = []
        for line in rule_lines:
            column = line.find(':')
            if column < 0:
                raise self.error(
                    f'productionlist line has no colon: {line.strip()!r}'
                )
            colon_columns.append(column)

        # Align on the right-most colon. ``default`` covers a list that has
        # a group name but no rule lines.
        align_column = max(colon_columns, default=-1) + 1

        content = []
        for line in rule_lines:
            rule_name, _colon, text = line.partition(':')
            rule_name = rule_name.strip()
            # Continuation lines have no name and are padded with spaces only.
            name_part = f'{rule_name}:' if rule_name else ''
            content.append(f'{name_part:<{align_column}}{text}')
        return self.make_grammar_snippet(options, content)
def setup(app: Sphinx) -> ExtensionMetadata:
    """Register the grammar directives with Sphinx.

    Adds the ``grammar-snippet`` directive and overrides the ``std``
    domain's ``productionlist`` directive with `CompatProductionList`.
    Returns the extension metadata dictionary.
    """
    app.add_directive('grammar-snippet', GrammarSnippetDirective)
    app.add_directive_to_domain(
        'std',
        'productionlist',
        CompatProductionList,
        override=True,
    )
    metadata: ExtensionMetadata = {
        'version': '1.0',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
    return metadata
|