1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
|
import re
from collections.abc import Iterable
from typing import Optional, Dict
from sybil import Document, Region
from sybil.parsers.abstract.lexers import BlockLexer
# Template for the regex that recognises the opening lines of a directive.
# It is completed with ``str.format``; the only placeholders it contains are
# ``{directive}`` and ``{delimiter}``.
START_PATTERN_TEMPLATE = (
    # Leading indentation is captured as ``prefix``, followed by the ``..`` marker,
    # optional whitespace and the directive name pattern.
    r'^(?P<prefix>[ \t]*)\.\.\s*(?P<directive>{directive})'
    # The delimiter pattern, then any spaces or tabs before the arguments.
    r'{delimiter}[ \t]*'
    # Optional arguments: whatever remains on the directive's own line.
    r'(?P<arguments>[^\n]+)?\n'
    # Optional block of option lines. Each must start with the same prefix (``\1``)
    # plus at least one more space or tab, then ``:name:`` and the rest of the line.
    r'(?P<options>(?:\1[ \t]+:[\w-]*:[^\n]*\n)+)?'
)

# Applied repeatedly to the raw ``options`` text to pull out each option's name and value.
OPTIONS_PATTERN = re.compile(r'[^:]*:(?P<name>[^:]+):[ \t]*(?P<value>[^\n]*)\n')

# Template for the regex that marks where a directive's content stops. The braces of the
# ``{0,n}`` quantifier are doubled so that they survive ``str.format``, which substitutes
# ``{len_prefix}``. Alternatives, in order:
#   - a position straight after a newline where the next text is ``..``;
#   - the end of the text, optionally preceded by one newline;
#   - a newline followed by at most ``len_prefix`` spaces/tabs and then either a
#     non-whitespace character or the end of the text.
END_PATTERN_TEMPLATE = r'((?<=\n)(?=\.\.)|\n?\Z|\n[ \t]{{0,{len_prefix}}}(?=\S|\Z))'
def parse_options_and_source(lexed: Region) -> None:
    """
    Tidy up the lexemes of a freshly lexed directive region, in place.

    - The raw ``options`` lexeme, if there is one, is removed and replaced by a
      :class:`dict` mapping each option name to its value, as found by
      ``OPTIONS_PATTERN``. The dict is empty when there were no options.
    - A non-empty ``source`` lexeme is replaced by the result of calling its
      ``strip_leading_newlines()`` method.

    :param lexed:
      the region whose ``lexemes`` mapping is updated.
    """
    found = lexed.lexemes

    # Always leave an ``options`` dict behind, even when no option lines were matched.
    option_text = found.pop('options', None)
    parsed = {}
    found['options'] = parsed
    if option_text:
        parsed.update(
            (hit['name'], hit['value'])
            for hit in OPTIONS_PATTERN.finditer(option_text)
        )

    body = found.get('source')
    if not body:
        # Missing or empty source is left exactly as it was.
        return
    found['source'] = body.strip_leading_newlines()
class DirectiveLexer(BlockLexer):
    """
    A :class:`~sybil.parsers.abstract.lexers.BlockLexer` that locates ReST directives and
    provides the following lexemes for each one found:

    - ``directive`` as a :class:`str`.
    - ``arguments`` as a :class:`str`.
    - ``options`` as a :class:`dict` of option names to values, added once lexing
      of the region is complete.
    - ``source`` as a :class:`~sybil.Lexeme`.

    :param directive:
      a :class:`str` containing a regular expression pattern to match directive names.

    :param arguments:
      a :class:`str` containing a regular expression pattern to match directive arguments.

    :param mapping:
      If provided, this is used to rename lexemes from the keys in the mapping to their values.
      Only mapped lexemes will be returned in any :class:`~sybil.Region` objects.
    """

    # Regex fragment that has to follow the directive name; subclasses may override it.
    delimiter = '::'

    def __init__(
        self,
        directive: str,
        arguments: str = '',
        mapping: Optional[Dict[str, str]] = None,
    ) -> None:
        """
        Build the start pattern for the requested directive and hand it, together with
        the end pattern template and ``mapping``, to the base lexer.

        Both ``directive`` and ``arguments`` are regex patterns.
        """
        # NOTE(review): ``START_PATTERN_TEMPLATE`` has no ``{arguments}`` placeholder, so
        # ``arguments`` is accepted by ``str.format`` but does not alter the compiled pattern.
        template_values = dict(
            directive=directive,
            delimiter=self.delimiter,
            arguments=arguments,
        )
        opening = re.compile(
            START_PATTERN_TEMPLATE.format(**template_values),
            re.MULTILINE,
        )
        super().__init__(
            start_pattern=opening,
            end_pattern_template=END_PATTERN_TEMPLATE,
            mapping=mapping,
        )

    def __call__(self, document: Document) -> Iterable[Region]:
        """
        Yield each region found by the base lexer in ``document``, after its ``options``
        and ``source`` lexemes have been processed by :func:`parse_options_and_source`.
        """
        regions = super().__call__(document)
        for region in regions:
            parse_options_and_source(region)
            yield region
class DirectiveInCommentLexer(DirectiveLexer):
    """
    A :class:`~sybil.parsers.abstract.lexers.BlockLexer` for directive-like markup placed
    inside ReST comments, for example:

    .. code-block:: rest

        .. not-really-a-directive: some-argument

          Source here...

    The lexemes it provides are:

    - ``directive`` as a :class:`str`.
    - ``arguments`` as a :class:`str`.
    - ``source`` as a :class:`~sybil.Lexeme`.

    :param directive:
      a :class:`str` containing a regular expression pattern to match directive names.

    :param arguments:
      a :class:`str` containing a regular expression pattern to match directive arguments.

    :param mapping:
      If provided, this is used to rename lexemes from the keys in the mapping to their values.
      Only mapped lexemes will be returned in any :class:`~sybil.Region` objects.
    """

    # This is the pattern used for invisible code blocks and the like: as a regex
    # fragment it matches a single colon after the directive name, or nothing at all.
    delimiter = ':?'
|