File: misc.py

package info (click to toggle)
python-docutils 0.22%2Bdfsg-1
links: PTS, VCS
area: main
in suites: experimental
size: 11,448 kB
sloc: python: 53,302; lisp: 14,475; xml: 1,807; javascript: 1,032; makefile: 102; sh: 96
file content (694 lines) | stat: -rw-r--r-- 28,414 bytes
# $Id: misc.py 10126 2025-05-13 08:37:56Z milde $
# Authors: David Goodger <goodger@python.org>; Dethe Elza
# Copyright: This module has been placed in the public domain.

"""Miscellaneous directives."""

from __future__ import annotations

__docformat__ = 'reStructuredText'

import os
import re
import time
from pathlib import Path
from urllib.request import urlopen
from urllib.error import URLError

import docutils
from docutils import frontend, io, nodes, statemachine, utils
from docutils.parsers.rst import Directive, convert_directive_function
from docutils.parsers.rst import directives, roles, states
from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
from docutils.transforms import misc

TYPE_CHECKING = False
if TYPE_CHECKING:
    from docutils.nodes import StrPath


def adapt_path(path: str, source='', root_prefix='') -> str:
    # Adapt path to files to include or embed.
    # `root_prefix` is prepended to absolute paths (cf. root_prefix setting),
    # `source` is the `current_source` of the including directive (which may
    # be a file included by the main document).
    if root_prefix and path.startswith('/'):
        base = Path(root_prefix)
        path = path[1:]
    else:
        base = Path(source).parent
    # pepend "base" and convert to relative path for shorter system messages
    return utils.relative_path(None, base/path)


class Include(Directive):

    """
    Include content read from a separate source file.

    Content may be parsed by the parser, or included as a literal
    block.  The encoding of the included file can be specified.  Only
    a part of the given file argument may be included by specifying
    start and end line or text to match before and/or after the text
    to be used.

    https://docutils.sourceforge.io/docs/ref/rst/directives.html#include
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'literal': directives.flag,
                   'code': directives.unchanged,
                   'encoding': directives.encoding,
                   'parser': directives.parser_name,
                   'tab-width': int,
                   'start-line': int,
                   'end-line': int,
                   'start-after': directives.unchanged_required,
                   'end-before': directives.unchanged_required,
                   # ignored except for 'literal' or 'code':
                   'number-lines': directives.value_or((None,), int),
                   'class': directives.class_option,
                   'name': directives.unchanged}

    standard_include_path = Path(docutils._datadir(states.__file__)) / 'include'

    def run(self) -> list:
        """Include a file as part of the content of this reST file.

        Depending on the options, the file content (or a clipping) is
        converted to nodes and returned or inserted into the input stream.
        """
        self.settings = settings = self.state.document.settings
        if not settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        self.tab_width = self.options.get('tab-width', settings.tab_width)
        self.clip_options = (self.options.get('start-line', None),
                             self.options.get('end-line', None),
                             self.options.get('start-after', ''),
                             self.options.get('end-before', ''))
        path = directives.path(self.arguments[0])
        if path.startswith('<') and path.endswith('>'):
            path = '/' + path[1:-1]
            root_prefix = self.standard_include_path
        else:
            root_prefix = settings.root_prefix
        path = adapt_path(path,
                          self.state.document.current_source,
                          root_prefix)
        self.options['source'] = path

        inputstring = self.read_file(path)

        if 'literal' in self.options:
            return self.as_literal_block(inputstring)
        if 'code' in self.options:
            return self.as_code_block(inputstring)
        if 'parser' in self.options:
            return self.custom_parse(inputstring)
        self.insert_into_input_lines(inputstring)
        return []

    def read_file(self, path: StrPath) -> str:
        """Read text file at `path`. Clip and return content.

        Provisional.
        """
        encoding = self.options.get('encoding', self.settings.input_encoding)
        error_handler = self.settings.input_encoding_error_handler
        try:
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=error_handler)
        except UnicodeEncodeError:
            raise self.severe(f'Problems with "{self.name}" directive path:\n'
                              f'Cannot encode input file path "{path}" '
                              '(wrong locale?).')
        except OSError as error:
            raise self.severe(f'Problems with "{self.name}" directive path:\n'
                              f'{io.error_string(error)}.')
        else:
            self.settings.record_dependencies.add(path)
        try:
            text = include_file.read()
        except UnicodeError as error:
            raise self.severe(f'Problem with "{self.name}" directive:\n'
                              + io.error_string(error))
        # Clip to-be-included content
        startline, endline, starttext, endtext = self.clip_options
        if startline or (endline is not None):
            lines = text.splitlines()
            text = '\n'.join(lines[startline:endline])
        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        if starttext:
            # skip content in text before *and incl.* a matching text
            after_index = text.find(starttext)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of '
                                  f'"{self.name}" directive:\nText not found.')
            text = text[after_index + len(starttext):]
        if endtext:
            # skip content in text after *and incl.* a matching text
            before_index = text.find(endtext)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of '
                                  f'"{self.name}" directive:\nText not found.')
            text = text[:before_index]
        return text

    def as_literal_block(self, text: str) -> list[nodes.literal_block]:
        """Return list with literal_block containing `text`.

        Provisional
        """
        source = self.options['source']
        # Convert tabs to spaces unless `tab_width` is negative.
        if self.tab_width >= 0:
            text = text.expandtabs(self.tab_width)
        literal_block = nodes.literal_block(
            '', source=source, classes=self.options.get('class', []))
        literal_block.source = source
        literal_block.line = self.options.get('start-line', 0) + 1
        self.add_name(literal_block)
        if 'number-lines' in self.options:
            firstline = self.options['number-lines'] or 1
            text = text.removesuffix('\n')
            lastline = firstline + len(text.splitlines())
            tokens = NumberLines([([], text)], firstline, lastline)
            for classes, value in tokens:
                if classes:
                    literal_block += nodes.inline('', value, classes=classes)
                else:
                    literal_block += nodes.Text(value)
        else:
            literal_block += nodes.Text(text)
        return [literal_block]

    def as_code_block(self, text: str) -> list[nodes.literal_block]:
        """Pass `text` to the `CodeBlock` directive class.

        Provisional.
        """
        # convert tabs to spaces unless `tab_width` is negative:
        if self.tab_width >= 0:
            text = text.expandtabs(self.tab_width)
        codeblock = CodeBlock(self.name,
                              [self.options.pop('code')],  # pass as argument
                              self.options,
                              [text.removesuffix('\n')],   # content
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine,
                              )
        return codeblock.run()

    def custom_parse(self, text: str) -> list:
        """Parse with custom parser.

        Parse with ``self.options['parser']`` into a new (dummy) document,
        apply the parser's default transforms,
        return child elements.

        Provisional.
        """
        parser = self.options['parser']()
        settings = frontend.get_default_settings(parser)
        # update with current document settings
        for k, v in self.settings.__dict__.items():
            setattr(settings, k, v)
        settings._source = self.options['source']
        document = utils.new_document(settings._source, settings)
        document.include_log = self.state.document.include_log
        document.ids = self.state.document.ids
        document.nameids = self.state.document.nameids
        document.nametypes = self.state.document.nametypes
        parser.parse(text, document)
        self.state.document.parse_messages.extend(document.parse_messages)
        # clean up doctree and complete parsing
        document.transformer.populate_from_components((parser,))
        document.transformer.apply_transforms()
        self.state.document.transform_messages.extend(
            document.transform_messages)
        return document.children

    def insert_into_input_lines(self, text: str) -> None:
        """Insert file content into the rST input of the calling parser.

        Returns an empty list to comply with the API of `Directive.run()`.

        Provisional.
        """
        source = self.options['source']
        textlines = statemachine.string2lines(text, self.tab_width,
                                              convert_whitespace=True)
        # Sanity checks:
        # excessively long lines
        for i, line in enumerate(textlines):
            if len(line) > self.settings.line_length_limit:
                line_no = i + 1 + self.options.get('start-line', 0)
                raise self.warning(f'"{source}": line {line_no} exceeds the'
                                   ' line-length-limit.')
        # circular inclusion
        include_log = self.state.document.include_log
        if not include_log:  # new document, initialize with document source
            current_source = utils.relative_path(
                                None, self.state.document.current_source)
            include_log.append((current_source, (None, None, '', '')))
        if (source, self.clip_options) in include_log:
            source_chain = (pth for (pth, opt) in reversed(include_log))
            inclusion_chain = '\n> '.join((source, *source_chain))
            raise self.warning(f'circular inclusion in "{self.name}"'
                               f' directive:\n{inclusion_chain}')
        include_log.append((source, self.clip_options))
        # marker for removing log entry (cf. parsers.rst.states.Body.comment())
        textlines += ['', f'.. end of inclusion from "{source}"']

        self.state_machine.insert_input(textlines, source)
        # TODO: if startline != 0, line numbers are wrong.


class Raw(Directive):

    """
    Pass through content unchanged

    Content is included in output based on type argument

    Content may be included inline (content section of directive) or
    imported from a file or url.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'file': directives.path,
                   'url': directives.uri,
                   'encoding': directives.encoding,
                   'class': directives.class_option}
    has_content = True

    def run(self):
        settings = self.state.document.settings
        if (not settings.raw_enabled
            or (not settings.file_insertion_enabled
                and ('file' in self.options or 'url' in self.options))):
            raise self.warning('"%s" directive disabled.' % self.name)
        attributes = {'format': ' '.join(self.arguments[0].lower().split())}
        encoding = self.options.get('encoding', settings.input_encoding)
        error_handler = settings.input_encoding_error_handler
        if self.content:
            if 'file' in self.options or 'url' in self.options:
                raise self.error(
                    '"%s" directive may not both specify an external file '
                    'and have content.' % self.name)
            text = '\n'.join(self.content)
        elif 'file' in self.options:
            if 'url' in self.options:
                raise self.error(
                    'The "file" and "url" options may not be simultaneously '
                    'specified for the "%s" directive.' % self.name)
            path = adapt_path(self.options['file'],
                              self.state.document.current_source,
                              settings.root_prefix)
            try:
                raw_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=error_handler)
            except OSError as error:
                raise self.severe(f'Problems with "{self.name}" directive '
                                  f'path:\n{io.error_string(error)}.')
            else:
                # TODO: currently, raw input files are recorded as
                # dependencies even if not used for the chosen output format.
                settings.record_dependencies.add(path)
            try:
                text = raw_file.read()
            except UnicodeError as error:
                raise self.severe(f'Problem with "{self.name}" directive:\n'
                                  + io.error_string(error))
            attributes['source'] = path
        elif 'url' in self.options:
            source = self.options['url']
            try:
                raw_text = urlopen(source).read()
            except (URLError, OSError) as error:
                raise self.severe(f'Problems with "{self.name}" directive URL '
                                  f'"{self.options["url"]}":\n'
                                  f'{io.error_string(error)}.')
            raw_file = io.StringInput(source=raw_text, source_path=source,
                                      encoding=encoding,
                                      error_handler=error_handler)
            try:
                text = raw_file.read()
            except UnicodeError as error:
                raise self.severe(f'Problem with "{self.name}" directive:\n'
                                  + io.error_string(error))
            attributes['source'] = source
        else:
            # This will always fail because there is no content.
            self.assert_has_content()
        raw_node = nodes.raw('', text, classes=self.options.get('class', []),
                             **attributes)
        (raw_node.source,
         raw_node.line) = self.state_machine.get_source_and_line(self.lineno)
        return [raw_node]


class Replace(Directive):

    has_content = True

    def run(self):
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        self.assert_has_content()
        text = '\n'.join(self.content)
        element = nodes.Element(text)
        self.state.nested_parse(self.content, self.content_offset,
                                element)
        # element might contain [paragraph] + system_message(s)
        node = None
        messages = []
        for elem in element:
            if not node and isinstance(elem, nodes.paragraph):
                node = elem
            elif isinstance(elem, nodes.system_message):
                elem['backrefs'] = []
                messages.append(elem)
            else:
                return [
                    self.reporter.error(
                        f'Error in "{self.name}" directive: may contain '
                        'a single paragraph only.', line=self.lineno)]
        if node:
            return messages + node.children
        return messages


class Unicode(Directive):

    r"""
    Convert Unicode character codes (numbers) to characters.  Codes may be
    decimal numbers, hexadecimal numbers (prefixed by ``0x``, ``x``, ``\x``,
    ``U+``, ``u``, or ``\u``; e.g. ``U+262E``), or XML-style numeric character
    entities (e.g. ``&#x262E;``).  Text following ".." is a comment and is
    ignored.  Spaces are ignored, and any other text remains as-is.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'trim': directives.flag,
                   'ltrim': directives.flag,
                   'rtrim': directives.flag}

    comment_pattern = re.compile(r'( |\n|^)\.\. ')

    def run(self):
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        substitution_definition = self.state_machine.node
        if 'trim' in self.options:
            substitution_definition.attributes['ltrim'] = 1
            substitution_definition.attributes['rtrim'] = 1
        if 'ltrim' in self.options:
            substitution_definition.attributes['ltrim'] = 1
        if 'rtrim' in self.options:
            substitution_definition.attributes['rtrim'] = 1
        codes = self.comment_pattern.split(self.arguments[0])[0].split()
        element = nodes.Element()
        for code in codes:
            try:
                decoded = directives.unicode_code(code)
            except ValueError as error:
                raise self.error('Invalid character code: %s\n%s'
                                 % (code, io.error_string(error)))
            element += nodes.Text(decoded)
        return element.children


class Class(Directive):

    """
    Set a "class" attribute on the directive content or the next element.
    When applied to the next element, a "pending" element is inserted, and a
    transform does the work later.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    has_content = True

    def run(self):
        try:
            class_value = directives.class_option(self.arguments[0])
        except ValueError:
            raise self.error(
                'Invalid class attribute value for "%s" directive: "%s".'
                % (self.name, self.arguments[0]))
        node_list = []
        if self.content:
            container = nodes.Element()
            self.state.nested_parse(self.content, self.content_offset,
                                    container)
            for node in container:
                node['classes'].extend(class_value)
            node_list.extend(container.children)
        else:
            pending = nodes.pending(
                misc.ClassAttribute,
                {'class': class_value, 'directive': self.name},
                self.block_text)
            self.state_machine.document.note_pending(pending)
            node_list.append(pending)
        return node_list


class Role(Directive):

    has_content = True

    argument_pattern = re.compile(r'(%s)\s*(\(\s*(%s)\s*\)\s*)?$'
                                  % ((states.Inliner.simplename,) * 2))

    def run(self):
        """Dynamically create and register a custom interpreted text role."""
        if self.content_offset > self.lineno or not self.content:
            raise self.error('"%s" directive requires arguments on the first '
                             'line.' % self.name)
        args = self.content[0]
        match = self.argument_pattern.match(args)
        if not match:
            raise self.error('"%s" directive arguments not valid role names: '
                             '"%s".' % (self.name, args))
        new_role_name = match.group(1)
        base_role_name = match.group(3)
        messages = []
        if base_role_name:
            base_role, messages = roles.role(
                base_role_name, self.state_machine.language, self.lineno,
                self.state.reporter)
            if base_role is None:
                error = self.state.reporter.error(
                    'Unknown interpreted text role "%s".' % base_role_name,
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                return messages + [error]
        else:
            base_role = roles.generic_custom_role
        assert not hasattr(base_role, 'arguments'), (
            'Supplemental directive arguments for "%s" directive not '
            'supported (specified by "%r" role).' % (self.name, base_role))
        try:
            converted_role = convert_directive_function(base_role)
            (arguments, options, content, content_offset
             ) = self.state.parse_directive_block(
                    self.content[1:], self.content_offset,
                    converted_role, option_presets={})
        except states.MarkupError as detail:
            error = self.reporter.error(
                'Error in "%s" directive:\n%s.' % (self.name, detail),
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            return messages + [error]
        if 'class' not in options:
            try:
                options['class'] = directives.class_option(new_role_name)
            except ValueError as detail:
                error = self.reporter.error(
                    'Invalid argument for "%s" directive:\n%s.'
                    % (self.name, detail),
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                return messages + [error]
        role = roles.CustomRole(new_role_name, base_role, options, content)
        roles.register_local_role(new_role_name, role)
        return messages


class DefaultRole(Directive):

    """Set the default interpreted text role."""

    optional_arguments = 1
    final_argument_whitespace = False

    def run(self):
        if not self.arguments:
            if '' in roles._roles:
                # restore the "default" default role
                del roles._roles['']
            return []
        role_name = self.arguments[0]
        role, messages = roles.role(role_name, self.state_machine.language,
                                    self.lineno, self.state.reporter)
        if role is None:
            error = self.state.reporter.error(
                'Unknown interpreted text role "%s".' % role_name,
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            return messages + [error]
        roles._roles[''] = role
        return messages


class Title(Directive):

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True

    def run(self):
        self.state_machine.document['title'] = self.arguments[0]
        return []


class MetaBody(states.SpecializedBody):

    def field_marker(self, match, context, next_state):
        """Meta element."""
        node, blank_finish = self.parsemeta(match)
        self.parent += node
        return [], next_state, []

    def parsemeta(self, match):
        name = self.parse_field_marker(match)
        name = nodes.unescape(utils.escape2null(name))
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        node = nodes.meta()
        node['content'] = nodes.unescape(utils.escape2null(
                                            ' '.join(indented)))
        if not indented:
            line = self.state_machine.line
            msg = self.reporter.info(
                  'No content for meta tag "%s".' % name,
                  nodes.literal_block(line, line))
            return msg, blank_finish
        tokens = name.split()
        try:
            attname, val = utils.extract_name_value(tokens[0])[0]
            node[attname.lower()] = val
        except utils.NameValueError:
            node['name'] = tokens[0]
        for token in tokens[1:]:
            try:
                attname, val = utils.extract_name_value(token)[0]
                node[attname.lower()] = val
            except utils.NameValueError as detail:
                line = self.state_machine.line
                msg = self.reporter.error(
                      'Error parsing meta tag attribute "%s": %s.'
                      % (token, detail), nodes.literal_block(line, line))
                return msg, blank_finish
        return node, blank_finish


class Meta(Directive):

    has_content = True

    SMkwargs = {'state_classes': (MetaBody,)}

    def run(self):
        self.assert_has_content()
        node = nodes.Element()
        new_line_offset, blank_finish = self.state.nested_list_parse(
            self.content, self.content_offset, node,
            initial_state='MetaBody', blank_finish=True,
            state_machine_kwargs=self.SMkwargs)
        if (new_line_offset - self.content_offset) != len(self.content):
            # incomplete parse of block?
            error = self.reporter.error(
                'Invalid meta directive.',
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            node += error
        # insert at begin of document
        index = self.state.document.first_child_not_matching_class(
                                        (nodes.Titular, nodes.meta)) or 0
        self.state.document[index:index] = node.children
        return []


class Date(Directive):

    has_content = True

    def run(self):
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        format_str = '\n'.join(self.content) or '%Y-%m-%d'
        # @@@
        # Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable?
        # Pro: Docutils-generated documentation
        #      can easily be part of `reproducible software builds`__
        #
        #      __ https://reproducible-builds.org/
        #
        # Con: Changes the specs, hard to predict behaviour,
        #
        # See also the discussion about \date \time \year in TeX
        # http://tug.org/pipermail/tex-k/2016-May/002704.html
        source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
        if source_date_epoch:
            text = time.strftime(format_str,
                                 time.gmtime(int(source_date_epoch)))
        else:
            text = time.strftime(format_str)
        return [nodes.Text(text)]


class TestDirective(Directive):

    """This directive is useful only for testing purposes."""

    optional_arguments = 1
    final_argument_whitespace = True
    option_spec = {'option': directives.unchanged_required}
    has_content = True

    def run(self):
        if self.content:
            text = '\n'.join(self.content)
            info = self.reporter.info(
                'Directive processed. Type="%s", arguments=%r, options=%r, '
                'content:' % (self.name, self.arguments, self.options),
                nodes.literal_block(text, text), line=self.lineno)
        else:
            info = self.reporter.info(
                'Directive processed. Type="%s", arguments=%r, options=%r, '
                'content: None' % (self.name, self.arguments, self.options),
                line=self.lineno)
        return [info]