File: universal.py

package info (click to toggle)
python-docutils 0.22%2Bdfsg-1
links: PTS, VCS
area: main
in suites: experimental
size: 11,448 kB
sloc: python: 53,302; lisp: 14,475; xml: 1,807; javascript: 1,032; makefile: 102; sh: 96
file content (370 lines) | stat: -rw-r--r-- 13,933 bytes
# $Id: universal.py 10136 2025-05-20 15:48:27Z milde $
# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
# Maintainer: docutils-develop@lists.sourceforge.net
# Copyright: This module has been placed in the public domain.

"""
Transforms needed by most or all documents:

- `Decorations`: Generate a document's header & footer.
- `ExposeInternals`: Expose internal attributes.
- `Messages`: Placement of system messages generated after parsing.
- `FilterMessages`: Remove system messages below verbosity threshold.
- `TestMessages`: Like `Messages`, used on test runs.
- `StripComments`: Remove comment elements from the document tree.
- `StripClassesAndElements`: Remove elements with classes
  in `self.document.settings.strip_elements_with_classes`
  and class values in `self.document.settings.strip_classes`.
- `SmartQuotes`: Replace ASCII quotation marks with typographic form.
- `Validate`: Validate the document tree, report violations as warning.
"""

from __future__ import annotations

__docformat__ = 'reStructuredText'

import os
import re
import time
from docutils import nodes, utils
from docutils.transforms import Transform
from docutils.utils import smartquotes


class Decorations(Transform):

    """
    Populate a document's decoration element (header, footer).
    """

    default_priority = 820

    def apply(self) -> None:
        header_nodes = self.generate_header()
        if header_nodes:
            decoration = self.document.get_decoration()
            header = decoration.get_header()
            header.extend(header_nodes)
        footer_nodes = self.generate_footer()
        if footer_nodes:
            decoration = self.document.get_decoration()
            footer = decoration.get_footer()
            footer.extend(footer_nodes)

    def generate_header(self):
        return None

    def generate_footer(self):
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable
        # for the datestamp?
        # See https://sourceforge.net/p/docutils/patches/132/
        # and https://reproducible-builds.org/specs/source-date-epoch/
        settings = self.document.settings
        if (settings.generator or settings.datestamp
            or settings.source_link or settings.source_url):
            text = []
            if (settings.source_link and settings._source
                or settings.source_url):
                if settings.source_url:
                    source = settings.source_url
                else:
                    source = utils.relative_path(settings.output_path,
                                                 settings._source)
                text.extend([
                    nodes.reference('', 'View document source',
                                    refuri=source),
                    nodes.Text('.\n')])
            if settings.datestamp:
                source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
                if source_date_epoch:
                    datestamp = time.strftime(settings.datestamp,
                                              time.gmtime(int(source_date_epoch)))
                else:
                    datestamp = time.strftime(settings.datestamp, time.gmtime())
                text.append(nodes.Text('Generated on: ' + datestamp + '.\n'))
            if settings.generator:
                text.extend([
                    nodes.Text('Generated by '),
                    nodes.reference('', 'Docutils',
                                    refuri='https://docutils.sourceforge.io/'),
                    nodes.Text(' from '),
                    nodes.reference('', 'reStructuredText',
                                    refuri='https://docutils.sourceforge.io/'
                                    'rst.html'),
                    nodes.Text(' source.\n')])
            return [nodes.paragraph('', '', *text)]
        else:
            return None


class ExposeInternals(Transform):

    """
    Expose internal attributes if ``expose_internals`` setting is set.
    """

    default_priority = 840

    def not_Text(self, node) -> bool:
        return not isinstance(node, nodes.Text)

    def apply(self) -> None:
        if self.document.settings.expose_internals:
            for node in self.document.findall(self.not_Text):
                for att in self.document.settings.expose_internals:
                    value = getattr(node, att, None)
                    if value is not None:
                        node['internal:' + att] = value


class Messages(Transform):

    """Handle "loose" messages.

    Place system messages generated by parsing or transforms that are not
    attached to the document tree into a dedicated section of the document.
    """

    default_priority = 860

    def apply(self) -> None:
        messages = [*self.document.parse_messages,
                    *self.document.transform_messages]
        loose_messages = [msg for msg in messages if not msg.parent]
        if loose_messages:
            section = nodes.section(classes=['system-messages'])
            # @@@ get this from the language module?
            section += nodes.title('', 'Docutils System Messages')
            section += loose_messages
            self.document.transform_messages[:] = []
            self.document += section


class FilterMessages(Transform):

    """
    Remove system messages below verbosity threshold.

    Also convert <problematic> nodes referencing removed messages
    to <Text> nodes and remove "System Messages" section if empty.
    """

    default_priority = 870

    def apply(self) -> None:
        removed_ids = []  # IDs of removed system messages
        for node in tuple(self.document.findall(nodes.system_message)):
            if node['level'] < self.document.reporter.report_level:
                node.parent.remove(node)
                for _id in node['ids']:
                    self.document.ids.pop(_id, None)  # remove ID registration
                    removed_ids.append(_id)
        for node in tuple(self.document.findall(nodes.problematic)):
            if 'refid' in node and node['refid'] in removed_ids:
                node.parent.replace(node, nodes.Text(node.astext()))
        for node in self.document.findall(nodes.section):
            if "system-messages" in node['classes'] and len(node) == 1:
                node.parent.remove(node)


class TestMessages(Transform):

    """
    Append all post-parse system messages to the end of the document.

    Used for testing purposes.
    """

    # marker for pytest to ignore this class during test discovery
    __test__ = False

    default_priority = 880

    def apply(self) -> None:
        for msg in self.document.transform_messages:
            if not msg.parent:
                self.document += msg


class StripComments(Transform):

    """
    Remove comment elements from the document tree (only if the
    ``strip_comments`` setting is enabled).
    """

    default_priority = 740

    def apply(self) -> None:
        if self.document.settings.strip_comments:
            for node in tuple(self.document.findall(nodes.comment)):
                node.parent.remove(node)


class StripClassesAndElements(Transform):

    """
    Remove from the document tree all elements with classes in
    `self.document.settings.strip_elements_with_classes` and all "classes"
    attribute values in `self.document.settings.strip_classes`.
    """

    default_priority = 420

    def apply(self) -> None:
        if self.document.settings.strip_elements_with_classes:
            self.strip_elements = {*self.document.settings
                                   .strip_elements_with_classes}
            # Iterate over a tuple as removing the current node
            # corrupts the iterator returned by `iter`:
            for node in tuple(self.document.findall(self.check_classes)):
                node.parent.remove(node)

        if not self.document.settings.strip_classes:
            return
        strip_classes = self.document.settings.strip_classes
        for node in self.document.findall(nodes.Element):
            for class_value in strip_classes:
                try:
                    node['classes'].remove(class_value)
                except ValueError:
                    pass

    def check_classes(self, node) -> bool:
        if not isinstance(node, nodes.Element):
            return False
        for class_value in node['classes'][:]:
            if class_value in self.strip_elements:
                return True
        return False


class SmartQuotes(Transform):

    """
    Replace ASCII quotation marks with typographic form.

    Also replace multiple dashes with em-dash/en-dash characters.
    """

    default_priority = 855

    nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
    """Do not apply "smartquotes" to instances of these block-level nodes."""

    literal_nodes = (nodes.FixedTextElement, nodes.Special,
                     nodes.image, nodes.literal, nodes.math,
                     nodes.raw, nodes.problematic)
    """Do not apply smartquotes to instances of these inline nodes."""

    smartquotes_action = 'qDe'
    """Setting to select smartquote transformations.

    The default 'qDe' educates normal quote characters: (", '),
    em- and en-dashes (---, --) and ellipses (...).
    """

    def __init__(self, document, startnode) -> None:
        Transform.__init__(self, document, startnode=startnode)
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        # A generator that yields ``(texttype, nodetext)`` tuples for a list
        # of "Text" nodes (interface to ``smartquotes.educate_tokens()``).
        for node in txtnodes:
            if (isinstance(node.parent, self.literal_nodes)
                or isinstance(node.parent.parent, self.literal_nodes)):
                yield 'literal', str(node)
            else:
                # SmartQuotes uses backslash escapes instead of null-escapes
                # Insert backslashes before escaped "active" characters.
                txt = re.sub('(?<=\x00)([-\\\'".`])', r'\\\1', str(node))
                yield 'plain', txt

    def apply(self) -> None:
        smart_quotes = self.document.settings.setdefault('smart_quotes',
                                                         False)
        if not smart_quotes:
            return
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            alternative = False

        document_language = self.document.settings.language_code
        lc_smartquotes = self.document.settings.smartquotes_locales
        if lc_smartquotes:
            smartquotes.smartchars.quotes.update(dict(lc_smartquotes))

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for node in self.document.findall(nodes.TextElement):
            # skip preformatted text blocks and special elements:
            if isinstance(node, self.nodes_to_skip):
                continue
            # nested TextElements are not "block-level" elements:
            if isinstance(node.parent, nodes.TextElement):
                continue

            # list of text nodes in the "text block":
            txtnodes = [txtnode for txtnode in node.findall(nodes.Text)
                        if not isinstance(txtnode.parent,
                                          nodes.option_string)]

            # language: use typographical quotes for language "lang"
            lang = node.get_language_code(document_language)
            # use alternative form if `smart-quotes` setting starts with "alt":
            if alternative:
                if '-x-altquot' in lang:
                    lang = lang.replace('-x-altquot', '')
                else:
                    lang += '-x-altquot'
            # drop unsupported subtags:
            for tag in utils.normalize_language_tag(lang):
                if tag in smartquotes.smartchars.quotes:
                    lang = tag
                    break
            else:  # language not supported -- keep ASCII quotes
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=node)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text:
            # (see "utils/smartquotes.py" for the attribute setting)
            teacher = smartquotes.educate_tokens(
                self.get_tokens(txtnodes),
                attr=self.smartquotes_action, language=lang)

            for txtnode, newtext in zip(txtnodes, teacher):
                txtnode.parent.replace(txtnode, nodes.Text(newtext))

        self.unsupported_languages.clear()


class Validate(Transform):

    """
    Validate the document tree, report violations as warning.
    """

    default_priority = 835  # between misc.Transitions and  universal.Messages

    def apply(self) -> None:
        if not getattr(self.document.settings, 'validate', False):
            return
        for node in self.document.findall():
            try:
                node.validate(recursive=False)
            except nodes.ValidationError as e:
                self.document.reporter.warning(
                    str(e), base_node=e.problematic_element or node)
        # TODO: append a link to the Document Tree documentation?
        # nodes.paragraph('', 'See ',
        # nodes.reference('', 'doctree.html#document',
        #                 refuri='https://docutils.sourceforge.io/'
        #                 'docs/ref/doctree.html#document'),