1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
|
# $Id: universal.py 10136 2025-05-20 15:48:27Z milde $
# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
# Maintainer: docutils-develop@lists.sourceforge.net
# Copyright: This module has been placed in the public domain.
"""
Transforms needed by most or all documents:
- `Decorations`: Generate a document's header & footer.
- `ExposeInternals`: Expose internal attributes.
- `Messages`: Placement of system messages generated after parsing.
- `FilterMessages`: Remove system messages below verbosity threshold.
- `TestMessages`: Like `Messages`, used on test runs.
- `StripComments`: Remove comment elements from the document tree.
- `StripClassesAndElements`: Remove elements with classes
in `self.document.settings.strip_elements_with_classes`
and class values in `self.document.settings.strip_classes`.
- `SmartQuotes`: Replace ASCII quotation marks with typographic form.
- `Validate`: Validate the document tree, report violations as warnings.
"""
from __future__ import annotations
__docformat__ = 'reStructuredText'
import os
import re
import time
from docutils import nodes, utils
from docutils.transforms import Transform
from docutils.utils import smartquotes
class Decorations(Transform):
    """
    Populate a document's decoration element (header, footer).
    """

    default_priority = 820

    def apply(self) -> None:
        """Attach the generated header and footer nodes to the document.

        The decoration element is only requested from the document when
        there is actually something to put into it.
        """
        header_nodes = self.generate_header()
        if header_nodes:
            self.document.get_decoration().get_header().extend(header_nodes)
        footer_nodes = self.generate_footer()
        if footer_nodes:
            self.document.get_decoration().get_footer().extend(footer_nodes)

    def generate_header(self):
        """Return the nodes for the header; this implementation has none."""
        return None

    def generate_footer(self):
        """Return a one-paragraph list for the footer, or None.

        The paragraph is assembled from up to three parts, each enabled by
        a setting: a "View document source" link (``source_url`` or
        ``source_link``), a "Generated on" datestamp (``datestamp``) and
        a "Generated by Docutils" credit (``generator``).
        None is returned when none of these settings is active.
        """
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        settings = self.document.settings
        if not (settings.generator or settings.datestamp
                or settings.source_link or settings.source_url):
            return None

        parts = []

        if (settings.source_link and settings._source
                or settings.source_url):
            # An explicit URL wins over the computed relative path.
            source = settings.source_url or utils.relative_path(
                settings.output_path, settings._source)
            parts.append(nodes.reference('', 'View document source',
                                         refuri=source))
            parts.append(nodes.Text('.\n'))

        if settings.datestamp:
            # Use the timestamp from the `SOURCE_DATE_EPOCH` environment
            # variable if it is set (and not empty), the current time
            # otherwise. Both are formatted as UTC.
            # See https://sourceforge.net/p/docutils/patches/132/
            # and https://reproducible-builds.org/specs/source-date-epoch/
            source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
            if source_date_epoch:
                moment = time.gmtime(int(source_date_epoch))
            else:
                moment = time.gmtime()
            datestamp = time.strftime(settings.datestamp, moment)
            parts.append(nodes.Text('Generated on: ' + datestamp + '.\n'))

        if settings.generator:
            parts += [
                nodes.Text('Generated by '),
                nodes.reference('', 'Docutils',
                                refuri='https://docutils.sourceforge.io/'),
                nodes.Text(' from '),
                nodes.reference('', 'reStructuredText',
                                refuri='https://docutils.sourceforge.io/'
                                       'rst.html'),
                nodes.Text(' source.\n'),
            ]

        return [nodes.paragraph('', '', *parts)]
class ExposeInternals(Transform):
    """
    Expose internal attributes if ``expose_internals`` setting is set.
    """

    default_priority = 840

    def not_Text(self, node) -> bool:
        """Return True for every node that is not a `nodes.Text` instance."""
        return not isinstance(node, nodes.Text)

    def apply(self) -> None:
        """Copy the selected Python attributes into node attributes.

        For every non-Text node and every name listed in the
        ``expose_internals`` setting, an instance attribute whose value
        is not None is stored as node attribute "internal:<name>".
        """
        attribute_names = self.document.settings.expose_internals
        if not attribute_names:
            return
        for node in self.document.findall(self.not_Text):
            for name in attribute_names:
                value = getattr(node, name, None)
                if value is None:
                    continue
                node['internal:' + name] = value
class Messages(Transform):
    """Handle "loose" messages.

    Place system messages generated by parsing or transforms that are not
    attached to the document tree into a dedicated section of the document.
    """

    default_priority = 860

    def apply(self) -> None:
        """Collect parent-less system messages in a new section.

        Nothing is changed if there are no such messages. Otherwise a
        section with class "system-messages" is appended to the document
        and the list of transform messages is emptied in place.
        """
        candidates = (*self.document.parse_messages,
                      *self.document.transform_messages)
        orphans = [message for message in candidates if not message.parent]
        if not orphans:
            return
        section = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        section += nodes.title('', 'Docutils System Messages')
        section += orphans
        self.document.transform_messages[:] = []
        self.document += section
class FilterMessages(Transform):
    """
    Remove system messages below verbosity threshold.

    Also convert <problematic> nodes referencing removed messages
    to <Text> nodes and remove "System Messages" section if empty.
    """

    default_priority = 870

    def apply(self) -> None:
        """Filter the document tree in three passes.

        1. Remove every system message whose "level" is below the
           reporter's ``report_level`` and drop its IDs from
           ``document.ids``.
        2. Replace <problematic> nodes whose "refid" points to a removed
           message with a plain Text node holding the same text.
        3. Remove sections with class "system-messages" that have exactly
           one child left.

        Every pass modifies the tree, so each one iterates over a snapshot
        (tuple) of the matching nodes rather than over the live generator
        returned by `findall()`.
        """
        report_level = self.document.reporter.report_level
        # IDs of removed system messages (a set: only used for lookups)
        removed_ids = set()

        for node in tuple(self.document.findall(nodes.system_message)):
            if node['level'] < report_level:
                node.parent.remove(node)
                for _id in node['ids']:
                    # remove ID registration
                    self.document.ids.pop(_id, None)
                    removed_ids.add(_id)

        for node in tuple(self.document.findall(nodes.problematic)):
            if 'refid' in node and node['refid'] in removed_ids:
                node.parent.replace(node, nodes.Text(node.astext()))

        # A snapshot is required here too: removing a section while the
        # traversal is still running may make it skip following nodes.
        for node in tuple(self.document.findall(nodes.section)):
            if "system-messages" in node['classes'] and len(node) == 1:
                node.parent.remove(node)
class TestMessages(Transform):
    """
    Append all post-parse system messages to the end of the document.

    Used for testing purposes.
    """

    # marker for pytest to ignore this class during test discovery
    __test__ = False

    default_priority = 880

    def apply(self) -> None:
        """Append each transform message without a parent to the document."""
        for message in self.document.transform_messages:
            if message.parent:
                continue  # already attached somewhere
            self.document += message
class StripComments(Transform):
    """
    Remove comment elements from the document tree (only if the
    ``strip_comments`` setting is enabled).
    """

    default_priority = 740

    def apply(self) -> None:
        """Detach every <comment> node from its parent (if enabled)."""
        if not self.document.settings.strip_comments:
            return
        # Collect the matches first, the tree is modified in the loop.
        comments = tuple(self.document.findall(nodes.comment))
        for comment in comments:
            comment.parent.remove(comment)
class StripClassesAndElements(Transform):
    """
    Remove from the document tree all elements with classes in
    `self.document.settings.strip_elements_with_classes` and all "classes"
    attribute values in `self.document.settings.strip_classes`.
    """

    default_priority = 420

    def apply(self) -> None:
        """Strip elements first, then strip class values.

        Both steps are optional and depend on the respective setting
        being non-empty.
        """
        settings = self.document.settings

        if settings.strip_elements_with_classes:
            self.strip_elements = set(settings.strip_elements_with_classes)
            # Work on a tuple: removing the current node while the
            # traversal is still running would corrupt the iteration.
            doomed = tuple(self.document.findall(self.check_classes))
            for node in doomed:
                node.parent.remove(node)

        strip_classes = settings.strip_classes
        if not strip_classes:
            return
        for node in self.document.findall(nodes.Element):
            for class_value in strip_classes:
                # drop (the first occurrence of) the value, if present
                if class_value in node['classes']:
                    node['classes'].remove(class_value)

    def check_classes(self, node) -> bool:
        """Return True if `node` is an Element with a class to be stripped.

        Reads `self.strip_elements`, which is set by `apply()`.
        """
        if not isinstance(node, nodes.Element):
            return False
        return any(class_value in self.strip_elements
                   for class_value in node['classes'])
class SmartQuotes(Transform):
    """
    Replace ASCII quotation marks with typographic form.

    Also replace multiple dashes with em-dash/en-dash characters.
    """

    default_priority = 855

    nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
    """Do not apply "smartquotes" to instances of these block-level nodes."""

    literal_nodes = (nodes.FixedTextElement, nodes.Special,
                     nodes.image, nodes.literal, nodes.math,
                     nodes.raw, nodes.problematic)
    """Do not apply smartquotes to instances of these inline nodes."""

    smartquotes_action = 'qDe'
    """Setting to select smartquote transformations.

    The default 'qDe' educates normal quote characters: (", '),
    em- and en-dashes (---, --) and ellipses (...).
    """

    def __init__(self, document, startnode) -> None:
        Transform.__init__(self, document, startnode=startnode)
        # languages already reported as unsupported (warn only once each)
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        """Yield a ``(texttype, nodetext)`` tuple for each "Text" node.

        Interface to ``smartquotes.educate_tokens()``. The text type is
        'literal' if the parent or grandparent of the node is one of
        `self.literal_nodes`, 'plain' otherwise.
        """
        for txtnode in txtnodes:
            parent = txtnode.parent
            if (isinstance(parent, self.literal_nodes)
                    or isinstance(parent.parent, self.literal_nodes)):
                yield 'literal', str(txtnode)
                continue
            # SmartQuotes uses backslash escapes instead of null-escapes
            # Insert backslashes before escaped "active" characters.
            yield 'plain', re.sub('(?<=\x00)([-\\\'".`])', r'\\\1',
                                  str(txtnode))

    def apply(self) -> None:
        """Educate quotes in all text blocks if "smart_quotes" is set."""
        settings = self.document.settings
        smart_quotes = settings.setdefault('smart_quotes', False)
        if not smart_quotes:
            return
        # The setting may be a non-string value without `startswith()`.
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            alternative = False

        document_language = settings.language_code
        lc_smartquotes = settings.smartquotes_locales
        if lc_smartquotes:
            smartquotes.smartchars.quotes.update(dict(lc_smartquotes))

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for node in self.document.findall(nodes.TextElement):
            # Skip preformatted text blocks and special elements as well as
            # nested TextElements (these are not "block-level" elements):
            if (isinstance(node, self.nodes_to_skip)
                    or isinstance(node.parent, nodes.TextElement)):
                continue

            # list of text nodes in the "text block":
            txtnodes = [
                txtnode for txtnode in node.findall(nodes.Text)
                if not isinstance(txtnode.parent, nodes.option_string)
            ]

            # language: use typographical quotes for language "lang"
            lang = node.get_language_code(document_language)
            # toggle the alternative form if the `smart-quotes` setting
            # starts with "alt":
            if alternative:
                lang = (lang.replace('-x-altquot', '')
                        if '-x-altquot' in lang
                        else lang + '-x-altquot')

            # drop unsupported subtags (first supported tag wins):
            supported = next(
                (tag for tag in utils.normalize_language_tag(lang)
                 if tag in smartquotes.smartchars.quotes),
                None)
            if supported is not None:
                lang = supported
            else:  # language not supported -- keep ASCII quotes
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=node)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text:
            # (see "utils/smartquotes.py" for the attribute setting)
            teacher = smartquotes.educate_tokens(
                self.get_tokens(txtnodes),
                attr=self.smartquotes_action,
                language=lang)

            for old_text, new_text in zip(txtnodes, teacher):
                old_text.parent.replace(old_text, nodes.Text(new_text))

        self.unsupported_languages.clear()
class Validate(Transform):
    """
    Validate the document tree, report violations as warning.
    """

    default_priority = 835  # between misc.Transitions and universal.Messages

    def apply(self) -> None:
        """Validate each node on its own if the "validate" setting is true.

        A missing "validate" setting counts as false. Every
        `nodes.ValidationError` is turned into a warning that is attached
        to the error's problematic element (or to the validated node if
        the error does not name one).
        """
        if not getattr(self.document.settings, 'validate', False):
            return
        warn = self.document.reporter.warning
        for node in self.document.findall():
            try:
                node.validate(recursive=False)
            except nodes.ValidationError as error:
                warn(str(error),
                     base_node=error.problematic_element or node)
                # TODO: append a link to the Document Tree documentation?
                # nodes.paragraph('', 'See ',
                #                 nodes.reference('', 'doctree.html#document',
                #                     refuri='https://docutils.sourceforge.io/'
                #                     'docs/ref/doctree.html#document'),
|