"""Build a short, HTML-escaped text description from a docutils doctree."""
from __future__ import annotations
import html
import string
from typing import TYPE_CHECKING
from docutils import nodes
if TYPE_CHECKING:
from collections.abc import Set
def get_description(
    doctree: nodes.document,
    description_length: int,
    known_titles: Set[str] = frozenset(),
) -> str:
    """Return the description text collected by walking *doctree*.

    A ``DescriptionParser`` is created for the doctree, with
    ``description_length`` passed as its ``desc_len`` and ``known_titles``
    forwarded unchanged. The doctree is then traversed with ``walkabout``
    and the parser's accumulated ``description`` is returned.
    """
    parser = DescriptionParser(
        doctree,
        known_titles=known_titles,
        desc_len=description_length,
    )
    doctree.walkabout(parser)
    return parser.description
class DescriptionParser(nodes.NodeVisitor):
    """Finds the title and creates a description from a doctree.

    The visitor concatenates the HTML-escaped text of leaf nodes into
    ``self.description``, inserting light punctuation at structural
    boundaries (``:`` after titles, ``,`` after ``nodes.Part`` nodes, ``.``
    after ``nodes.Sequential`` nodes, ``-`` before nested sequences).  Once
    the text grows past ``desc_len`` it is truncated and the traversal is
    stopped.
    """

    def __init__(
        self,
        document: nodes.document,
        *,
        desc_len: int,
        known_titles: Set[str] = frozenset(),
    ) -> None:
        """Initialise an empty description for *document*.

        :param document: doctree handed to ``nodes.NodeVisitor``.
        :param desc_len: maximum length of the generated description.
        :param known_titles: titles to drop when they match the first title
            encountered in the doctree.
        """
        super().__init__(document)
        self.description = ''
        self.desc_len = desc_len
        # Nesting depth of the ``nodes.Sequential`` nodes currently open.
        self.list_level = 0
        self.known_titles = known_titles
        self.first_title_found = False

        # Exceptions can't be raised from dispatch_departure()
        # This is used to loop the stop call back to the next dispatch_visit()
        self.stop = False

    def dispatch_visit(self, node: nodes.Element) -> None:
        """Append the text of *node* to the description if it is a leaf.

        :raises nodes.StopTraversal: when the description was already
            truncated by an earlier departure.
        :raises nodes.SkipNode: for admonitions, invisible nodes, raw nodes,
            children of literal blocks, and a first title that is listed in
            ``known_titles``.
        """
        if self.stop:
            raise nodes.StopTraversal

        # Skip comments & all admonitions
        if isinstance(node, (nodes.Admonition, nodes.Invisible)):
            raise nodes.SkipNode

        # Mark start of nested lists
        if isinstance(node, nodes.Sequential):
            self.list_level += 1
            if self.list_level > 1:
                self.description += '-'

        # Skip the first title if it's the title of the page
        if not self.first_title_found and isinstance(node, nodes.title):
            self.first_title_found = True
            if node.astext() in self.known_titles:
                raise nodes.SkipNode

        # Raw markup and literal (code) blocks do not belong in a description
        if isinstance(node, nodes.raw) or isinstance(node.parent, nodes.literal_block):
            raise nodes.SkipNode

        # Only include leaf nodes in the description
        if node.children:
            return

        text = node.astext().replace('\r', '').replace('\n', ' ').strip()

        # Ensure string contains HTML-safe characters
        text = html.escape(text, quote=True)

        # Collapse runs of spaces; the search string must be TWO spaces,
        # otherwise the loop never terminates for text containing a space.
        while '  ' in text:
            text = text.replace('  ', ' ')

        # Put a space between elements if one does not already exist.
        if (
            self.description
            and text
            and self.description[-1] not in string.whitespace
            and text[0] not in string.whitespace + string.punctuation
        ):
            self.description += ' '

        self.description += text

    def dispatch_departure(self, node: nodes.Element) -> None:
        """Add separators after *node* and enforce the length limit."""
        # Once truncated the description is final.  Departure callbacks can
        # still arrive for nodes that were already entered; letting them
        # append punctuation would put characters after the ellipsis.
        if self.stop:
            return

        # Separate title from text
        if isinstance(node, nodes.title):
            self.description += ':'

        # Separate list elements
        if isinstance(node, nodes.Part):
            self.description += ','

        # Separate end of list from text
        if isinstance(node, nodes.Sequential):
            if self.description.endswith(','):
                self.description = self.description[:-1]
            self.description += '.'
            self.list_level -= 1

        # Check for length
        if len(self.description) > self.desc_len:
            self.description = self._truncate(self.description, self.desc_len)
            self.stop = True

    @staticmethod
    def _truncate(text: str, limit: int) -> str:
        """Cut *text* to at most *limit* characters without splitting an entity.

        When ``limit`` is at least 3 the last three characters of the cut
        text are replaced by ``'...'``.  The result can be shorter than
        *limit* when a partially cut HTML entity had to be removed.
        """
        with_ellipsis = limit >= 3
        text = text[:limit]
        if with_ellipsis:
            text = text[:-3]

        # Every '&' in the description starts an entity written by
        # html.escape(); the separators added by this class never contain
        # '&'.  An '&' with no ';' after it is therefore an entity that the
        # slice cut in half, so drop it rather than emit broken markup.
        amp = text.rfind('&')
        if amp != -1 and ';' not in text[amp:]:
            text = text[:amp]

        if with_ellipsis:
            text += '...'
        return text
|