File: document.py

package info (click to toggle)
python-sybil 9.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,148 kB
  • sloc: python: 4,510; makefile: 90
file content (228 lines) | stat: -rw-r--r-- 8,819 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
import ast
import re
from ast import AsyncFunctionDef, FunctionDef, ClassDef, Constant, Module, Expr
from bisect import bisect
from collections.abc import Iterator
from io import open
from itertools import chain
from pathlib import Path
from typing import Any, Dict
from typing import List, Tuple

from .example import Example, SybilFailure, NotEvaluated
from .python import import_path
from .region import Region
from .text import LineNumberOffsets
from .typing import Parser, Evaluator


class Document:
    """
    This is Sybil's representation of a documentation source file.
    It will be instantiated by Sybil and provided to each parser in turn.

    Different types of document can be handled by subclassing to provide the
    required :any:`evaluation <push_evaluator>`. The required file extensions, such as ``'.py'``,
    can then be mapped to these subclasses using :class:`Sybil's <Sybil>`
    ``document_types`` parameter.
    """

    def __init__(self, text: str, path: str) -> None:
        #: This is the text of the documentation source file.
        self.text: str = text
        #: This is the absolute path of the documentation source file.
        self.path: str = path
        # Length of the text; add() rejects regions that end beyond this.
        self.end: int = len(text)
        # (start, region) pairs, kept ordered by add() via bisect.
        self.regions: List[Tuple[int, Region]] = []
        #: This dictionary is the namespace in which all examples parsed from
        #: this document will be evaluated.
        self.namespace: Dict[str, Any] = {}
        # Stack of evaluators; evaluate() tries the most recently pushed first.
        self.evaluators: list[Evaluator] = []

    @classmethod
    def parse(cls, path: str, *parsers: Parser, encoding: str = 'utf-8') -> 'Document':
        """
        Read the text from the supplied path and parse it into a document
        using the supplied parsers.
        """
        with open(path, encoding=encoding) as source:
            text = source.read()
        document = cls(text, path)
        for parser in parsers:
            for region in parser(document):
                document.add(region)
        return document

    def line_column(self, position: int) -> str:
        """
        Return a line and column location in this document based on a character
        position.

        Both the line and the column are 1-based.
        """
        line = self.text.count('\n', 0, position)+1
        # rfind() returns -1 when there is no preceding newline, which makes
        # the column 1-based on the first line as well.
        col = position - self.text.rfind('\n', 0, position)
        return 'line {}, column {}'.format(line, col)

    def region_details(self, region: Region) -> str:
        """
        Return a description of the supplied region, made up of its repr and
        the line and column of its start and end.
        """
        return '{!r} from {} to {}'.format(
            region,
            self.line_column(region.start),
            self.line_column(region.end)
        )

    def raise_overlap(self, *regions: Region) -> None:
        """
        Raise a :class:`ValueError` stating that the first supplied region
        overlaps the second.
        """
        reprs = [self.region_details(region) for region in regions]
        raise ValueError('{} overlaps {}'.format(*reprs))

    def add(self, region: Region) -> None:
        """
        Insert the supplied region into this document's ordered list of regions.

        A :class:`ValueError` is raised if the region starts before the start
        of the document, ends after the end of the document, or overlaps
        either of its neighbours in the list.
        """
        if region.start < 0:
            raise ValueError('{} is before start of document'.format(
                self.region_details(region)
            ))
        if region.end > self.end:
            raise ValueError('{} goes beyond end of document'.format(
                self.region_details(region)
            ))
        entry = (region.start, region)
        index = bisect(self.regions, entry)
        # Only the immediate neighbours need checking, as the regions already
        # stored were checked against each other when they were added.
        if index > 0:
            previous = self.regions[index-1][1]
            if previous.end > region.start:
                self.raise_overlap(previous, region)
        if index < len(self.regions):
            following = self.regions[index][1]
            if following.start < region.end:
                self.raise_overlap(region, following)
        self.regions.insert(index, entry)

    def examples(self) -> Iterator[Example]:
        """
        Return the :term:`examples <example>` contained within this document.

        Each example is given the 1-based line and column at which its region
        starts.
        """
        line = 1
        place = 0
        for _, region in self.regions:
            # Newlines are counted incrementally from the previous region.
            line += self.text.count('\n', place, region.start)
            # The start of the line must be searched for from the beginning
            # of the text: limiting the search to the text since the previous
            # region finds no newline when both regions start on the same
            # line, which would give an absolute offset instead of a column.
            line_start = self.text.rfind('\n', 0, region.start)
            place = region.start
            yield Example(self,
                          line, region.start-line_start,
                          region, self.namespace)

    def __iter__(self) -> Iterator[Example]:
        return self.examples()

    def push_evaluator(self, evaluator: Evaluator) -> None:
        """
        Push an :any:`Evaluator` onto this document's stack of evaluators
        if it is not already in that stack.

        When evaluating an :any:`Example`, any evaluators in the stack will be tried in order,
        starting with the most recently pushed. If an evaluator raises a :any:`NotEvaluated`
        exception, then the next evaluator in the stack will be attempted.

        If the stack is empty or all evaluators present raise :any:`NotEvaluated`, then the
        example's evaluator will be used. This is the most common case!
        """
        if evaluator not in self.evaluators:
            self.evaluators.append(evaluator)

    def pop_evaluator(self, evaluator: Evaluator) -> None:
        """
        Pop an :any:`Evaluator` off this document's stack of evaluators.
        If it is not present in that stack, the method does nothing.
        """
        if evaluator in self.evaluators:
            self.evaluators.remove(evaluator)

    def evaluate(self, example: Example, evaluator: Evaluator) -> None:
        """
        Evaluate the supplied example, trying each evaluator in this document's
        stack, most recently pushed first, and then the supplied evaluator.

        An evaluator that raises :any:`NotEvaluated` is skipped. The first one
        that does not ends the search: a truthy result is raised as a
        :any:`SybilFailure`, while a falsy result means success.
        A :any:`SybilFailure` is also raised if every evaluator, including the
        supplied one, raises :any:`NotEvaluated`.
        """

        # pytest convention for leaving this frame out of reported tracebacks.
        __tracebackhide__ = True

        for current_evaluator in chain(reversed(self.evaluators), (evaluator,)):
            try:
                result = current_evaluator(example)
            except NotEvaluated:
                continue
            else:
                if result:
                    raise SybilFailure(example, result if isinstance(result, str) else repr(result))
                else:
                    break
        else:
            # Nothing handled the example, not even the fallback evaluator.
            raise SybilFailure(example, f'{evaluator!r} should not raise NotEvaluated()')


# Matches the opening of a docstring literal: an optional string prefix
# followed by the quote delimiter, with group 1 capturing the delimiter alone.
# Prefixes are accepted in either case, including ``u``/``U``, as these are all
# legal on a str literal. Triple quotes must be three of the same character so
# that a short literal such as "'" is not mistaken for a triple-quoted one.
DOCSTRING_PUNCTUATION = re.compile('[rRuUfF]{0,2}("""|\'\'\'|["\'])')


class PythonDocument(Document):
    """
    A :class:`~sybil.Document` whose source file is itself a Python module.

    The module is imported lazily and the names it defines are copied into the
    document's :attr:`~sybil.Document.namespace`, so that examples can use them.
    """

    def __init__(self, text: str, path: str) -> None:
        super().__init__(text, path)
        # Registered as an evaluator so that the import only happens once an
        # example from this document is actually evaluated.
        self.push_evaluator(self.import_document)

    def import_document(self, example: Example) -> None:
        """
        Import this document's source file as a Python module and copy the
        module's names into the namespace.

        This runs as an evaluator: it removes itself from the stack once the
        import is done and always raises :class:`~sybil.example.NotEvaluated`,
        so that the example is passed on to the next evaluator.
        """
        source_path = Path(self.path)
        imported = import_path(source_path)
        module_namespace = imported.__dict__
        self.namespace.update(module_namespace)
        # One import is enough, so take this evaluator off the stack.
        self.pop_evaluator(self.import_document)
        raise NotEvaluated()


class PythonDocStringDocument(PythonDocument):
    """
    A :class:`~sybil.document.PythonDocument` subclass that only considers the text of
    docstrings in the document's source.
    """

    @staticmethod
    def extract_docstrings(python_source_code: str) -> Iterator[Tuple[int, int, str]]:
        """
        Find the docstrings of the module, classes and functions in the
        supplied Python source code.

        For each docstring, yield a tuple of the position in the source just
        after its opening quotes, the position just before its closing quotes
        and the docstring's evaluated text.

        A :class:`ValueError` is raised if the opening quotes of a docstring
        cannot be recognised.
        """
        line_offsets = LineNumberOffsets(python_source_code)
        for node in ast.walk(ast.parse(python_source_code)):
            if not isinstance(node, (AsyncFunctionDef, FunctionDef, ClassDef, Module)):
                continue
            # A docstring can only be the first statement of the body.
            if not (node.body and isinstance(node.body[0], Expr)):
                continue
            docstring = node.body[0].value
            if not isinstance(docstring, Constant):
                continue
            text = docstring.value
            # Skips non-string constants, such as bytes literals.
            if not isinstance(text, str):
                continue
            # NOTE(review): ast reports col_offset and end_col_offset as UTF-8
            # byte offsets - confirm LineNumberOffsets.get() accounts for that
            # when the line has non-ASCII characters before these columns.
            node_start = line_offsets.get(docstring.lineno-1, docstring.col_offset)
            end_lineno = docstring.end_lineno or 1
            end_col_offset = docstring.end_col_offset or 0
            node_end = line_offsets.get(end_lineno-1, end_col_offset)
            punc = DOCSTRING_PUNCTUATION.match(python_source_code, node_start, node_end)
            if punc is None:
                # Fail with the location rather than an AttributeError on None.
                raise ValueError(
                    'Could not find the opening quotes of the docstring on line {}'.format(
                        docstring.lineno
                    )
                )
            # The closing quotes are assumed to be the same size as the opening ones.
            punc_size = len(punc.group(1))
            start = punc.end()
            end = node_end - punc_size
            yield start, end, text

    @classmethod
    def parse(cls, path: str, *parsers: Parser, encoding: str = 'utf-8') -> 'Document':
        """
        Read the text from the supplied path to a Python source file and parse any docstrings
        it contains into a document using the supplied parsers.
        """
        # The file only needs to be open while its text is read.
        with open(path, encoding=encoding) as source:
            document = cls(source.read(), path)
        for start, _end, text in cls.extract_docstrings(document.text):
            # Each docstring is parsed as a document of its own and the regions
            # found are then shifted to their position in the whole file.
            docstring_document = cls(text, path)
            for parser in parsers:
                for region in parser(docstring_document):
                    region.start += start
                    region.end += start
                    document.add(region)
        return document