File: whitespace.py

package info (click to toggle)
python-libcst 1.4.0-1.2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,928 kB
  • sloc: python: 76,235; makefile: 10; sh: 2
file content (296 lines) | stat: -rw-r--r-- 11,240 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional, Pattern, Sequence

from libcst._add_slots import add_slots
from libcst._nodes.base import BaseLeaf, BaseValueToken, CSTNode, CSTValidationError
from libcst._nodes.internal import (
    CodegenState,
    visit_optional,
    visit_required,
    visit_sequence,
)
from libcst._visitors import CSTVisitorT

# A SimpleWhitespace value may contain line continuations: a backslash that is
# immediately followed by a newline. Any other newline is deliberately excluded,
# because such newlines may have semantic significance.
SIMPLE_WHITESPACE_RE: Pattern[str] = re.compile(
    r"([ \f\t]|\\(\r\n?|\n))*",
    flags=re.UNICODE,
)
# Exactly one line terminator: ``\r\n``, a lone ``\r``, or ``\n``.
NEWLINE_RE: Pattern[str] = re.compile(
    r"\r\n?|\n",
    flags=re.UNICODE,
)
# A pound sign followed by any run of characters other than ``\r`` and ``\n``.
COMMENT_RE: Pattern[str] = re.compile(
    r"#[^\r\n]*",
    flags=re.UNICODE,
)


class BaseParenthesizableWhitespace(CSTNode, ABC):
    """
    Abstract base for the whitespace that sits between two tokens in the body of
    a statement or expression. This is the most common type of whitespace.

    Which characters are allowed in a whitespace depends on whether or not it
    appears inside a parenthesized expression. Nodes that can be found both inside
    and outside of a ``()``, ``[]`` or ``{}`` section use this class as the type
    of their whitespace so that either whitespace form is accepted.

    https://docs.python.org/3/reference/lexical_analysis.html#implicit-line-joining

    A backslash character (``\\``) may appear in parenthesizable whitespace when it
    is used as a line-continuation character. The continuation character is not
    technically "whitespace", but it serves the same purpose.

    This whitespace is often non-semantic (optional). It does carry some semantic
    value wherever it resolves a grammar ambiguity between tokens (e.g.
    ``if test``, versus ``iftest``).
    """

    __slots__ = ()

    # TODO: Should we somehow differentiate places where we require non-zero whitespace
    # with a separate type?

    @property
    @abstractmethod
    def empty(self) -> bool:
        """
        Whether this node is empty, i.e. holds zero whitespace characters.
        Subclasses must provide the implementation.
        """
        ...


@add_slots
@dataclass(frozen=True)
class SimpleWhitespace(BaseParenthesizableWhitespace, BaseValueToken):
    """
    This is the kind of whitespace you might see inside the body of a statement or
    expression between two tokens. This is the most common type of whitespace.

    A simple whitespace cannot contain a newline character unless it is directly
    preceded by a line continuation character (``\\``). It can contain zero or
    more spaces, tabs or form feeds. If you need a newline character without a line
    continuation character, use :class:`ParenthesizedWhitespace` instead.

    Simple whitespace is often non-semantic (optional), but in cases where whitespace
    solves a grammar ambiguity between tokens (e.g. ``if test``, versus ``iftest``),
    it has some semantic value.

    An example :class:`SimpleWhitespace` containing a space, a line continuation,
    a newline and another space is as follows::

        SimpleWhitespace(" \\\\\\n ")
    """

    #: Actual string value of the simple whitespace. A legal value contains only
    #: space, ``\f`` and ``\t`` characters, and optionally a continuation
    #: (``\``) followed by a newline (``\n``, ``\r`` or ``\r\n``).
    value: str

    def _validate(self) -> None:
        """
        Check that ``value`` is matched in full by ``SIMPLE_WHITESPACE_RE``.

        Raises:
            CSTValidationError: if any part of ``value`` is not matched by the
                pattern.
        """
        if SIMPLE_WHITESPACE_RE.fullmatch(self.value) is None:
            raise CSTValidationError(
                f"Got non-whitespace value for whitespace node: {repr(self.value)}"
            )

    @property
    def empty(self) -> bool:
        """
        Indicates that this node is empty (zero whitespace characters).
        """

        # An empty string is falsy, so this is True exactly when ``value`` is "".
        return not self.value


@add_slots
@dataclass(frozen=True)
class Newline(BaseLeaf):
    """
    The newline that terminates an :class:`EmptyLine` or a statement (in the latter
    case as part of a :class:`TrailingWhitespace`).

    A document may hold other newlines that follow continuation characters (the
    backslash, ``\\``). Those are not represented by this node; they are treated
    as part of the :class:`SimpleWhitespace`.

    A value may optionally be given to override the module's default newline. In
    general it should be left as the default, ``None``. Overriding is permitted
    because python modules are allowed to mix multiple unambiguous newline markers.
    """

    #: ``None`` means the module's default newline sequence is used. An explicit
    #: ``\n``, ``\r`` or ``\r\n`` means exactly that value is used for this
    #: newline.
    value: Optional[str] = None

    def _validate(self) -> None:
        """
        Check an explicitly supplied ``value`` against ``NEWLINE_RE``.

        Raises:
            CSTValidationError: if a non-empty ``value`` is not matched in full
                by the pattern.
        """
        newline = self.value
        # NOTE(review): any falsy value skips the check, so an empty string is
        # accepted here as well as ``None`` -- confirm whether that is intended.
        if not newline:
            return
        if NEWLINE_RE.fullmatch(newline) is None:
            raise CSTValidationError(
                f"Got an invalid value for newline node: {repr(newline)}"
            )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Add this newline to ``state`` as a token, using ``state.default_newline``
        when no explicit value was given.
        """
        explicit = self.value
        if explicit is None:
            state.add_token(state.default_newline)
        else:
            state.add_token(explicit)


@add_slots
@dataclass(frozen=True)
class Comment(BaseValueToken):
    """
    A comment, with its leading pound (``#``) character included.

    The pound character is kept in the 'value' property rather than stripped. This
    reinforces the idea that whitespace immediately following the pound character
    may be significant. E.g::

        # comment with whitespace at the start (usually preferred)
        #comment without whitespace at the start (usually not desirable)

    A comment is usually wrapped in a :class:`TrailingWhitespace` or
    :class:`EmptyLine` node.
    """

    #: The text of the comment. A valid value begins with the pound (``#``) character
    #: and continues with zero or more non-newline characters. Newlines are not
    #: allowed inside a comment.
    value: str

    def _validate(self) -> None:
        """
        Check that ``value`` is matched in full by ``COMMENT_RE``.

        Raises:
            CSTValidationError: if ``value`` is not matched by the pattern.
        """
        is_comment = COMMENT_RE.fullmatch(self.value) is not None
        if is_comment:
            return
        raise CSTValidationError(
            f"Got non-comment value for comment node: {repr(self.value)}"
        )


@add_slots
@dataclass(frozen=True)
class TrailingWhitespace(CSTNode):
    """
    Whitespace found at the end of a line, after a statement. A line that holds
    nothing but whitespace should be represented by :class:`EmptyLine` instead.
    """

    #: Simple whitespace that precedes the comment (if any) and the newline.
    whitespace: SimpleWhitespace = SimpleWhitespace.field("")

    #: Optional comment that follows the simple whitespace.
    comment: Optional[Comment] = None

    #: Newline that ends this trailing whitespace.
    newline: Newline = Newline.field()

    def _visit_and_replace_children(self, visitor: CSTVisitorT) -> "TrailingWhitespace":
        """
        Return a new :class:`TrailingWhitespace` built from the results of passing
        each child to ``visitor``.
        """
        # Children are visited in field order: whitespace, comment, newline.
        new_whitespace = visit_required(self, "whitespace", self.whitespace, visitor)
        new_comment = visit_optional(self, "comment", self.comment, visitor)
        new_newline = visit_required(self, "newline", self.newline, visitor)
        return TrailingWhitespace(
            whitespace=new_whitespace,
            comment=new_comment,
            newline=new_newline,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Generate code for the whitespace, then the comment when one is present,
        then the newline.
        """
        self.whitespace._codegen(state)
        trailing_comment = self.comment
        if trailing_comment is not None:
            trailing_comment._codegen(state)
        self.newline._codegen(state)


@add_slots
@dataclass(frozen=True)
class EmptyLine(CSTNode):
    """
    A line that holds only whitespace and/or a comment. Statements usually own the
    :class:`EmptyLine` nodes directly above themselves, while a :class:`Module`
    owns the :class:`EmptyLine` nodes of the document's header/footer.
    """

    #: Whether the line is indented. An empty line is not required to match the
    #: current indentation level; for example, when all trailing whitespace is
    #: stripped, an empty line between two statements carries no indentation.
    indent: bool = True

    #: Additional whitespace that follows the indent and precedes the comment.
    whitespace: SimpleWhitespace = SimpleWhitespace.field("")

    #: Optional comment that follows the indent and the additional whitespace.
    comment: Optional[Comment] = None

    #: Newline that ends this empty line.
    newline: Newline = Newline.field()

    def _visit_and_replace_children(self, visitor: CSTVisitorT) -> "EmptyLine":
        """
        Return a new :class:`EmptyLine` built from the results of passing each
        child node to ``visitor``. ``indent`` is copied over unchanged.
        """
        # Children are visited in field order: whitespace, comment, newline.
        new_whitespace = visit_required(self, "whitespace", self.whitespace, visitor)
        new_comment = visit_optional(self, "comment", self.comment, visitor)
        new_newline = visit_required(self, "newline", self.newline, visitor)
        return EmptyLine(
            indent=self.indent,
            whitespace=new_whitespace,
            comment=new_comment,
            newline=new_newline,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Ask ``state`` to add indent tokens when ``indent`` is set, then generate
        code for the whitespace, the comment when one is present, and the newline.
        """
        if self.indent:
            state.add_indent_tokens()
        self.whitespace._codegen(state)
        line_comment = self.comment
        if line_comment is not None:
            line_comment._codegen(state)
        self.newline._codegen(state)


@add_slots
@dataclass(frozen=True)
class ParenthesizedWhitespace(BaseParenthesizableWhitespace):
    """
    The whitespace found between two tokens inside a parenthesized expression or
    statement when it contains a newline that is not preceded by a line
    continuation (``\\``) character.

    https://docs.python.org/3/reference/lexical_analysis.html#implicit-line-joining

    Because at least one :class:`TrailingWhitespace` is required, a parenthesized
    whitespace can never be empty. For whitespace that has no comments or
    newlines, use :class:`SimpleWhitespace` instead.
    """

    #: Whitespace following the previous node, up to and including the
    #: end-of-line comment and newline.
    first_line: TrailingWhitespace = TrailingWhitespace.field()

    #: Lines after the first one that hold only indentation and/or comments.
    empty_lines: Sequence[EmptyLine] = ()

    #: Whether or not the final simple whitespace is indented regularly.
    indent: bool = False

    #: Additional whitespace that follows the indent and precedes the next node.
    last_line: SimpleWhitespace = SimpleWhitespace.field("")

    def _visit_and_replace_children(
        self, visitor: CSTVisitorT
    ) -> "ParenthesizedWhitespace":
        """
        Return a new :class:`ParenthesizedWhitespace` built from the results of
        passing each child node to ``visitor``. ``indent`` is copied over
        unchanged.
        """
        # Children are visited in field order: first_line, empty_lines, last_line.
        new_first_line = visit_required(self, "first_line", self.first_line, visitor)
        new_empty_lines = visit_sequence(
            self, "empty_lines", self.empty_lines, visitor
        )
        new_last_line = visit_required(self, "last_line", self.last_line, visitor)
        return ParenthesizedWhitespace(
            first_line=new_first_line,
            empty_lines=new_empty_lines,
            indent=self.indent,
            last_line=new_last_line,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Generate code for the first line, then every empty line, then ask
        ``state`` to add indent tokens when ``indent`` is set, and finally
        generate code for the last line.
        """
        self.first_line._codegen(state)
        for empty_line in self.empty_lines:
            empty_line._codegen(state)
        if self.indent:
            state.add_indent_tokens()
        self.last_line._codegen(state)

    @property
    def empty(self) -> bool:
        """
        Indicates that this node is empty (zero whitespace characters). A
        :class:`ParenthesizedWhitespace` always reports ``False``.
        """

        # It's not possible to have a ParenthesizedWhitespace with zero characters.
        # If we did, the TrailingWhitespace would not have parsed.
        return False