1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional, Pattern, Sequence
from libcst._add_slots import add_slots
from libcst._nodes.base import BaseLeaf, BaseValueToken, CSTNode, CSTValidationError
from libcst._nodes.internal import (
CodegenState,
visit_optional,
visit_required,
visit_sequence,
)
from libcst._visitors import CSTVisitorT
# SimpleWhitespace includes continuation characters, which must be followed immediately
# by a newline. SimpleWhitespace does not include other kinds of newlines, because those
# may have semantic significance.
# The pattern accepts zero or more repetitions of: a space, a form feed, a tab, or a
# backslash immediately followed by a newline sequence (``\r\n``, a lone ``\r``, or
# ``\n``). Because of the ``*`` it also matches the empty string.
SIMPLE_WHITESPACE_RE: Pattern[str] = re.compile(r"([ \f\t]|\\(\r\n?|\n))*", re.UNICODE)
# Exactly one newline sequence: ``\r\n``, a lone ``\r``, or ``\n``.
NEWLINE_RE: Pattern[str] = re.compile(r"\r\n?|\n", re.UNICODE)
# A pound sign followed by any number of characters other than ``\r`` and ``\n``.
COMMENT_RE: Pattern[str] = re.compile(r"#[^\r\n]*", re.UNICODE)
class BaseParenthesizableWhitespace(CSTNode, ABC):
    """
    Abstract base for the whitespace found between two tokens inside the body of a
    statement or expression. This is the most common type of whitespace.

    Which characters are legal in a whitespace depends on whether it sits inside a
    parenthesized expression or not. Nodes that can appear both inside and outside
    of a ``()``, ``[]`` or ``{}`` section use this class so that they can accept
    either form of whitespace.

    https://docs.python.org/3/reference/lexical_analysis.html#implicit-line-joining

    A backslash (``\\``) acting as a line-continuation character may be part of
    parenthesizable whitespace. The continuation character is not technically
    "whitespace", but it serves the same purpose.

    This whitespace is frequently non-semantic (optional). It does carry semantic
    value wherever it resolves a grammar ambiguity between two tokens (e.g.
    ``if test`` versus ``iftest``).
    """

    __slots__ = ()

    # TODO: Should we somehow differentiate places where we require non-zero whitespace
    # with a separate type?

    @property
    @abstractmethod
    def empty(self) -> bool:
        """
        ``True`` when this node contains zero whitespace characters. Subclasses
        must provide the implementation.
        """
@add_slots
@dataclass(frozen=True)
class SimpleWhitespace(BaseParenthesizableWhitespace, BaseValueToken):
    """
    Whitespace found between two tokens inside the body of a statement or
    expression. This is the most common type of whitespace.

    It may hold zero or more spaces, tabs or form feeds. A newline is only allowed
    when it is directly preceded by a line continuation character (``\\``). When a
    newline without a line continuation is needed, use
    :class:`ParenthesizedWhitespace` instead.

    Simple whitespace is frequently non-semantic (optional). It does carry semantic
    value wherever it resolves a grammar ambiguity between two tokens (e.g.
    ``if test`` versus ``iftest``).

    An example :class:`SimpleWhitespace` made of a space, a line continuation,
    a newline and another space::

        SimpleWhitespace(" \\\\\\n ")
    """

    #: The literal text of this whitespace. A legal value is built only from spaces,
    #: ``\f`` and ``\t`` characters, and line continuations: a backslash (``\``)
    #: immediately followed by a newline (``\n``, ``\r\n`` or ``\r``).
    value: str

    def _validate(self) -> None:
        """
        Raise :class:`CSTValidationError` unless the whole of ``value`` matches
        ``SIMPLE_WHITESPACE_RE``.
        """
        if SIMPLE_WHITESPACE_RE.fullmatch(self.value) is not None:
            return
        raise CSTValidationError(
            f"Got non-whitespace value for whitespace node: {repr(self.value)}"
        )

    @property
    def empty(self) -> bool:
        """
        ``True`` when this node contains zero whitespace characters.
        """
        char_count = len(self.value)
        return char_count == 0
@add_slots
@dataclass(frozen=True)
class Newline(BaseLeaf):
    """
    The newline that terminates an :class:`EmptyLine` or a statement (in the latter
    case as part of a :class:`TrailingWhitespace`).

    Newlines that follow a continuation character (the backslash, ``\\``) are not
    represented by this node; they belong to the surrounding
    :class:`SimpleWhitespace`.

    A value may be supplied to override the module's default newline for this one
    line. Normally it should stay at its default of ``None``. The override exists
    because python modules are permitted to mix multiple unambiguous newline
    markers.
    """

    #: ``None`` means the module's default newline sequence is emitted. Any other
    #: value (``\n``, ``\r\n`` or ``\r``) is emitted exactly as given.
    value: Optional[str] = None

    def _validate(self) -> None:
        """
        Raise :class:`CSTValidationError` when a non-empty ``value`` is not a single
        newline sequence as defined by ``NEWLINE_RE``.
        """
        value = self.value
        if not value:
            # NOTE(review): this skips the pattern check for ``None`` *and* for the
            # empty string, so ``Newline("")`` validates and emits nothing. Confirm
            # whether callers rely on that before tightening the check.
            return
        if NEWLINE_RE.fullmatch(value) is None:
            raise CSTValidationError(
                f"Got an invalid value for newline node: {repr(value)}"
            )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Emit this newline, using ``state.default_newline`` when no explicit value
        was set.
        """
        token = self.value
        if token is None:
            token = state.default_newline
        state.add_token(token)
@add_slots
@dataclass(frozen=True)
class Comment(BaseValueToken):
    """
    A comment, stored together with its leading pound (``#``) character.

    The pound character is kept in the ``value`` property (instead of being
    stripped) to reinforce the idea that whitespace immediately after it may be
    significant. E.g::

        # comment with whitespace at the start (usually preferred)
        #comment without whitespace at the start (usually not desirable)

    Usually wrapped in a :class:`TrailingWhitespace` or :class:`EmptyLine` node.
    """

    #: The full comment text. A valid value begins with the pound (``#``) character,
    #: followed by zero or more characters that are not ``\r`` or ``\n``. A comment
    #: can never span a newline.
    value: str

    def _validate(self) -> None:
        """
        Raise :class:`CSTValidationError` unless the whole of ``value`` matches
        ``COMMENT_RE``.
        """
        if COMMENT_RE.fullmatch(self.value) is not None:
            return
        raise CSTValidationError(
            f"Got non-comment value for comment node: {repr(self.value)}"
        )
@add_slots
@dataclass(frozen=True)
class TrailingWhitespace(CSTNode):
    """
    Everything that follows a statement on the same line: optional simple
    whitespace, an optional comment, and the terminating newline. For a line that
    contains only whitespace, use :class:`EmptyLine` instead.
    """

    #: Simple whitespace that precedes the comment (if any) and the newline.
    whitespace: SimpleWhitespace = SimpleWhitespace.field("")

    #: The comment following the simple whitespace, or ``None`` if there is none.
    comment: Optional[Comment] = None

    #: The newline that ends this trailing whitespace.
    newline: Newline = Newline.field()

    def _visit_and_replace_children(self, visitor: CSTVisitorT) -> "TrailingWhitespace":
        """
        Visit ``whitespace``, ``comment`` and ``newline`` (in that order) and build
        a new node from the results.
        """
        new_whitespace = visit_required(self, "whitespace", self.whitespace, visitor)
        new_comment = visit_optional(self, "comment", self.comment, visitor)
        new_newline = visit_required(self, "newline", self.newline, visitor)
        return TrailingWhitespace(
            whitespace=new_whitespace,
            comment=new_comment,
            newline=new_newline,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Emit the whitespace, then the comment when one is present, then the
        newline.
        """
        self.whitespace._codegen(state)
        trailing_comment = self.comment
        if trailing_comment is not None:
            trailing_comment._codegen(state)
        self.newline._codegen(state)
@add_slots
@dataclass(frozen=True)
class EmptyLine(CSTNode):
    """
    A line that holds nothing but whitespace and/or a comment. Statements usually
    own the :class:`EmptyLine` nodes directly above them, while a :class:`Module`
    owns the :class:`EmptyLine` nodes of the document's header and footer.
    """

    #: Whether indent tokens are emitted at the start of this line. An empty line
    #: does not have to follow the current indentation level; for example, when all
    #: trailing whitespace has been stripped and an empty line sits between two
    #: statements.
    indent: bool = True

    #: Additional whitespace between the indent and the comment.
    whitespace: SimpleWhitespace = SimpleWhitespace.field("")

    #: The comment following the indent and extra whitespace, or ``None``.
    comment: Optional[Comment] = None

    #: The newline that ends this empty line.
    newline: Newline = Newline.field()

    def _visit_and_replace_children(self, visitor: CSTVisitorT) -> "EmptyLine":
        """
        Visit ``whitespace``, ``comment`` and ``newline`` (in that order) and build
        a new node from the results; ``indent`` is carried over unchanged.
        """
        new_whitespace = visit_required(self, "whitespace", self.whitespace, visitor)
        new_comment = visit_optional(self, "comment", self.comment, visitor)
        new_newline = visit_required(self, "newline", self.newline, visitor)
        return EmptyLine(
            indent=self.indent,
            whitespace=new_whitespace,
            comment=new_comment,
            newline=new_newline,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Emit the indent tokens (only when ``indent`` is set), the whitespace, the
        comment when one is present, and finally the newline.
        """
        if self.indent:
            state.add_indent_tokens()
        self.whitespace._codegen(state)
        line_comment = self.comment
        if line_comment is not None:
            line_comment._codegen(state)
        self.newline._codegen(state)
@add_slots
@dataclass(frozen=True)
class ParenthesizedWhitespace(BaseParenthesizableWhitespace):
    """
    Whitespace found between two tokens inside a parenthesized expression or
    statement when it includes a newline that is not preceded by a line
    continuation (``\\``) character.

    https://docs.python.org/3/reference/lexical_analysis.html#implicit-line-joining

    This node can never be empty, because it always holds at least one
    :class:`TrailingWhitespace`. For whitespace without comments or newlines, use
    :class:`SimpleWhitespace` instead.
    """

    #: Whitespace following the previous node, up to and including the
    #: end-of-line comment and newline.
    first_line: TrailingWhitespace = TrailingWhitespace.field()

    #: Lines after the first one that hold only indentation and/or comments.
    empty_lines: Sequence[EmptyLine] = ()

    #: Whether indent tokens are emitted before the final simple whitespace.
    indent: bool = False

    #: Additional whitespace between the indent and the next node.
    last_line: SimpleWhitespace = SimpleWhitespace.field("")

    def _visit_and_replace_children(
        self, visitor: CSTVisitorT
    ) -> "ParenthesizedWhitespace":
        """
        Visit ``first_line``, ``empty_lines`` and ``last_line`` (in that order) and
        build a new node from the results; ``indent`` is carried over unchanged.
        """
        new_first_line = visit_required(self, "first_line", self.first_line, visitor)
        new_empty_lines = visit_sequence(
            self, "empty_lines", self.empty_lines, visitor
        )
        new_last_line = visit_required(self, "last_line", self.last_line, visitor)
        return ParenthesizedWhitespace(
            first_line=new_first_line,
            empty_lines=new_empty_lines,
            indent=self.indent,
            last_line=new_last_line,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Emit the first line, every empty line, the indent tokens (only when
        ``indent`` is set) and finally the last line's whitespace.
        """
        self.first_line._codegen(state)
        for empty_line in self.empty_lines:
            empty_line._codegen(state)
        if self.indent:
            state.add_indent_tokens()
        self.last_line._codegen(state)

    @property
    def empty(self) -> bool:
        """
        ``True`` when this node contains zero whitespace characters. For
        :class:`ParenthesizedWhitespace` this is always ``False``.
        """
        # It's not possible to have a ParenthesizedWhitespace with zero characters.
        # If we did, the TrailingWhitespace would not have parsed.
        return False
|