File: py_whitespace_parser.py

package info (click to toggle)
python-libcst 1.4.0-1.2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,928 kB
  • sloc: python: 76,235; makefile: 10; sh: 2
file content (260 lines) | stat: -rw-r--r-- 9,370 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import List, Optional, Sequence, Tuple, Union

from libcst._nodes.whitespace import (
    Comment,
    COMMENT_RE,
    EmptyLine,
    Newline,
    NEWLINE_RE,
    ParenthesizedWhitespace,
    SIMPLE_WHITESPACE_RE,
    SimpleWhitespace,
    TrailingWhitespace,
)
from libcst._parser.types.config import BaseWhitespaceParserConfig
from libcst._parser.types.whitespace_state import WhitespaceState as State

# BEGIN PARSER ENTRYPOINTS


def parse_simple_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> SimpleWhitespace:
    """
    Consume simple whitespace starting at the position described by ``state``.

    Whenever the matched text contains a backslash (a line continuation), parsing
    moves on to column 0 of the following line and keeps matching there. ``state``
    is advanced in place so that it points just past the consumed whitespace, and
    all matched pieces are joined into a single ``SimpleWhitespace`` node.
    """
    source_lines = config.lines
    pieces: List[str] = []
    while True:
        # The match never fails because the pattern can match an empty string.
        # pyre-fixme[16]: Optional type has no attribute `group`.
        piece = SIMPLE_WHITESPACE_RE.match(
            source_lines[state.line - 1], state.column
        ).group(0)
        pieces.append(piece)
        if "\\" not in piece:
            # No continuation character, so the whitespace ends on this line.
            break
        # A continuation character was consumed: carry on at the start of the
        # next physical line.
        state.line += 1
        state.column = 0

    # TODO: we could special-case the common case where there's no continuation
    # character to avoid list construction and joining.

    # Only the last piece lives on the current line, so only its length moves the
    # column forward.
    state.column += len(piece)
    return SimpleWhitespace("".join(pieces))


def parse_empty_lines(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Sequence[EmptyLine]:
    """
    Parse as many consecutive empty lines as possible, starting at ``state``.

    When ``override_absolute_indent`` is provided (i.e. is not ``None``), only the
    lines up to and including the last one that is indented at that level are
    kept: those belong to the footer rather than to the next line's
    leading_lines. Any non-indented lines that follow the last indented line do
    not belong to this node and are dropped, leaving them unconsumed.

    ``state`` is moved to the end of the last line that was kept; if nothing was
    kept, ``state`` is left untouched.
    """
    parsed: List[Tuple[State, EmptyLine]] = []
    cursor = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    while True:
        empty_line = _parse_empty_line(
            config, cursor, override_absolute_indent=override_absolute_indent
        )
        if empty_line is None:
            break

        # Remember the post-parse position alongside the node, then continue from
        # a fresh clone so the stored state is never mutated again.
        parsed.append((cursor, empty_line))
        cursor = State(
            cursor.line,
            cursor.column,
            state.absolute_indent,
            state.is_parenthesized,
        )

    if override_absolute_indent is not None:
        # Locate the last indented line (scanning from the end) and cut the list
        # right after it. With no indented line at all, the index is -1 and the
        # slice below throws every line away.
        last_indented = next(
            (idx for idx in reversed(range(len(parsed))) if parsed[idx][1].indent),
            -1,
        )
        parsed = parsed[: last_indented + 1]

    if parsed:
        # Sync the caller's state with the end of the last line actually kept.
        end_state: State = parsed[-1][0]
        state.line = end_state.line
        state.column = end_state.column
    return [node for _, node in parsed]


def parse_trailing_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> TrailingWhitespace:
    """
    Parse a mandatory ``TrailingWhitespace`` at the position given by ``state``.

    Delegates to ``_parse_trailing_whitespace`` and raises an ``Exception`` if
    that helper reports a failed parse by returning ``None``.
    """
    result = _parse_trailing_whitespace(config, state)
    if result is not None:
        return result
    raise Exception(
        "Internal Error: Failed to parse TrailingWhitespace. This should never "
        "happen because a TrailingWhitespace is never optional in the grammar, "
        "so this error should've been caught by parso first."
    )


def parse_parenthesizable_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Union[SimpleWhitespace, ParenthesizedWhitespace]:
    """
    Parse whitespace that may span lines when inside parentheses.

    If ``state.is_parenthesized`` is set, a ``ParenthesizedWhitespace`` is
    attempted first; otherwise, or when that attempt fails, a plain
    ``SimpleWhitespace`` is parsed and returned.
    """
    # No extra speculation is needed for the parenthesized attempt: it either
    # parses or leaves the state unmodified.
    parenthesized = (
        _parse_parenthesized_whitespace(config, state)
        if state.is_parenthesized
        else None
    )
    if parenthesized is None:
        # Fall back to simple whitespace.
        return parse_simple_whitespace(config, state)
    return parenthesized


# END PARSER ENTRYPOINTS
# BEGIN PARSER INTERNAL PRODUCTIONS


def _parse_empty_line(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Optional[EmptyLine]:
    """
    Speculatively parse one empty line (indent, whitespace, comment, newline).

    All work happens on a private copy of ``state``. The caller's ``state`` is
    only advanced when a full empty line, terminated by a newline, was parsed;
    otherwise ``None`` is returned and ``state`` is unchanged.
    """
    probe = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    try:
        indent = _parse_indent(
            config, probe, override_absolute_indent=override_absolute_indent
        )
    except Exception:
        # We aren't at the start of a new line, so this cannot be an empty line.
        return None
    else:
        whitespace = parse_simple_whitespace(config, probe)
        comment = _parse_comment(config, probe)
        newline = _parse_newline(config, probe)

    if newline is not None:
        # Success: commit the position reached by the probe. absolute_indent and
        # is_parenthesized never change, so they need no copying.
        state.line = probe.line
        state.column = probe.column
        return EmptyLine(indent, whitespace, comment, newline)
    # Something other than a newline follows; the speculation failed.
    return None


def _parse_indent(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> bool:
    """
    Try to consume the expected indentation at the start of the current line.

    The expected indentation is ``override_absolute_indent`` when it is given,
    otherwise ``state.absolute_indent``. Returns True (and advances
    ``state.column`` past the indentation) if the line starts with it, otherwise
    False. Also returns False, without touching ``state``, when positioned at the
    very end of the last line. Raises ``Exception`` if the column is non-zero
    anywhere else.
    """
    expected_indent = (
        state.absolute_indent
        if override_absolute_indent is None
        else override_absolute_indent
    )
    current_line = config.lines[state.line - 1]

    if state.column == 0:
        found = current_line.startswith(expected_indent, state.column)
        if found:
            state.column += len(expected_indent)
        return found

    at_eof = state.column == len(current_line) and state.line == len(config.lines)
    if not at_eof:
        raise Exception("Internal Error: Column should be 0 when parsing an indent.")
    # We're at EOF, treat this as a failed speculative parse.
    return False


def _parse_comment(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Comment]:
    """
    Parse a comment at the current position, if there is one.

    Returns a ``Comment`` holding the matched text and advances ``state.column``
    past it, or returns ``None`` and leaves ``state`` alone when ``COMMENT_RE``
    does not match.
    """
    current_line = config.lines[state.line - 1]
    match = COMMENT_RE.match(current_line, state.column)
    if match is not None:
        text = match.group(0)
        state.column += len(text)
        return Comment(text)
    return None


def _parse_newline(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Newline]:
    """
    Parse a newline at the current position, if there is one.

    Returns ``None`` without touching ``state`` when ``NEWLINE_RE`` does not
    match. On a match, ``state`` moves to column 0 of the next line, or stays at
    the end of the current line when this is the last line. Raises ``Exception``
    if the matched newline is not the end of the line; in that case
    ``state.column`` has already been advanced past the match.
    """
    current_line = config.lines[state.line - 1]
    match = NEWLINE_RE.match(current_line, state.column)
    if match is None:
        # No newline here; the speculative parse failed.
        return None

    text = match.group(0)
    state.column += len(text)
    if state.column != len(current_line):
        raise Exception("Internal Error: Found a newline, but it wasn't the EOL.")
    if state.line < len(config.lines):
        # This newline ended a line and another line follows, so step onto it.
        state.line += 1
        state.column = 0
    # A newline equal to the configured default is left implicit so that it is
    # inherited from the Module instead of being set explicitly.
    return Newline() if text == config.default_newline else Newline(text)


def _parse_trailing_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[TrailingWhitespace]:
    """
    Speculatively parse trailing whitespace (whitespace, comment, newline).

    Parsing runs on a private copy of ``state``. The caller's ``state`` is
    advanced only when a terminating newline was found; otherwise ``None`` is
    returned and ``state`` is unchanged.
    """
    scratch = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    whitespace = parse_simple_whitespace(config, scratch)
    comment = _parse_comment(config, scratch)
    newline = _parse_newline(config, scratch)
    if newline is not None:
        # Success: commit the new position. absolute_indent and is_parenthesized
        # don't change, so they need no copying.
        state.line, state.column = scratch.line, scratch.column
        return TrailingWhitespace(whitespace, comment, newline)
    # No newline terminates the line; the speculation failed.
    return None


def _parse_parenthesized_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[ParenthesizedWhitespace]:
    """
    Parse whitespace that spans multiple lines inside parentheses.

    The result is made of the trailing whitespace of the first line, any number
    of empty lines, an indent flag for the last line, and the simple whitespace
    that leads the last line.

    Returns ``None`` without modifying ``state`` if the first line's trailing
    whitespace cannot be parsed. Otherwise ``state`` is advanced past everything
    consumed. May propagate the ``Exception`` raised by ``_parse_indent``.
    """
    first_line = _parse_trailing_whitespace(config, state)
    if first_line is None:
        # Speculative parsing failed
        return None

    # Collect into a list and convert once: growing a tuple by repeated
    # concatenation copies it on every iteration (quadratic in the line count).
    collected: List[EmptyLine] = []
    while True:
        empty_line = _parse_empty_line(config, state)
        if empty_line is None:
            # This isn't an empty line, so parse it below
            break
        collected.append(empty_line)
    empty_lines = tuple(collected)

    indent = _parse_indent(config, state)
    last_line = parse_simple_whitespace(config, state)
    return ParenthesizedWhitespace(first_line, empty_lines, indent, last_line)