1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from typing import List, Optional, Sequence, Tuple, Union
from libcst._nodes.whitespace import (
Comment,
COMMENT_RE,
EmptyLine,
Newline,
NEWLINE_RE,
ParenthesizedWhitespace,
SIMPLE_WHITESPACE_RE,
SimpleWhitespace,
TrailingWhitespace,
)
from libcst._parser.types.config import BaseWhitespaceParserConfig
from libcst._parser.types.whitespace_state import WhitespaceState as State
# BEGIN PARSER ENTRYPOINTS
def parse_simple_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> SimpleWhitespace:
    """
    Consumes whatever ``SIMPLE_WHITESPACE_RE`` matches at the current position,
    following backslash continuations onto subsequent lines.

    ``state.line`` and ``state.column`` are advanced in place past the consumed
    text. The returned node wraps the concatenation of every matched fragment.
    """
    lines = config.lines
    # The match never fails because the pattern can match an empty string
    # pyre-fixme[16]: Optional type has no attribute `group`.
    ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group(0)

    if "\\" not in ws_line:
        # Common case: no continuation character, so the single match is the
        # whole result and there is nothing to collect or join.
        state.column += len(ws_line)
        return SimpleWhitespace(ws_line)

    ws_line_list = [ws_line]
    while "\\" in ws_line:
        # continuation character: the whitespace carries on at the start of the
        # next line
        state.line += 1
        state.column = 0
        ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group(
            0
        )
        ws_line_list.append(ws_line)

    # once we've finished collecting continuation characters, the column only
    # needs to move past the fragment matched on the final line
    state.column += len(ws_line)
    return SimpleWhitespace("".join(ws_line_list))
def parse_empty_lines(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Sequence[EmptyLine]:
    """
    Parses as many consecutive empty lines as possible starting at ``state``.

    When ``override_absolute_indent`` is given, the result is cut off after the
    last line whose ``indent`` is true; if no parsed line is indented, nothing is
    returned. Trailing unindented lines are left unconsumed.

    ``state.line`` and ``state.column`` are moved to just past the last line that
    is actually returned. If nothing is returned, ``state`` is left untouched.
    """
    # Every empty line is parsed on its own copy of the state, and the copy is
    # remembered next to the node so we can later rewind to any line boundary.
    parsed: List[Tuple[State, EmptyLine]] = []
    cursor = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    empty_line = _parse_empty_line(
        config, cursor, override_absolute_indent=override_absolute_indent
    )
    while empty_line is not None:
        parsed.append((cursor, empty_line))
        # Fresh clone for the next attempt, so the stored state stays frozen at
        # the end of the line we just recorded.
        cursor = State(
            cursor.line,
            cursor.column,
            state.absolute_indent,
            state.is_parenthesized,
        )
        empty_line = _parse_empty_line(
            config, cursor, override_absolute_indent=override_absolute_indent
        )

    if override_absolute_indent is not None:
        # Keep everything up to and including the last indented line. If there is
        # no indented line at all, keep nothing.
        keep = 0
        for position in reversed(range(len(parsed))):
            if parsed[position][1].indent:
                keep = position + 1
                break
        parsed = parsed[:keep]

    if parsed:
        # Sync the caller's state with the end of the last line we are returning.
        last_state, _ = parsed[-1]
        state.line = last_state.line
        state.column = last_state.column

    return [node for _, node in parsed]
def parse_trailing_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> TrailingWhitespace:
    """
    Parses a mandatory ``TrailingWhitespace`` at ``state``.

    Thin wrapper around ``_parse_trailing_whitespace`` that raises an
    ``Exception`` instead of returning ``None`` when nothing could be parsed.
    """
    result = _parse_trailing_whitespace(config, state)
    if result is not None:
        return result
    raise Exception(
        "Internal Error: Failed to parse TrailingWhitespace. This should never "
        "happen because a TrailingWhitespace is never optional in the grammar, "
        "so this error should've been caught by parso first."
    )
def parse_parenthesizable_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Union[SimpleWhitespace, ParenthesizedWhitespace]:
    """
    Parses whitespace that may span lines when ``state.is_parenthesized`` is set.

    Returns a ``ParenthesizedWhitespace`` if one can be parsed in a parenthesized
    context, and falls back to a ``SimpleWhitespace`` in every other case.
    """
    if not state.is_parenthesized:
        return parse_simple_whitespace(config, state)

    # No speculation is needed here: the parenthesized parse either succeeds or
    # leaves the state unmodified.
    parenthesized = _parse_parenthesized_whitespace(config, state)
    if parenthesized is None:
        return parse_simple_whitespace(config, state)
    return parenthesized
# END PARSER ENTRYPOINTS
# BEGIN PARSER INTERNAL PRODUCTIONS
def _parse_empty_line(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Optional[EmptyLine]:
    """
    Speculatively parses a single empty line (indent, whitespace, optional
    comment, newline).

    Returns ``None`` and leaves ``state`` untouched if the indent parse raises or
    no newline is found. On success, ``state.line`` and ``state.column`` are
    advanced past the line.
    """
    # All parsing happens on a scratch copy so a failure cannot leak into `state`.
    probe = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    try:
        indent = _parse_indent(
            config, probe, override_absolute_indent=override_absolute_indent
        )
    except Exception:
        # We aren't at the start of a line, so this cannot be an empty line.
        return None

    whitespace = parse_simple_whitespace(config, probe)
    comment = _parse_comment(config, probe)
    newline = _parse_newline(config, probe)
    if newline is not None:
        # Commit the speculative position. absolute_indent/is_parenthesized never
        # change during parsing, so only line and column need copying back.
        state.line = probe.line
        state.column = probe.column
        return EmptyLine(indent, whitespace, comment, newline)

    # The line holds something other than whitespace/comment before its newline.
    return None
def _parse_indent(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> bool:
    """
    Returns True if indentation was found, otherwise False.

    The expected indentation is ``override_absolute_indent`` when given, and
    ``state.absolute_indent`` otherwise. On a match, ``state.column`` is advanced
    past the indentation. Raises an ``Exception`` if called anywhere other than
    column 0, unless the position is the very end of the last line.
    """
    if override_absolute_indent is None:
        expected_indent = state.absolute_indent
    else:
        expected_indent = override_absolute_indent

    all_lines = config.lines
    current_line = all_lines[state.line - 1]

    if state.column == 0:
        found = current_line.startswith(expected_indent, state.column)
        if found:
            state.column += len(expected_indent)
        return found

    at_eof = state.line == len(all_lines) and state.column == len(current_line)
    if at_eof:
        # We're at EOF, treat this as a failed speculative parse
        return False
    raise Exception("Internal Error: Column should be 0 when parsing an indent.")
def _parse_comment(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Comment]:
    """
    Parses a comment at the current position, if ``COMMENT_RE`` matches there.

    Advances ``state.column`` past the matched text on success. Returns ``None``
    and leaves ``state`` unchanged otherwise.
    """
    current_line = config.lines[state.line - 1]
    found = COMMENT_RE.match(current_line, state.column)
    if found is not None:
        text = found.group(0)
        state.column += len(text)
        return Comment(text)
    return None
def _parse_newline(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Newline]:
    """
    Parses a newline at the current position, if ``NEWLINE_RE`` matches there.

    On success the state moves to column 0 of the next line, or stays at the end
    of the current line when it is the last one. A newline equal to
    ``config.default_newline`` is returned as a bare ``Newline()``. Raises an
    ``Exception`` if the matched newline does not end the line.
    """
    current_line = config.lines[state.line - 1]
    found = NEWLINE_RE.match(current_line, state.column)
    if found is None:
        # no newline was found, speculative parsing failed
        return None

    matched = found.group(0)
    state.column += len(matched)
    if state.column != len(current_line):
        raise Exception("Internal Error: Found a newline, but it wasn't the EOL.")

    if state.line < len(config.lines):
        # this newline was the end of a line, and there's another line,
        # therefore we should move to the next line
        state.line += 1
        state.column = 0

    # A newline matching the default is inherited from the Module rather than
    # being set explicitly on the node.
    return Newline() if matched == config.default_newline else Newline(matched)
def _parse_trailing_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[TrailingWhitespace]:
    """
    Speculatively parses whitespace, an optional comment, and a newline.

    Returns ``None`` and leaves ``state`` untouched when no newline follows. On
    success, ``state.line`` and ``state.column`` are advanced past the newline.
    """
    # Parse on a scratch copy so a failed attempt leaves `state` as it was.
    probe = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    whitespace = parse_simple_whitespace(config, probe)
    comment = _parse_comment(config, probe)
    newline = _parse_newline(config, probe)
    if newline is not None:
        # Commit the speculative position. absolute_indent/is_parenthesized never
        # change during parsing, so only line and column need copying back.
        state.line = probe.line
        state.column = probe.column
        return TrailingWhitespace(whitespace, comment, newline)

    # Speculative parsing failed
    return None
def _parse_parenthesized_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[ParenthesizedWhitespace]:
    """
    Parses whitespace that spans multiple lines: a trailing-whitespace first
    line, any number of empty lines, then the indent and simple whitespace of
    the final line.

    Returns ``None`` if the first line cannot be parsed as trailing whitespace.
    Otherwise ``state`` is advanced past everything that was consumed.
    """
    first_line = _parse_trailing_whitespace(config, state)
    if first_line is None:
        # Speculative parsing failed
        return None

    # Collect into a list and convert once at the end. Growing a tuple by
    # concatenation copies it on every iteration, which is quadratic overall.
    empty_lines: List[EmptyLine] = []
    while True:
        empty_line = _parse_empty_line(config, state)
        if empty_line is None:
            # This isn't an empty line, so parse it below
            break
        empty_lines.append(empty_line)

    indent = _parse_indent(config, state)
    last_line = parse_simple_whitespace(config, state)
    # The node is still given a tuple, exactly as before.
    return ParenthesizedWhitespace(first_line, tuple(empty_lines), indent, last_line)
|