1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
|
# type: ignore
"""This file contains various functions used for analysis of SyntaxErrors"""
import unicodedata
from .. import debug_helper, token_utils
from ..ft_gettext import current_lang
_ = current_lang.translate
def matching_brackets(bra, ket):
    """Return True if `bra` is an opening bracket and `ket` its closing partner."""
    pairs = (("(", ")"), ("[", "]"), ("{", "}"))
    # Plain equality tests (rather than a dict lookup) so that arguments only
    # need to support `==` against a str.
    return any(bra == opening and ket == closing for opening, closing in pairs)
def name_bracket(bracket):
    """Return the translated, human-readable name of a bracket.

    `bracket` could be a Token or a str; its `str()` value is used as the
    lookup key, so any value other than one of the six bracket characters
    raises KeyError.
    """
    symbol = str(bracket)  # bracket could be a Token or a str
    translated_names = {
        "(": _("parenthesis `(`"),
        ")": _("parenthesis `)`"),
        "[": _("square bracket `[`"),
        "]": _("square bracket `]`"),
        "{": _("curly bracket `{`"),
        "}": _("curly bracket `}`"),
    }
    return translated_names[symbol]
# fmt: off
# Fancy unicode quotation marks that are sometimes typed (or pasted) instead
# of the normal quote characters `'` and `"`.
# The following has been taken from https://unicode-table.com/en/sets/quotation-marks/
bad_quotation_marks = [
    "«", "»",
    "‹", "›",
    "„", "“",
    "‟", "”",
    "❝", "❞",
    "❮", "❯",
    "⹂", "〝",
    # U+FF02 FULLWIDTH QUOTATION MARK; written as an escape so that it cannot
    # be silently normalized into the ASCII double quote.
    "〞", "\uff02",
    "‚", "’", "‛", "‘",
    "❛", "❜",
    "❟",
]
# fmt: on
def identify_bad_quote_char(char, line):
    """Explain the use of a fancy unicode quotation mark in place of a normal
    single or double quote.

    Returns None if `char` is not listed in `bad_quotation_marks` or is not a
    single unicode character; otherwise returns a dict with the keys
    "cause" and "suggest".
    """
    if char not in bad_quotation_marks:
        return

    # prevent crash if 'char' is not a single unicode character.
    try:
        char_name = unicodedata.name(char, "unknown")
    except TypeError:
        return

    hint = _("Did you mean to use a normal quote character, `'` or `\"`?\n")
    cause = _(
        "I suspect that you used a fancy unicode quotation mark\n"
        "whose name is {name}\n"
        "instead of a normal single or double quote for a string."
        "\n"
    ).format(name=char_name)

    nb_fancy_quotes = sum(
        1 for character in line if character in bad_quotation_marks
    )

    # In the absence of a matching quote, in some cases, perhaps another
    # character was intended.
    if nb_fancy_quotes == 1:
        if char in ("‹", "❮"):
            cause += _("Or perhaps, you meant to write a less than sign, `<`.\n")
        elif char in ("›", "❯"):
            cause += _("Or perhaps, you meant to write a greater than sign, `>`.\n")
        elif char in ("‚", "❟"):
            cause += _("Or perhaps, you meant to write a comma.\n")

    return {"cause": cause, "suggest": hint}
def identify_bad_math_symbol(char, line):
    """Similar to identify_bad_quote_char except that it is analyzed when
    we see an 'invalid decimal literal' message.

    Returns None if `char` is not listed in `bad_quotation_marks` or is not a
    single unicode character. Returns a dict with the keys "cause" and
    "suggest" if `char` is the only fancy quotation mark found in `line` and
    it resembles `<`, `>` or a comma. Returns an empty dict otherwise.
    """
    if char not in bad_quotation_marks:
        return

    # prevent crash if 'char' is not a single unicode character.
    try:
        char_name = unicodedata.name(char, "unknown")
    except TypeError:
        return

    cause = _(
        "I suspect that you used a fancy unicode quotation mark\n"
        "whose name is {name}.\n"
        "\n"
    ).format(name=char_name)

    nb_fancy_quotes = sum(
        1 for character in line if character in bad_quotation_marks
    )

    # In the absence of a matching quote, in some cases, perhaps another
    # character was intended.
    if nb_fancy_quotes != 1:
        return {}

    hint = None
    if char in ("‹", "❮"):
        cause += _("Perhaps, you meant to write a less than sign, `<`.\n")
        hint = _("Did you mean to write a less than sign, `<`?\n")
    elif char in ("›", "❯"):
        cause += _("Perhaps, you meant to write a greater than sign, `>`.\n")
        hint = _("Did you mean to write a greater than sign, `>`?\n")
    elif char in ("‚", "❟"):
        cause += _("Perhaps, you meant to write a comma.\n")
        hint = _("Did you mean to write a comma?\n")

    return {"cause": cause, "suggest": hint} if hint else {}
def identify_unicode_fraction(char):
    """Explain the use of a unicode fraction character in the source.

    Returns None if `char` is not a single unicode character or if its unicode
    name does not contain "FRACTION"; otherwise returns a dict with the keys
    "cause" and "suggest".

    For characters named "VULGAR FRACTION <numerator> <denominator>" whose
    numerator and denominator words are recognized, the suggestion spells out
    the equivalent `num/denom` expression.
    """
    # prevent crash if 'char' is not a single unicode character.
    try:
        char_name = unicodedata.name(char, "unknown")
    except TypeError:
        return

    if "FRACTION" not in char_name:
        return

    if char_name == "FRACTION SLASH":
        # Translated like every other user-facing message of this module.
        hint = _("Did you mean to use the division operator, `/`?\n")
        cause = _(
            "I suspect that you used the unicode character known as\n"
            "'FRACTION SLASH', which looks similar to\n"
            "but is different from the division operator `/`.\n"
        )
        return {"cause": cause, "suggest": hint}

    # Generic explanation, used whenever the name cannot be decoded further.
    # NOTE(review): the first two fragments are joined without a space
    # ("`{char}`meant"). Left unchanged because this text is what is handed to
    # the translator; fix it together with the translation catalogs.
    hint = _("Did you use a unicode fraction?\n")
    cause = _(
        "I suspect that you used the unicode character `{char}`"
        "meant to represent a fraction.\n"
        "The name of this unicode character is {name}.\n"
    ).format(char=char, name=char_name)
    generic = {"cause": cause, "suggest": hint}

    if not char_name.startswith("VULGAR FRACTION "):
        return generic

    # Expected form: "<NUMERATOR> <DENOMINATOR>", e.g. "THREE QUARTERS".
    words = char_name.replace("VULGAR FRACTION ", "").split(" ")
    if len(words) != 2:  # unexpected name: do not crash on unpacking
        return generic
    num_word, denom_word = words

    digit_names = (
        "ZERO", "ONE", "TWO", "THREE", "FOUR",
        "FIVE", "SIX", "SEVEN", "EIGHT", "NINE",
    )
    if num_word not in digit_names:
        return generic
    num = digit_names.index(num_word)

    possible_choices = (
        ("HALF", 2),
        ("THIRD", 3),
        ("QUARTER", 4),
        ("FIFTH", 5),
        ("SIXTH", 6),
        ("SEVENTH", 7),
        ("EIGHTH", 8),
        ("NINTH", 9),
        ("TENTH", 10),
    )
    # Substring test so that plural forms ("THIRDS", "QUARTERS") also match.
    denominator = next(
        (value for word, value in possible_choices if word in denom_word), None
    )
    if denominator is None:
        return generic

    hint = _("Did you mean `{num}/{denom}`?\n").format(num=num, denom=denominator)
    cause = _(
        "You used the unicode character {char} which is known as\n"
        "{name}\n"
        "I suspect that you meant to write the fraction `{num}/{denom}` instead.\n"
    ).format(num=num, denom=denominator, char=char, name=char_name)
    return {"cause": cause, "suggest": hint}
def highlight_single_token(token):
    """Return a one-row marker dict underlining `token` with ^.

    The key is the token's row; the value is padded with spaces up to the
    token's starting column.
    """
    padding = " " * token.start_col
    carets = "^" * len(token.string)
    return {token.start_row: padding + carets}
def highlight_added_token(prev_token, token):
    """Return a marker dict for a token newly added right after `prev_token`.

    The ^ markers start at the end column of `prev_token`, on its row, and
    are as wide as `str(token)`.
    """
    width = len(str(token))
    offset = " " * prev_token.end_col
    return {prev_token.start_row: offset + "^" * width}
def highlight_two_tokens(
    first, second, first_marker="^", second_marker="^", between=" "
):
    """Return a marker dict highlighting two individual tokens.

    Each token can use its own marker character. When both tokens are on the
    same row, the gap between them is filled with `between`; if they touch
    each other and `between` is a single space, both markers are forced
    to "^". When the tokens are on different rows, one entry per row is
    returned and `between`, stripped of whitespace, is appended to the first.
    """
    if first.start_row != second.start_row:
        upper = (
            " " * first.start_col
            + first_marker * len(first.string)
            + between.strip()
        )
        lower = " " * second.start_col + second_marker * len(second.string)
        return {first.start_row: upper, second.start_row: lower}

    if first.end_col == second.start_col and between == " ":
        first_marker = second_marker = "^"

    gap = second.start_col - first.end_col
    pieces = (
        " " * first.start_col,
        first_marker * len(first.string),
        between * gap,
        second_marker * len(second.string),
    )
    return {first.start_row: "".join(pieces)}
def highlight_range(first, last):
    """Return a marker dict underlining with ^ everything from the start
    column of `first` to the end column of `last`, on the row of `first`.
    """
    begin = first.start_col
    width = last.end_col - begin
    return {first.start_row: f"{' ' * begin}{'^' * width}"}
def get_last_token_before_specified(bad_token, tokens, specified_token):
    """Find the token that precedes ``specified_token`` in the expression
    starting at ``bad_token``.

    Returns a tuple ``(first, last)``.

    ``first`` is ``bad_token`` itself, or the opening bracket located just
    before it when the matching closing bracket is met during the scan.

    ``last`` is one of:

    * ``None`` if ``specified_token`` is not found or if brackets do not match;
    * the token seen just before ``specified_token``, when the latter is found
      while no bracket opened since ``bad_token`` is still pending;
    * a tuple ``(prev, ket)`` when a comment, or a token located on a different
      row than ``first``, is reached first. ``prev`` is the previous token seen
      and ``ket`` is the closing bracket for the most recently opened bracket
      still pending, or ``"|"`` if there is none.
    """
    first = bad_token
    brackets = []  # opening brackets seen since bad_token and not yet closed
    prev = first
    before_first = None  # opening bracket immediately preceding bad_token, if any
    found_first = False
    for tok in tokens:
        # NOTE(review): if Token equality only compares string values, this
        # matches the first token having the same text as bad_token - confirm.
        if tok == bad_token:
            found_first = True
            # NOTE(review): `in "([{"` is a substring test, so an empty string
            # would also satisfy it (same for the tests on tok.string below);
            # confirm tokens never have an empty string here.
            if prev.string in "([{":
                before_first = prev
        if not found_first:
            # Still before the token of interest: only remember what precedes it.
            prev = tok
            continue
        if tok.string in "([{":
            brackets.append(tok.string)
        elif tok.string in ")]}":
            if not brackets:
                # This closes a bracket opened before bad_token: if it pairs
                # with the bracket just before bad_token, widen the range to
                # start at that bracket.
                if before_first is not None and matching_brackets(
                    before_first, tok.string
                ):
                    first = before_first
                else:
                    return first, None  # should not happen; unmatched bracket
            else:
                bra = brackets.pop()
                if not matching_brackets(bra, tok.string):
                    return first, None  # should not happen; unmatched bracket
        elif not brackets and tok == specified_token:
            return first, prev

        if (
            tok.start_row != first.start_row or tok.is_comment()
        ):  # statement continue on next line
            if not brackets:
                debug_helper.log("get_last_token_before_specified:")
                debug_helper.log("line continues but not open bracket found.")
                ket = "|"
            else:
                bra = brackets.pop()
                # Pick the closing bracket pairing with `bra`; if none pairs,
                # `ket` is left as the last candidate, "}".
                for ket in ")]}":
                    if matching_brackets(bra, ket):
                        break
            return first, (prev, ket)
        prev = tok
    return first, None
def highlight_before_specified_token(bad_token, tokens, specified_token):
    """Return a marker dict underlining the expression that starts at
    `bad_token` and ends just before `specified_token`.

    The range is obtained from get_last_token_before_specified. If no end
    token is found, only the first token is underlined. If the statement
    continues on another line, the last ^ is followed by `-->`.
    """
    first, last = get_last_token_before_specified(bad_token, tokens, specified_token)
    row = first.start_row

    if last is None:
        return {row: " " * first.start_col + "^" * len(first.string)}

    if not isinstance(last, tuple):
        return highlight_range(first, last)

    # statement continue on next line
    mark = highlight_range(first, last[0])[row]
    return {row: mark[:-1] + "^-->"}
def get_expression_before_specified_token(bad_token, tokens, specified_token):
    """Return the text of the expression that starts at `bad_token` and ends
    just before `specified_token`, or None if no end token is found.

    The range is obtained from get_last_token_before_specified. If the
    statement continues on another line, "..." followed by the expected
    closing bracket is appended to the last token.
    """
    start, end = get_last_token_before_specified(bad_token, tokens, specified_token)
    if end is None:
        return None

    closing = ""
    continues_on_next_line = isinstance(end, tuple)
    if continues_on_next_line:
        end, closing = end

    kept = []
    started = False
    for tok in tokens:
        if tok == start:
            started = True

        if not started:
            # Tokens located before the expression are kept, but emptied.
            blank = tok.copy()
            blank.string = ""
            kept.append(blank)
        elif tok is end:  # == would only compare string values
            if continues_on_next_line:
                tok = tok.copy()
                tok.string += "..." + closing
            kept.append(tok)
            break
        else:
            kept.append(tok)

    return token_utils.untokenize(kept).strip()
def add_mark_to_new_statement(statement_object, new_statement, mark):
    """Return `new_statement` with `mark` inserted below each modified line.

    The lines of `new_statement` are paired with those of
    `statement_object.entire_statement`; a line counts as modified when it
    differs from its counterpart once surrounding whitespace is ignored.
    Pairing stops at the end of the shorter of the two texts.
    """
    old_lines = statement_object.entire_statement.split("\n")
    new_lines = new_statement.split("\n")

    result = []
    for old, new in zip(old_lines, new_lines):
        changed = old.strip() != new.strip()
        result.extend([new, mark] if changed else [new])
    return "\n".join(result)
|