File: syntax_utils.py

package info (click to toggle)
python-friendly-traceback 0.7.62%2Bgit20240811.d7dbff6-1.1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 9,264 kB
  • sloc: python: 21,500; makefile: 4
file content (327 lines) | stat: -rw-r--r-- 10,777 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# type: ignore
"""This file contains various functions used for analysis of SyntaxErrors"""

import unicodedata

from .. import debug_helper, token_utils
from ..ft_gettext import current_lang

# Short alias for `current_lang.translate`; the user-facing strings in this
# module are wrapped in `_(...)` so that they go through that function.
_ = current_lang.translate


def matching_brackets(bra, ket):
    """Tell whether ``bra`` and ``ket`` form a matched pair of brackets.

    Returns True only when ``bra`` is an opening bracket and ``ket`` is the
    closing bracket of the same kind, i.e. ``()``, ``[]`` or ``{}``.
    """
    bracket_pairs = (("(", ")"), ("[", "]"), ("{", "}"))
    # Plain equality tests are used (instead of a dict lookup) so that any
    # object comparing equal to a bracket string is accepted as argument.
    return any(
        bra == opening and ket == closing for opening, closing in bracket_pairs
    )


def name_bracket(bracket):
    """Return the translated, human-readable name of a bracket character.

    ``bracket`` is converted with ``str()`` before the lookup, so it may be
    a plain string or any object whose string form is one of the six
    bracket characters. A ``KeyError`` is raised for anything else.
    """
    translated_names = dict(
        (
            ("(", _("parenthesis `(`")),
            (")", _("parenthesis `)`")),
            ("[", _("square bracket `[`")),
            ("]", _("square bracket `]`")),
            ("{", _("curly bracket `{`")),
            ("}", _("curly bracket `}`")),
        )
    )
    symbol = str(bracket)  # bracket could be a Token or a str
    return translated_names[symbol]


# fmt: off
# The following has been taken from https://unicode-table.com/en/sets/quotation-marks/
# Unicode characters that look like quotation marks but that Python does not
# accept as string delimiters. Every entry is a single code point.
bad_quotation_marks = [
    "«", "»",
    "‹", "›",
    "„", "“",
    "‟", "”",
    "❝", "❞",
    "❮", "❯",
    "⹂", "〝",
    # U+FF02 FULLWIDTH QUOTATION MARK is written as an escape: it is easily
    # normalised to the ASCII double quote by tools, which would turn this
    # entry into the start of a triple-quoted string.
    "〞", "\uFF02",
    "‚", "’", "‛", "‘",
    "❛", "❜",
    "❟",
]
# fmt: on


def identify_bad_quote_char(char, line):
    """Explain the use of a fancy unicode quotation mark, if ``char`` is one.

    Returns None when ``char`` is not listed in ``bad_quotation_marks`` (or
    when its unicode name cannot be looked up); otherwise returns a dict
    with the keys ``"cause"`` and ``"suggest"``.

    ``line`` is the text in which ``char`` was found: when it contains a
    single bad quotation mark, an alternative explanation (less than sign,
    greater than sign or comma) is appended for look-alike characters.
    """
    if char not in bad_quotation_marks:
        return

    try:
        # unicodedata.name raises TypeError unless it is given exactly one
        # unicode character.
        unicode_name = unicodedata.name(char, "unknown")
    except TypeError:
        return

    hint = _("Did you mean to use a normal quote character, `'` or `\"`?\n")
    cause = _(
        "I suspect that you used a fancy unicode quotation mark\n"
        "whose name is {name}\n"
        "instead of a normal single or double quote for a string."
        "\n"
    ).format(name=unicode_name)

    nb_bad_quotes = sum(1 for item in line if item in bad_quotation_marks)

    # A lone fancy quote has no partner to delimit a string with, so
    # perhaps a different, similar looking, character was intended.
    if nb_bad_quotes == 1:
        if char in ("‹", "❮"):
            cause = cause + _(
                "Or perhaps, you meant to write a less than sign, `<`.\n"
            )
        elif char in ("›", "❯"):
            cause = cause + _(
                "Or perhaps, you meant to write a greater than sign, `>`.\n"
            )
        elif char in ("‚", "❟"):
            cause = cause + _("Or perhaps, you meant to write a comma.\n")

    return {"cause": cause, "suggest": hint}


def identify_bad_math_symbol(char, line):
    """Suggest a math symbol or comma in place of a fancy quotation mark.

    Works like ``identify_bad_quote_char`` but is meant for the case where
    an 'invalid decimal literal' message is being analyzed.

    Returns None when ``char`` is not listed in ``bad_quotation_marks`` (or
    when its unicode name cannot be looked up). Returns a dict with the keys
    ``"cause"`` and ``"suggest"`` when ``char`` is the only bad quotation
    mark found in ``line`` and it resembles ``<``, ``>`` or a comma.
    Returns an empty dict in all other cases.
    """
    if char not in bad_quotation_marks:
        return

    try:
        # unicodedata.name raises TypeError unless it is given exactly one
        # unicode character.
        unicode_name = unicodedata.name(char, "unknown")
    except TypeError:
        return

    cause = _(
        "I suspect that you used a fancy unicode quotation mark\n"
        "whose name is {name}.\n"
        "\n"
    ).format(name=unicode_name)

    nb_bad_quotes = sum(1 for item in line if item in bad_quotation_marks)
    # With a matching quote present, no other character is suggested.
    if nb_bad_quotes != 1:
        return {}

    if char in ("‹", "❮"):
        cause = cause + _("Perhaps, you meant to write a less than sign, `<`.\n")
        hint = _("Did you mean to write a less than sign, `<`?\n")
    elif char in ("›", "❯"):
        cause = cause + _(
            "Perhaps, you meant to write a greater than sign, `>`.\n"
        )
        hint = _("Did you mean to write a greater than sign, `>`?\n")
    elif char in ("‚", "❟"):
        cause = cause + _("Perhaps, you meant to write a comma.\n")
        hint = _("Did you mean to write a comma?\n")
    else:
        return {}

    if not hint:
        return {}
    return {"cause": cause, "suggest": hint}


def identify_unicode_fraction(char):
    """Explain the use of a unicode fraction character, if ``char`` is one.

    The decision is based on the official unicode name of ``char``.

    Returns None when ``char`` is not a single unicode character or when
    its name does not contain the word FRACTION. Otherwise returns a dict
    with the keys ``"cause"`` and ``"suggest"``:

    * for FRACTION SLASH, the division operator is suggested;
    * for a VULGAR FRACTION whose name can be decoded (e.g. ONE HALF),
      the equivalent ``num/denom`` expression is suggested;
    * for any other fraction character, a generic explanation is given.
    """
    # unicodedata.name raises TypeError unless it is given exactly one
    # unicode character.
    try:
        char_name = unicodedata.name(char, "unknown")
    except TypeError:
        return
    if "FRACTION" not in char_name:
        return
    if char_name == "FRACTION SLASH":
        hint = _("Did you mean to use the division operator, `/`?\n")
        cause = _(
            "I suspect that you used the unicode character known as\n"
            "'FRACTION SLASH', which looks similar to\n"
            "but is different from the division operator `/`.\n"
        )
        return {"cause": cause, "suggest": hint}

    hint = _("Did you use a unicode fraction?\n")
    cause = _(
        "I suspect that you used the unicode character `{char}`\n"
        "meant to represent a fraction.\n"
        "The name of this unicode character is {name}.\n"
    ).format(char=char, name=char_name)
    # Answer used whenever the name cannot be decoded into num/denom.
    generic = {"cause": cause, "suggest": hint}

    if not char_name.startswith("VULGAR FRACTION "):
        return generic

    # Names that can be decoded have the form "<NUMERATOR> <DENOMINATOR>",
    # e.g. "THREE QUARTERS"; anything else gets the generic answer.
    words = char_name.replace("VULGAR FRACTION ", "").split(" ")
    if len(words) != 2:
        return generic
    num_word, denom_word = words

    numerator_words = (
        "ZERO", "ONE", "TWO", "THREE", "FOUR", "FIVE", "SIX", "SEVEN", "EIGHT", "NINE"
    )
    if num_word not in numerator_words:
        return generic
    num = numerator_words.index(num_word)

    possible_choices = (
        ("HALF", 2),
        ("THIRD", 3),
        ("QUARTER", 4),
        ("FIFTH", 5),
        ("SIXTH", 6),
        ("SEVENTH", 7),
        ("EIGHTH", 8),
        ("NINTH", 9),
        ("TENTH", 10),
    )
    # Substring test so that plural forms such as "THIRDS" are recognized.
    denominator = next(
        (value for word, value in possible_choices if word in denom_word), None
    )
    if denominator is None:
        return generic

    hint = _("Did you mean `{num}/{denom}`?\n").format(num=num, denom=denominator)
    cause = _(
        "You used the unicode character {char} which is known as\n"
        "{name}\n"
        "I suspect that you meant to write the fraction `{num}/{denom}` instead.\n"
    ).format(num=num, denom=denominator, char=char, name=char_name)
    return {"cause": cause, "suggest": hint}


def highlight_single_token(token):
    """Return a one-entry dict mapping the row of ``token`` to a marker
    line that underlines the token with ``^`` characters.
    """
    indent = " " * token.start_col
    carets = "^" * len(token.string)
    return {token.start_row: indent + carets}


def highlight_added_token(prev_token, token):
    """Return a marker line for a token inserted right after ``prev_token``.

    The ``^`` characters start at the end column of ``prev_token`` and span
    the length of ``str(token)``; the result is a one-entry dict keyed by
    the row of ``prev_token``.
    """
    width = len(str(token))
    indent = " " * prev_token.end_col
    return {prev_token.start_row: indent + "^" * width}


def highlight_two_tokens(
    first, second, first_marker="^", second_marker="^", between=" "
):
    """Build marker lines underlining two tokens, possibly with a
    different marker character for each of them.

    When both tokens are on the same row, a single marker line is returned,
    with ``between`` repeated to fill the gap separating the tokens. If the
    tokens touch each other and ``between`` is a single space, both are
    marked with ``^``, whatever markers were requested.

    When the tokens are on different rows, one marker line is returned for
    each row, and ``between`` (stripped of surrounding whitespace) is
    appended to the first one.

    The result is a dict mapping row numbers to marker lines.
    """
    same_row = first.start_row == second.start_row
    if same_row and first.end_col == second.start_col and between == " ":
        first_marker = second_marker = "^"

    first_part = " " * first.start_col + first_marker * len(first.string)
    second_part = second_marker * len(second.string)

    if same_row:
        gap = between * (second.start_col - first.end_col)
        return {first.start_row: first_part + gap + second_part}

    return {
        first.start_row: first_part + between.strip(),
        second.start_row: " " * second.start_col + second_part,
    }


def highlight_range(first, last):
    """Return a marker line with ``^`` running from the start column of
    ``first`` up to the end column of ``last``.

    The result is a one-entry dict keyed by the row of ``first``.
    """
    indent = " " * first.start_col
    width = last.end_col - first.start_col
    return {first.start_row: indent + "^" * width}


def get_last_token_before_specified(bad_token, tokens, specified_token):
    """Find the range of tokens going from ``bad_token`` up to the token
    that precedes ``specified_token``.

    ``tokens`` is scanned in order; everything before the first token equal
    to ``bad_token`` is skipped. From there on, opening brackets are pushed
    on a stack and closing brackets pop it, so that ``specified_token`` is
    only recognized when no bracket opened after ``bad_token`` is pending.

    Returns a tuple ``(first, last)`` where ``first`` is ``bad_token`` (or
    the opening bracket immediately before it, when the matching closing
    bracket is met during the scan) and ``last`` is one of:

    * the token preceding ``specified_token``, when it is found;
    * ``None`` when ``specified_token`` is not found or when an unmatched
      bracket is met;
    * a tuple ``(token, ket)`` when the scan reaches a comment or a token
      on a row different from that of ``first``; ``ket`` is the closing
      bracket matching the innermost pending opening bracket, or ``"|"``
      when no bracket is pending.
    """
    first = bad_token
    brackets = []  # stack of opening brackets seen since bad_token
    prev = first
    before_first = None  # opening bracket immediately preceding bad_token, if any
    found_first = False
    for tok in tokens:
        # NOTE(review): this test is repeated for every token, including those
        # following bad_token; if token equality compares string values only,
        # a later look-alike token would also take this branch -- confirm.
        if tok == bad_token:
            found_first = True
            # NOTE(review): `in` is a substring test here, so an empty
            # prev.string would also satisfy it -- confirm whether tokens with
            # an empty string can occur. The same applies to the bracket tests
            # on tok.string below.
            if prev.string in "([{":
                before_first = prev
        if not found_first:
            prev = tok
            continue
        if tok.string in "([{":
            brackets.append(tok.string)
        elif tok.string in ")]}":
            if not brackets:
                # Closing bracket without an opening one after bad_token:
                # acceptable only if it closes the bracket just before it,
                # in which case the range is extended to include that bracket.
                if before_first is not None and matching_brackets(
                    before_first, tok.string
                ):
                    first = before_first
                else:
                    return first, None  # should not happen; unmatched bracket
            else:
                bra = brackets.pop()
                if not matching_brackets(bra, tok.string):
                    return first, None  # should not happen; unmatched bracket
        elif not brackets and tok == specified_token:
            return first, prev
        if (
            tok.start_row != first.start_row or tok.is_comment()
        ):  # statement continue on next line
            if not brackets:
                debug_helper.log("get_last_token_before_specified:")
                debug_helper.log("line continues but not open bracket found.")
                ket = "|"
            else:
                # Pick the closing bracket that matches the innermost
                # pending opening bracket.
                bra = brackets.pop()
                for ket in ")]}":
                    if matching_brackets(bra, ket):
                        break
            return first, (prev, ket)
        prev = tok

    # tokens exhausted without meeting specified_token
    return first, None


def highlight_before_specified_token(bad_token, tokens, specified_token):
    """Return a marker line for the tokens going from ``bad_token`` up to
    the token preceding ``specified_token``.

    The range is determined by ``get_last_token_before_specified``. When no
    end of range is found, only the first token is underlined. When the
    statement continues on another line, the last ``^`` of the marker line
    is replaced by ``^-->``.

    The result is a one-entry dict mapping a row number to a marker line.
    """
    start, end = get_last_token_before_specified(bad_token, tokens, specified_token)
    if end is None:
        return highlight_single_token(start)

    if not isinstance(end, tuple):
        return highlight_range(start, end)

    # statement continue on next line: end is (last token, closing bracket)
    underline = highlight_range(start, end[0])[start.start_row]
    return {start.start_row: underline[:-1] + "^-->"}


def get_expression_before_specified_token(bad_token, tokens, specified_token):
    """Return the source text going from ``bad_token`` up to the token
    preceding ``specified_token``, or None if that range cannot be found.

    The range is determined by ``get_last_token_before_specified``. Tokens
    located before the range are replaced by copies with an empty string
    before the whole list is given to ``token_utils.untokenize``; the
    result is stripped of surrounding whitespace.

    When the statement continues on another line, ``...`` followed by the
    closing character reported for the range is appended to the last token.
    """
    first, last = get_last_token_before_specified(bad_token, tokens, specified_token)
    if last is None:
        return None

    closing = ""
    continues_on_next_line = isinstance(last, tuple)
    if continues_on_next_line:
        last, closing = last

    kept = []
    reached_first = False
    for tok in tokens:
        if tok == first:
            reached_first = True
        if not reached_first:
            blank = tok.copy()
            blank.string = ""
            kept.append(blank)
            continue
        if tok is not last:  # identity: == would only compare string values
            kept.append(tok)
            continue
        if continues_on_next_line:
            # work on a copy so that the original token is left untouched
            tok = tok.copy()
            tok.string += "..." + closing
        kept.append(tok)
        break

    return token_utils.untokenize(kept).strip()


def add_mark_to_new_statement(statement_object, new_statement, mark):
    """Return ``new_statement`` with ``mark`` inserted as an extra line
    after each line that differs from the original statement.

    The lines of ``statement_object.entire_statement`` and those of
    ``new_statement`` are compared pairwise, ignoring leading and trailing
    whitespace; lines without a counterpart are dropped.
    """
    original_lines = statement_object.entire_statement.split("\n")
    updated_lines = new_statement.split("\n")

    output = []
    for before, after in zip(original_lines, updated_lines):
        changed = before.strip() != after.strip()
        output.extend([after, mark] if changed else [after])
    return "\n".join(output)