File: grouper.py

package info (click to toggle)
python-baron 0.10.1-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 2,080 kB
  • sloc: python: 26,926; makefile: 126; sh: 27
file content (116 lines) | stat: -rw-r--r-- 3,898 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# encoding: utf-8

import re
from .utils import FlexibleIterator

# Pairs of (token accumulated so far, token that follows). When
# group_generator() sees such a pair it concatenates the two into one token.
# The check is applied twice per token, so the two-character result of a
# first merge (e.g. "**") can be extended by a second pair (("**", "=")
# giving "**=").
to_group = (
    # augmented assignment operators: +=, -=, *=, ...
    ("+", "="),
    ("-", "="),
    ("*", "="),
    ("/", "="),
    ("%", "="),
    ("&", "="),
    ("|", "="),
    ("^", "="),
    ("@", "="),
    # doubled operators: //, **, <<, >>
    ("/", "/"),
    ("*", "*"),
    ("<", "<"),
    (">", ">"),
    # comparison operators: ==, !=, <>, <=, >=
    ("=", "="),
    ("!", "="),
    ("<", ">"),
    ("<", "="),
    (">", "="),
    # three-character augmented assignments built on a first merge
    ("**", "="),
    ("//", "="),
    ("<<", "="),
    (">>", "="),
    # carriage return followed by line feed becomes one "\r\n" token
    ("\r", "\n"),
    # ellipsis "..."; a ".." that is not completed by a third dot is split
    # back into two "." tokens by group_generator()
    (".", "."),
    ("..", "."),
    # "->"
    ("-", ">"),
)

# First element of every pair above (duplicates included); group_generator()
# tests membership in it to decide whether a token can start a group. The
# second elements are unpacked into `_` and not used.
to_group_keys, _ = list(zip(*to_group))


def group(sequence):
    """Return every token produced by ``group_generator(sequence)`` as a list."""
    grouped = []
    grouped.extend(group_generator(sequence))
    return grouped


def match_on_next(regex, iterator):
    """Try ``regex`` (via ``re.match``) on the token ``iterator.show_next()`` returns.

    When the look-ahead value is falsy (for instance ``None`` or an empty
    string) that value is returned unchanged; otherwise the result of
    ``re.match`` is returned, i.e. a match object or ``None``.
    """
    upcoming = iterator.show_next()
    if not upcoming:
        return upcoming
    return re.match(regex, upcoming)


def group_generator(sequence):
    """Yield the tokens of ``sequence`` with multi-token lexemes merged.

    ``sequence`` is wrapped in a ``FlexibleIterator`` so upcoming tokens can
    be inspected before they are consumed.  Neighbouring string tokens that
    together form one lexical unit are concatenated and yielded as a single
    string:

    * operator pairs listed in ``to_group`` (``+=``, ``**=``, ``->``, ``...``);
    * a string prefix followed by a quoted string (``u"..."``, ``br'...'``);
    * numeric literals that arrive split around ``.`` or around the sign of
      an exponent, including an imaginary ``j``/``J`` suffix
      (``1.5``, ``1e+5``, ``1.5e3j``, ``1.5e-3j``);
    * a backslash line continuation together with the newline after it and
      the whitespace around it.

    Every other token is yielded unchanged.  A ``..`` that was not completed
    into an ellipsis is yielded as two separate ``.`` tokens.

    :param sequence: the string tokens to group.
    :return: a generator of (possibly merged) string tokens.
    """
    iterator = FlexibleIterator(sequence)
    while not iterator.end():
        current = next(iterator)

        # Operators.  Two passes, so that the result of a first merge
        # ("**", "//", "<<", "..") can take one more token ("**=", "...").
        for _ in range(2):
            if current in to_group_keys and matching_found(to_group, current, iterator.show_next()):
                current += next(iterator)

        # String prefixes: glue a one- or two-letter prefix to the quoted
        # string that follows it.
        # NOTE(review): "rb" is a valid Python 3 prefix but is not listed
        # here; confirm what the tokenizer accepts before adding it.
        if current in ('u', 'U', 'f', 'F', 'r', 'R', 'b', 'B') and str(iterator.show_next()).startswith(('"', "'")):
            current += next(iterator)
        if str(current).lower() in ("ur", "br", "fr", "rf") and str(iterator.show_next()).startswith(('"', "'")):
            current += next(iterator)

        if any(re.match(pattern, current) for pattern in (r'^\d+[eE]$', r'^\d+\.\d*[eE]$', r'^\.\d+[eE]$')):
            # The mantissa ends with a bare exponent marker, as in
            # ['123.123e', '+', '123']: take the sign and the digits.
            # NOTE(review): both tokens are consumed without look-ahead;
            # presumably truncated input makes next() raise here - confirm.
            current += next(iterator)
            current += next(iterator)

            # Sanity check on the merged literal.  The dot is escaped so it
            # only accepts a real decimal point, not any character.
            assert re.match(r'^\d+[eE][-+]?\d+[jJ]?$', current) or re.match(r'^\d*\.\d*[eE][-+]?\d+[jJ]?$', current)

        # Line continuation: backslash + newline (+ following whitespace).
        if current == "\\" and iterator.show_next() in ('\n', '\r\n'):
            current += next(iterator)
            if re.match(r'^\s+$', str(iterator.show_next())):
                current += next(iterator)

        # Same, when the line ending arrives as two tokens "\r" and "\n".
        if current == "\\" and iterator.show_next() == "\r" and iterator.show_next(2) == "\n":
            current += next(iterator)
            current += next(iterator)
            if re.match(r'^\s+$', str(iterator.show_next())):
                current += next(iterator)

        # Whitespace in front of a continuation backslash is kept with it.
        if re.match(r'^\s+$', current) and iterator.show_next() == "\\":
            current += next(iterator)
            current += next(iterator)
            if iterator.show_next() == "\n":
                current += next(iterator)
            if re.match(r'^\s+$', str(iterator.show_next())):
                current += next(iterator)

        # Decimal point: "1" + "." (+ "5") or "." + "5".  The token after a
        # leading dot may carry an exponent and an imaginary suffix
        # (".5e3j").
        if (re.match(r'^[_\d]+$', current) and match_on_next(r'^\.$', iterator)) or\
           (current == "." and match_on_next(r'^\d+[_\d]*([jJ]|[eE](\d+[jJ]?)?)?$', iterator)):
            current += next(iterator)

            # Only consume the next token if it is a non-empty run of
            # digits/underscores with an optional imaginary suffix.
            fraction = match_on_next(r'^[_\d]*[jJ]?$', iterator)
            if fraction and fraction.group():
                current += next(iterator)

        # "1." followed by the fractional digits and/or the exponent:
        # "5e", "e5", "5e3" and, with an imaginary suffix, "5e3j".
        if re.match(r'^\d+\.$', current) and match_on_next(r'^\d*[eE](\d+[jJ]?)?$', iterator):
            current += next(iterator)

        # "1e" / "1.e" followed by the exponent digits.
        if re.match(r'^\d+\.?[eE]$', current) and match_on_next(r'^\d+[jJ]?$', iterator):
            current += next(iterator)

        # Signed exponent after a dotted mantissa: "1.5e" + "-" + "3" or
        # "3j".  A bare "e"/"E" is excluded so that a name called "e"
        # followed by "-" and a number is left alone.
        if (re.match(r'^\d*\.?\d*[eE]$', current)
                and not re.match('[eE]', current)
                and match_on_next(r'^[-+]$', iterator)
                and iterator.show_next(2)
                and re.match(r'^\d+[jJ]?$', iterator.show_next(2))):
            current += next(iterator)
            current += next(iterator)

        # Two dots that were merged while looking for an ellipsis but were
        # not followed by a third one: give them back as separate tokens.
        if current == "..":
            yield "."
            yield "."
            continue

        yield current


def matching_found(to_group, current, target):
    """Tell whether ``(current, target)`` is one of the pairs in ``to_group``.

    ``to_group`` is an iterable of indexable pairs; the result is ``True``
    when some pair has ``current`` as first item and ``target`` as second
    item, ``False`` otherwise.
    """
    for pair in to_group:
        if pair[0] == current:
            follower = pair[1]
            if follower is target or follower == target:
                return True
    return False