File: parser.py

package info (click to toggle)
python-baron 0.10.1-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 2,080 kB
  • sloc: python: 26,926; makefile: 126; sh: 27
file content (165 lines) | stat: -rw-r--r-- 6,302 bytes parent folder | download
(line-number gutter from the source-browser page removed — it carried no content; the file's code follows below)
import errno
import os
import json
import stat
import tempfile
import warnings

from .token import BaronToken

from rply import ParserGenerator
from rply.parser import LRParser
from rply.parsergenerator import LRTable
from rply.errors import ParserGeneratorWarning
from rply.grammar import Grammar
from .utils import BaronError


class ParsingError(BaronError):
    """Raised by BaronLRParser.parse() when a token has no action in the
    current LR state (i.e. the input is not derivable from the grammar).
    """
    pass


class BaronParserGenerator(ParserGenerator):
    """rply ParserGenerator variant whose build() returns a BaronLRParser.

    NOTE(review): this looks like a near-verbatim copy of
    rply.ParserGenerator.build(), patched only to (a) construct a
    BaronLRParser at the end and (b) comment out the shift/reduce and
    reduce/reduce conflict warnings — confirm against the pinned rply
    version when upgrading it.
    """

    def build(self):
        """Build the grammar and LR table (using an on-disk cache when a
        valid one exists) and return a BaronLRParser over that table.

        :return: a BaronLRParser wired to ``self.error_handler``.
        """
        g = Grammar(self.tokens)

        # Precedence levels are 1-based; each (assoc, terms) tuple from
        # self.precedence assigns the same level to every term in it.
        for level, (assoc, terms) in enumerate(self.precedence, 1):
            for term in terms:
                g.set_precedence(term, assoc, level)

        for prod_name, syms, func, precedence in self.productions:
            g.add_production(prod_name, syms, func, precedence)

        g.set_start()

        # Surface grammar hygiene problems (declared-but-unused tokens,
        # unreachable productions) as warnings, not errors.
        for unused_term in g.unused_terminals():
            warnings.warn(
                "Token %r is unused" % unused_term,
                ParserGeneratorWarning,
                stacklevel=2
            )
        for unused_prod in g.unused_productions():
            warnings.warn(
                "Production %r is not reachable" % unused_prod,
                ParserGeneratorWarning,
                stacklevel=2
            )

        # These three calls must run in this order before an LRTable can
        # be derived from the grammar.
        g.build_lritems()
        g.compute_first()
        g.compute_follow()

        # Cache file name embeds the rply version, the cache id and a
        # hash of the grammar, so any change invalidates the cache.
        # win32 temp directories are already per-user
        if os.name == "nt":
            cache_file = os.path.join(
                tempfile.gettempdir(),
                "rply-%s-%s-%s.json" % (self.VERSION, self.cache_id, self.compute_grammar_hash(g))
            )
        else:
            # On POSIX the shared /tmp requires the uid in the name plus
            # the ownership/mode check below to avoid trusting another
            # user's file.
            cache_file = os.path.join(
                tempfile.gettempdir(),
                "rply-%s-%s-%s-%s.json" % (self.VERSION, os.getuid(), self.cache_id, self.compute_grammar_hash(g))
            )
        table = None
        if os.path.exists(cache_file):
            with open(cache_file) as f:
                try:
                    data = json.load(f)
                except Exception:
                    # Corrupt cache: discard it and fall through to a
                    # full rebuild.
                    # NOTE(review): os.remove() on a file that is still
                    # open fails on Windows — confirm this path is only
                    # hit where unlink-while-open is allowed.
                    os.remove(cache_file)
                    data = None

                if data is not None:
                    # Only trust the cache if we own it and nobody else
                    # can write it (mode exactly 0600); NT temp dirs are
                    # already per-user so the check is skipped there.
                    stat_result = os.fstat(f.fileno())
                    if (
                        os.name == "nt" or (
                            stat_result.st_uid == os.getuid()
                            and stat.S_IMODE(stat_result.st_mode) == 0o0600
                        )
                    ):
                        if self.data_is_valid(g, data):
                            table = LRTable.from_cache(g, data)

        if table is None:
            table = LRTable.from_grammar(g)
            try:
                # O_EXCL makes creation atomic: if another process wrote
                # the cache first we simply skip writing (EEXIST).
                fd = os.open(cache_file, os.O_RDWR | os.O_CREAT | os.O_EXCL, 0o0600)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
            else:
                with os.fdopen(fd, "w") as f:
                    json.dump(self.serialize_table(table), f)
        # meh :(
        # (upstream rply warns about LR conflicts here; baron's grammar
        # has known conflicts, so the warnings are deliberately silenced)
        # if table.sr_conflicts:
            # warnings.warn(
                # "%d shift/reduce conflict%s" % (len(table.sr_conflicts), "s" if len(table.sr_conflicts) > 1 else ""),
                # ParserGeneratorWarning,
                # stacklevel=2,
            # )
        # if table.rr_conflicts:
            # warnings.warn(
                # "%d reduce/reduce conflict%s" % (len(table.rr_conflicts), "s" if len(table.rr_conflicts) > 1 else ""),
                # ParserGeneratorWarning,
                # stacklevel=2,
            # )
        return BaronLRParser(table, self.error_handler)


class BaronLRParser(LRParser):
    """LRParser variant whose parse() accumulates the rendered source it
    has consumed so that a syntax error can show the offending location
    with a few lines of context, raising ParsingError instead of rply's
    own error type.
    """

    def parse(self, tokenizer, state=None):
        """Run the LR automaton over ``tokenizer``.

        :param tokenizer: iterator of BaronToken objects (each must
            support gettokentype() and render()).
        :param state: opaque user state forwarded to the reduction
            callbacks via _reduce_production.
        :return: the single symbol left on the stack when the accept
            action fires (the parse result).
        :raises ParsingError: on a token with no action in the current
            state; the message embeds up to 8 lines of already-parsed
            source plus the set of tokens that would have been accepted.
        """
        lookahead = None
        lookaheadstack = []

        # Parallel stacks: LR state numbers and the matching symbols;
        # state 0 / the $end sentinel are the bottom entries.
        statestack = [0]
        symstack = [BaronToken("$end", "$end")]

        current_state = 0

        # Source text reconstructed from every token consumed so far —
        # only used to build the error context below.
        parsed_file_content = ""

        while True:
            # States with a single possible reduction skip the lookahead
            # fetch entirely.
            if self.lr_table.default_reductions[current_state]:
                t = self.lr_table.default_reductions[current_state]
                current_state = self._reduce_production(t, symstack, statestack, state)
                continue

            if lookahead is None:
                if lookaheadstack:
                    lookahead = lookaheadstack.pop()
                else:
                    try:
                        lookahead = next(tokenizer)
                    except StopIteration:
                        lookahead = None

                if lookahead is None:
                    # Exhausted input: synthesize the end-marker token.
                    lookahead = BaronToken("$end", "$end")
                else:
                    parsed_file_content += lookahead.render()

            ltype = lookahead.gettokentype()
            if ltype in self.lr_table.lr_action[current_state]:
                t = self.lr_table.lr_action[current_state][ltype]
                if t > 0:
                    # Positive action: shift the lookahead and go to
                    # state t.
                    statestack.append(t)
                    current_state = t
                    symstack.append(lookahead)
                    lookahead = None
                    continue
                elif t < 0:
                    # Negative action: reduce by production -t.
                    current_state = self._reduce_production(t, symstack, statestack, state)
                    continue
                else:
                    # Action 0: accept — the parse result is the symbol
                    # on top of the stack.
                    n = symstack[-1]
                    return n
            else:
                # No action for this token: build a human-readable error
                # showing the last 8 lines parsed, an arrow at the
                # failure point, and the acceptable token types.
                debug_output = parsed_file_content.split("\n")
                debug_output = list(zip(range(1, len(debug_output) + 1), debug_output))
                debug_output = debug_output[-8:]
                debug_output = "\n".join(["%4s %s" % (x[0], x[1]) for x in debug_output])
                debug_output += "<---- here"
                debug_output = "Error, got an unexpected token %s here:\n\n" % ltype + debug_output
                debug_output += "\n\nThe token %s should be one of those: %s" % (ltype, ", ".join(sorted(self.lr_table.lr_action[current_state].keys())))
                debug_output += "\n\nBaron has failed to parse this input. If this is valid python code (and by that I mean that the python binary successfully parse this code without any syntax error) (also consider that baron does not yet parse python 3 code integrally) it would be kind if you can extract a snippet of your code that make Baron fails and open a bug here: https://github.com/PyCQA/baron/issues\n\nSorry for the inconvenience."
                raise ParsingError(debug_output)