File: yacc2py.py

package info (click to toggle)
python-ptk 1.3.8%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 416 kB
  • sloc: python: 3,616; makefile: 200
file content (352 lines) | stat: -rw-r--r-- 12,987 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

"""
Converts a Yacc/Bison grammar definition into a Python skeleton that uses ptk.
"""

import io
import getopt
import sys
import collections
import codecs
import re

from ptk.parser import production, LRParser, ParseError
from ptk.lexer import token, ReLexer, EOF
from ptk.regex import buildRegex, DeadState


# A symbol on the right-hand side of a production: 'name' is the symbol as it
# appears in the generated production string, 'argname' is the argument name
# to bind it to (None when no argument should be generated for it).
Symbol = collections.namedtuple('Symbol', 'name argname')


class Options(object):
    """
    Command-line options of the converter.

    Attributes set by the constructor:
      compact   -- True when -c/--compact was given
      arguments -- True when -a/--arguments was given
      filename  -- value of -o/--output (mandatory)
    """
    def __init__(self, opts):
        """
        Build the options from a list of (option, value) pairs, as returned
        by getopt.getopt().

        Prints a message and the usage text, then exits with status 1, if
        --compact and --arguments are both given or if no output file is
        specified. Prints the usage text and exits with status 0 on
        -h/--help.
        """
        self.compact = False
        self.arguments = False
        self.filename = None
        for opt, val in opts:
            if opt in ('-c', '--compact'):
                self.compact = True
            elif opt in ('-a', '--arguments'):
                self.arguments = True
            elif opt in ('-o', '--output'):
                self.filename = val
            elif opt in ('-h', '--help'):
                self.usage()

        if self.compact and self.arguments:
            print('--compact and --arguments are not compatible')
            self.usage(1)

        if self.filename is None:
            print('Output file not specified')
            self.usage(1)

    def usage(self, exitCode=0):
        """Print the usage text and exit the process with exitCode."""
        self._printUsage(exitCode)

    @staticmethod
    def _printUsage(exitCode):
        # Usable without an instance, so that create() can report a
        # command-line error before any Options object exists.
        print('Usage: %s [options] filename' % sys.argv[0])
        print('Options:')
        print('  -h, --help      Print this')
        print('  -c, --compact   Create one method for all alternatives of a production')
        print('  -o, --output <filename> Output to file (mandatory)')
        print('  -a, --arguments Generate argument names for items in productions (incompatible with --compact)')
        sys.exit(exitCode)

    @staticmethod
    def create():
        """
        Parse sys.argv and return an (Options, positional arguments) pair.

        An unknown option or a missing option argument is reported, followed
        by the usage text, and the process exits with status 1 instead of
        letting getopt.GetoptError propagate as a traceback.
        """
        try:
            opts, args = getopt.getopt(sys.argv[1:], 'caho:', ['compact', 'arguments', 'help', 'output='])
        except getopt.GetoptError as exc:
            print(exc)
            Options._printUsage(1)
        return Options(opts), args


class NullToken(object):
    """
    Character consumer that matches any text (newlines included) up to and
    including an end marker, using a ptk regex.

    feed() returns the pair (None, None) once the regex reports a match or
    raises DeadState, and None otherwise. Presumably the (None, None) pair
    tells the lexer to drop the consumed text without emitting a token --
    confirm against the ptk consumer protocol.
    """
    def __init__(self, endMarker):
        # The marker is escaped so that it is matched literally
        pattern = '(.|\n)*%s' % re.escape(endMarker)
        self.__rx = buildRegex(pattern).start()

    def feed(self, char):
        try:
            matched = self.__rx.feed(char)
        except DeadState:
            return None, None
        if matched:
            return None, None
        return None


class YaccParser(LRParser, ReLexer):
    """
    Lexer and parser for Yacc/Bison grammar files.

    While parsing, the declared tokens, precedences, start symbol and
    productions are collected on the instance; newSentence() then writes a
    Python skeleton built from them to the output stream.
    """
    def __init__(self, options, stream):
        """
        :param options: Object whose 'compact' and 'arguments' attributes
            drive code generation in newSentence()
        :param stream: Object with a write() method that receives the
            generated code
        """
        self.stream = stream
        self.options = options
        super().__init__()

        self.state = 0 # Number of '%%' separators seen so far
        self.yaccStartSymbol = None # Name given by %start, if any
        self.allTokens = list() # Names declared through %token
        self.allProductions = list() # (left symbol, list of alternatives) pairs
        self.precedences = list() # (associativity keyword, token names) pairs

    # Lexer

    # C declarations block: swallow everything up to the closing '%}'
    @token(r'%\{', types=[])
    def c_decl(self, tok):
        self.setConsumer(NullToken('%}'))

    # C comment: swallow everything up to the closing '*/'
    @token(r'/\*', types=[])
    def comment(self, tok):
        self.setConsumer(NullToken('*/'))

    # %union block: swallow everything up to the first '}'
    @token(r'%union\s*{', types=[]) # Hum, no LF possible before {
    def union(self, tok):
        self.setConsumer(NullToken('}'))

    @token(r'%%')
    def part_sep(self, tok):
        self.state += 1
        if self.state == 2:
            # Ignore C code after last %%
            class IgnoreCCode(object):
                def feed(self, char):
                    # Every character is dropped; only the end of input
                    # produces a result.
                    if char is EOF:
                        return EOF, EOF
            self.setConsumer(IgnoreCCode())

    # Whitespace between tokens is skipped
    @staticmethod
    def ignore(char):
        return char in [' ', '\t', '\n']

    @token(r'%(left|right|nonassoc)')
    def assoc_decl(self, tok):
        pass

    @token(r'[a-zA-Z_][a-zA-Z0-9_]*')
    def identifier(self, tok):
        pass

    # A leading 0 must be accepted here: '%expect 0' is a valid declaration
    @token('[0-9][0-9]*')
    def number(self, tok):
        tok.value = int(tok.value)

    # Double-quoted string; the token value is the text between the quotes
    @token('"')
    def string(self, tok):
        class StringParser(object):
            def __init__(self):
                self.state = 0 # 0: regular character, 1: right after a backslash
                self.value = io.StringIO()
            def feed(self, char):
                if self.state == 0:
                    if char == '"':
                        return 'string', self.value.getvalue()
                    if char == '\\':
                        self.state = 1
                    else:
                        self.value.write(char)
                elif self.state == 1:
                    # The escaped character is kept, the backslash is not
                    self.value.write(char)
                    self.state = 0
        self.setConsumer(StringParser())

    # Semantic action; the token value is the whole text, outer braces included
    @token(r'\{')
    def semantic_action(self, tok):
        # Don't try to be too smart; just balance {} that are not in string literals
        class CSemanticAction(object):
            def __init__(self):
                self.state = 0
                self.count = 1 # Number of currently open braces
                self.value = io.StringIO()
                self.value.write('{')

            def feed(self, char):
                self.value.write(char)
                if self.state == 0: # Nothing special
                    if char == '}':
                        self.count -= 1
                        if self.count == 0:
                            return 'semantic_action', self.value.getvalue()
                    elif char == '{':
                        self.count += 1
                    elif char == '\\':
                        self.state = 1
                    elif char == '\'':
                        self.state = 2
                    elif char == '"':
                        self.state = 4
                elif self.state == 1: # Escaping single char
                    self.state = 0
                elif self.state == 2: # Character literal. Note that this accepts several characters
                    if char == '\\':
                        self.state = 3
                    elif char == '\'':
                        self.state = 0
                elif self.state == 3: # Escaping in character literal
                    self.state = 2
                elif self.state == 4: # In string literal
                    if char == '\\':
                        self.state = 5
                    elif char == '"':
                        self.state = 0
                elif self.state == 5: # Escaping in string literal
                    self.state = 4
        self.setConsumer(CSemanticAction())

    # Single-quoted character; the token value is the character itself
    @token(r'\'.\'')
    def litteral_token(self, tok):
        tok.value = tok.value[1]

    # Parser

    @production('YACC_FILE -> META_DECLARATION* part_sep PRODUCTION_DECL*')
    def yacc_file(self):
        pass

    # Tokens, start symbol, etc

    @production('META_DECLARATION -> "%token" identifier+<tokens>')
    def token_declaration(self, tokens):
        self.allTokens.extend(tokens)

    @production('META_DECLARATION -> assoc_decl<assoc> identifier+<tokens>')
    def assoc_declaration(self, assoc, tokens):
        self.precedences.append((assoc, tokens))

    @production('META_DECLARATION -> "%start" identifier<name>')
    def start_declaration(self, name):
        self.yaccStartSymbol = name

    # Declarations that are accepted but have no effect on the output
    @production('META_DECLARATION -> "%type" identifier identifier+')
    @production('META_DECLARATION -> "%expect" number')
    @production('META_DECLARATION -> "%debug"')
    @production('META_DECLARATION -> "%defines"')
    @production('META_DECLARATION -> "%destructor" semantic_action identifier+')
    @production('META_DECLARATION -> "%file-prefix" "=" string')
    @production('META_DECLARATION -> "%locations"')
    @production('META_DECLARATION -> "%name-prefix" "=" string')
    @production('META_DECLARATION -> "%no-parser"')
    @production('META_DECLARATION -> "%no-lines"')
    @production('META_DECLARATION -> "%output" "=" string')
    @production('META_DECLARATION -> "%pure-parser"')
    @production('META_DECLARATION -> "%token-table"')
    @production('META_DECLARATION -> "%verbose"')
    @production('META_DECLARATION -> "%yacc"')
    def ignored_declaration(self):
        pass

    # Productions

    @production('PRODUCTION_DECL -> identifier<left> ":" PRODUCTION_RIGHT+("|")<right> ";"')
    def production_decl(self, left, right):
        self.allProductions.append((left, right))

    # One alternative, as a dict with keys 'names' (list of (symbol name,
    # argument name or None) pairs), 'action' and 'precedence'
    @production('PRODUCTION_RIGHT -> SYMBOL*<symbols>')
    def production_right(self, symbols):
        names = list()
        indexes = dict()
        for symbol in symbols:
            if symbol.argname is None:
                names.append((symbol.name, None))
            else:
                # Repeated argument names get a numeric suffix, starting
                # with _2 for the second occurrence
                index = indexes.get(symbol.argname, 0)
                argname = symbol.argname if index == 0 else '%s_%d' % (symbol.argname, index + 1)
                indexes[symbol.argname] = index + 1
                names.append((symbol.name, argname))

        return dict(names=names, action=None, precedence=None)

    @production('PRODUCTION_RIGHT -> PRODUCTION_RIGHT<prod> semantic_action<action>')
    def production_right_action(self, prod, action):
        if prod['action'] is not None:
            raise RuntimeError('Duplicate semantic action "%s"' % action)
        prod['action'] = action
        return prod

    @production('PRODUCTION_RIGHT -> PRODUCTION_RIGHT<prod> "%prec" identifier<prec>')
    def production_right_prec(self, prod, prec):
        if prod['precedence'] is not None:
            raise RuntimeError('Duplicate precedence declaration "%s"' % prec)
        prod['precedence'] = prec
        return prod

    # Identifiers declared through %token get no argument name
    @production('SYMBOL -> identifier<tok>')
    def symbol_from_identifier(self, tok):
        return Symbol(tok, None if tok in self.allTokens else tok)

    # Character literals appear double-quoted in the generated productions
    @production('SYMBOL -> litteral_token<tok>')
    def symbol_from_litteral(self, tok):
        return Symbol('"%s"' % tok, None)

    def newSentence(self, result):
        """
        Write the Python skeleton for everything collected during parsing to
        the output stream. The 'result' argument is not used.

        Presumably invoked by ptk once the whole input has been reduced to
        the start symbol -- confirm against the LRParser documentation.
        """
        self.stream.write('from ptk.lexer import ReLexer, token\n')
        self.stream.write('from ptk.parser import LRParser, production, leftAssoc, rightAssoc, nonAssoc\n')
        self.stream.write('\n')

        # Precedence declarations become class decorators
        decorators = {'%left': 'leftAssoc', '%right': 'rightAssoc', '%nonassoc': 'nonAssoc'}
        for assocType, tokens in self.precedences:
            self.stream.write('@%s(%s)\n' % (decorators[assocType],
                                             ', '.join([repr(tok) for tok in tokens])))
        self.stream.write('class Parser(LRParser, ReLexer):\n')
        if self.yaccStartSymbol is not None:
            self.stream.write('    startSymbol = %s\n' % repr(self.yaccStartSymbol))
            self.stream.write('\n')

        # One token method per declared token; the regular expression is
        # left empty in the generated code
        self.stream.write('    # Lexer\n')
        for name in self.allTokens:
            self.stream.write('\n')
            self.stream.write('    @token(r\'\')\n')
            self.stream.write('    def %s(self, tok):\n' % name)
            self.stream.write('        pass\n')

        methodIndexes = dict()
        def methodName(name):
            # Repeated method names get a numeric suffix, starting with _2
            # for the second occurrence
            index = methodIndexes.get(name, 0)
            methodIndexes[name] = index + 1
            return name if index == 0 else '%s_%d' % (name, index + 1)

        for name, prods in self.allProductions:
            for prod in prods:
                if not self.options.compact:
                    self.stream.write('\n')
                # The original semantic action is kept as a comment
                if prod['action'] is not None:
                    for line in prod['action'].split('\n'):
                        self.stream.write('    # %s\n' % line)
                symnames = []
                for aname, argname in prod['names']:
                    symnames.append(aname if argname is None or not self.options.arguments else '%s<%s>' % (aname, argname))
                self.stream.write('    @production(\'%s -> %s\'' % (name, ' '.join(symnames)))
                if prod['precedence'] is not None:
                    self.stream.write(', priority=%s' % repr(prod['precedence']))
                self.stream.write(')\n')
                if not self.options.compact:
                    # One method per alternative
                    self.stream.write('    def %s(self' % methodName(name))
                    if self.options.arguments:
                        for aname, argname in prod['names']:
                            if argname is not None:
                                self.stream.write(', %s' % argname)
                    self.stream.write('):\n')
                    self.stream.write('        pass\n')
            if self.options.compact:
                # A single method carries the decorators of all alternatives
                self.stream.write('    def %s(self):\n' % methodName(name))
                self.stream.write('        pass\n')
                self.stream.write('\n')


def main(filename):
    """
    Convert every grammar file named on the command line.

    The input file names and the options are taken from sys.argv through
    Options.create(); the 'filename' argument is not used and is only kept
    for compatibility with existing callers.

    Exits with status 1 on the first parse error. The time spent parsing
    each file is printed in all cases.
    """
    import time
    options, filenames = Options.create()
    toStdout = options.filename == '-'
    for inputName in filenames:
        with codecs.getreader('utf_8')(open(inputName, 'rb')) as fileobj:
            output = sys.stdout if toStdout else codecs.getwriter('utf_8')(open(options.filename, 'wb'))
            try:
                parser = YaccParser(options, output)
                t0 = time.time()
                try:
                    parser.parse(fileobj.read())
                except ParseError as exc:
                    print('Parse error: %s' % exc)
                    tokens = exc.expecting()
                    if tokens:
                        print('Was expecting %s' % ', '.join(map(repr, sorted(tokens))))
                    sys.exit(1)
                finally:
                    print('== Parsed file in %d ms.' % int(1000 * (time.time() - t0)))
            finally:
                # Close (and thereby flush) the output file, even on error;
                # sys.stdout is not ours to close.
                if not toStdout:
                    output.close()


if __name__ == '__main__':
    import logging
    logging.basicConfig(level=logging.WARNING, format='%(asctime)-15s %(levelname)-8s %(name)-15s %(message)s')

    import sys
    # main() reads its options and input files from sys.argv itself; do not
    # index sys.argv blindly, so that running without arguments prints the
    # usage text instead of failing with an IndexError.
    main(sys.argv[1] if len(sys.argv) > 1 else None)