File: generate_parser_code.py

package info (click to toggle)
mcrl2 201409.0-1
  • links: PTS, VCS
  • area: main
  • in suites: buster, jessie, jessie-kfreebsd
  • size: 46,348 kB
  • ctags: 29,960
  • sloc: cpp: 213,160; ansic: 16,219; python: 13,238; yacc: 309; lex: 214; xml: 197; makefile: 83; sh: 82; pascal: 17
file content (328 lines) | stat: -rwxr-xr-x 11,245 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
#!/usr/bin/env python

#~ Copyright 2011 Wieger Wesselink.
#~ Distributed under the Boost Software License, Version 1.0.
#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)

# This script generates code for traversing a DParser parse tree from a DParser grammar.
# It can only handle a subset of EBNF, so the result is not perfect. The layout of the
# DParser grammar is expected to be in a specific format.

import re
import string
from optparse import OptionParser
from mcrl2_utility import *
from parse_mcrl2_syntax import *
from path import *

# Return types for the productions of the mCRL2 grammar (passed to
# generate_code() together with mcrl2-syntax.g in main()).
# Each non-empty line holds a production name, optionally followed by the
# C++ type that the generated parse_<production> function returns.
# generate_code() records 'UNKNOWN' for a name that has no type.
MCRL2_MAPPING = '''
  ActDecl lps::action_label_list
  ActFrm action_formulas::action_formula
  ActIdSet core::identifier_string_list
  Action lps::action
  ActionLabelList lps::action_label
  ActionList lps::action_list
  ActionRenameRule lps::action_rename_rule
  ActionRenameRuleRHS lps::action_rename_rule_rhs
  ActionRenameRuleSpec lps::action_rename_rule
  ActionRenameSpec lps::action_rename_specification
  ActSpec lps::action_label_list
  BagEnumElt data::detail::data_expression_pair
  BagEnumEltList data::detail::data_expression_pair_list
  BesEqnDecl bes::boolean_equation
  BesEqnDeclList bes::boolean_equation_list
  BesEqnSpec bes::boolean_equation_system
  BesExpr bes::boolean_expression
  BesInit bes::boolean_variable
  BesSpec
  BesVar bes::boolean_variable
  CommExpr process::communication_expression
  CommExprList process::communication_expression_list
  CommExprSet process::communication_expression_list
  ConsSpec data::function_symbol_vector
  ConstrDecl data::structured_sort_constructor
  ConstrDeclList data::structured_sort_constructor_list
  DataExpr data::data_expression
  DataExprList data::data_expression_list
  DataExprUnit data::data_expression
  DataSpec data::data_specification
  DataValExpr data::data_expression
  Domain data::sort_expression_list
  EqnDecl data::data_equation
  EqnDeclList data::data_equation_list
  EqnSpec data::data_equation
  FixedPointOperator pbes_system::fixpoint_symbol
  GlobVarSpec data::variable_list
  IdDecl data::function_symbol
  IdsDecl data::function_symbol_vector
  IdsDeclList data::function_symbol_vector
  Init process::process_expression
  MapSpec data::function_symbol_vector
  mCRL2Spec lps::specification
  mCRL2SpecElt
  mCRL2SpecEltList
  MultAct lps::action_list
  MultActId process::action_name_multiset
  MultActIdList process::action_name_multiset_list
  MultActIdSet process::action_name_multiset_list
  PbesEqnDecl
  PbesEqnSpec
  PbesExpr pbes_system::pbes_expression
  PbesInit pbes_system::propositional_variable_instantiation
  PbesSpec pbes_system::pbes<>
  ProcDecl process::process_equation
  ProcDeclList process::process_equation_list
  ProcExpr process::process_expression
  ProcExprThenElse process::process_expression
  ProcSpec process::process_equation_list
  ProjDecl data::structured_sort_constructor_argument
  ProjDeclList data::structured_sort_constructor_argument_list
  PropVarDecl pbes_system::propositional_variable
  PropVarInst pbes_system::propositional_variable_instantiation
  RegFrm regular_formulas::regular_formula
  RenExpr process::rename_expression
  RenExprList process::rename_expression_list
  RenExprSet process::rename_expression_list
  SortDecl
  SimpleSortExpr data::sort_expression
  ComplexSortExpr data::sort_expression
  SortExpr data::sort_expression
  SortExprList data::sort_expression_list
  SortSpec
  StateFrm state_formulas::state_formula
  StateVarDecl
  VarDecl data::variable
  VarsDecl data::variable_list
  VarsDeclList data::variable_list
  VarSpec data::variable_list
  WhrExpr data::identifier_assignment
  WhrExprList data::identifier_assignment_list
'''

# Return types for the productions of the FSM grammar, in the format that
# generate_code() reads: a production name, optionally followed by a C++
# type. Names without a type are recorded as 'UNKNOWN'.
# The generate_code() call that uses this mapping is commented out in main().
FSM_MAPPING = '''
  FSM
  Separator
  ParameterList
  Parameter
  ParameterName
  DomainCardinality
  DomainValueList
  DomainValue
  StateList
  State
  TransitionList
  Transition
  Source
  Target
  Label
  QuotedString core::identifier_string
  SortExpr data::sort_expression
'''

# Production names of the DOT grammar, in the format that generate_code()
# reads. No line carries a type, so every production is recorded as 'UNKNOWN'.
# The generate_code() call that uses this mapping is commented out in main().
DOT_MAPPING = '''
graph
stmt_list
stmt
attr_stmt
attr_list
a_list
edge_stmt
edgeRHS
node_stmt
node_id
port
subgraph
compass_pt
edgeop
ID
quoted
name
number
'''

# Template of the C++ function that is generated for one production.
# print_production() substitutes the placeholders RETURNTYPE, PRODUCTION and
# BODY (in that order) before printing the result.
PRODUCTION_FUNCTION = '''  RETURNTYPE parse_PRODUCTION(const core::parse_node& node)
  {
BODY
  }
'''

# Global variables

# Maps a production name to the C++ return type of its parse function.
# generate_code() rebuilds it from a mapping string; print_production() adds
# 'UNKNOWN' for any production name that is missing.
production_return_types = {}

def make_condition(alternative):
    """Build the C++ condition that recognises one alternative of a production.

    alternative is the right hand side text of the alternative; it is split
    on whitespace into words. The condition first compares the child count of
    `node` with the number of words. For every word it then adds a comparison
    of the symbol name of the child at that position:

    * a word starting with a single quote is compared without its first and
      last character (the quotes);
    * a word ending in '*', '?' or '+' is not compared at all;
    * any other word is compared as is.

    Returns the clauses joined with ' && '.
    """
    words = alternative.split()
    # The child count clause is always present, so the result is never empty
    # and no fallback to 'true' is needed.
    clauses = ['(node.child_count() == %d)' % len(words)]
    for i, word in enumerate(words):
        if word.startswith("'"):
            name = word[1:-1]
        elif word[-1] in '*?+':
            continue
        else:
            name = word
        clauses.append('(symbol_name(node.child(%d)) == "%s")' % (i, name))
    return ' && '.join(clauses)

def symbol_names(rhs):
    """Return the symbol names of every alternative in rhs.

    rhs is a sequence of (text, comment, annotation) triples. For each triple
    the text is split on whitespace and every word is turned into a name:
    a word starting with a single quote loses its first and last character,
    a word ending in '*', '?' or '+' becomes '??', any other word is kept.

    Returns one list of names per alternative.
    """
    def name_of(word):
        # The quote test comes first: a quoted operator such as '+' is a
        # terminal, not a repetition.
        if word.startswith("'"):
            return word[1:-1]
        if word.endswith(('*', '?', '+')):
            return '??'
        return word

    return [
        [name_of(word) for word in text.split()]
        for (text, _comment, _annotation) in rhs
    ]

#---------------------------------------------------------------#
#                          print_alternative
#---------------------------------------------------------------#
def print_alternative(text, add_condition = False):
    """Return the C++ return statement for one alternative of a production.

    text is the right hand side text of the alternative. Every word that does
    not start with a single quote contributes an argument
    parse_<word>(node.child(<position>)), where the position counts all words
    including the quoted ones. The arguments are passed to the placeholder
    UNKNOWN_ALTERNATIVE.

    If add_condition is true, the statement is wrapped in an if statement
    whose condition is produced by make_condition(text).
    """
    # Quoted words are skipped. The original code had an unreachable lookup
    # in production_return_types after the `continue`; it has been removed.
    calls = [
        'parse_%s(node.child(%d))' % (word, i)
        for i, word in enumerate(text.split())
        if not word.startswith("'")
    ]
    result = 'return UNKNOWN_ALTERNATIVE(' + ', '.join(calls) + ');'
    if add_condition:
        result = 'if (' + make_condition(text) + ') { ' + result + ' }'
    return result

#---------------------------------------------------------------#
#                          print_production
#---------------------------------------------------------------#
def print_production(lhs, rhs):
    """Print the C++ function parse_<lhs> for one production.

    lhs is the name of the production and rhs a sequence of
    (text, comment, annotation) triples, one per alternative. The return type
    is looked up in production_return_types; a missing name is added there
    with the type 'UNKNOWN'.

    A production whose name ends in 'List' gets a body that calls parse_list
    for the element named by lhs without the 'List' suffix. Any other
    production gets one statement per alternative (see print_alternative).
    With more than one alternative the statements are guarded by conditions,
    chained with 'else', and followed by a report_unexpected_node call and
    the return of a default constructed value.

    The result is PRODUCTION_FUNCTION with its placeholders filled in.
    """
    return_type = production_return_types.setdefault(lhs, 'UNKNOWN')

    if lhs.endswith('List'):
        element = lhs[:-4]
        body = '    return parse_list<%s>(node, "%s", boost::bind(&xyz_actions::parse_%s, this, _1));' % (return_type, element, element)
    else:
        add_condition = len(rhs) > 1
        alternatives = [print_alternative(t, add_condition) for (t, comment, annotation) in rhs]
        if len(alternatives) == 1:
            body = '    ' + alternatives[0]
        else:
            body = '    ' + '\n    else '.join(alternatives) + ('\n    report_unexpected_node(node);\n    return %s();' % return_type)

    # The placeholders are plain text, so str.replace is used instead of
    # re.sub: re.sub would interpret backslashes in the replacement text as
    # escapes or group references. The substitution order is significant and
    # matches the order of the placeholders in the template.
    text = PRODUCTION_FUNCTION
    text = text.replace('RETURNTYPE', return_type)
    text = text.replace('PRODUCTION', lhs)
    text = text.replace('BODY', body)
    # A single parenthesised argument prints the same on Python 2 and 3.
    print(text)

#---------------------------------------------------------------#
#                          print_section
#---------------------------------------------------------------#
def print_section(title, productions):
    """Print a section: a C++ comment line with the title, then the code
    for each production.

    productions is a sequence of (lhs, rhs) pairs that are passed on to
    print_production in order.
    """
    # One formatted string keeps the call valid on both Python 2 and 3 and
    # reproduces the output of the former print statement: the title line
    # followed by an empty line.
    print('  // %s \n' % (title,))
    for (lhs, rhs) in productions:
        print_production(lhs, rhs)

#---------------------------------------------------------------#
#                          post_process_sections
#---------------------------------------------------------------#
# Generates a new production T ::= T1 | ... | Tn for the pattern (T1 | ... | Tn)
# Generates new productions TList ::= T* for patterns T* and T+ (if TList does not yet exist)
def post_process_sections(sections):
    """Replace groups and repetitions in the productions by named productions.

    sections is a list of (title, productions) pairs, where productions is a
    list of (lhs, rhs) pairs and rhs a list of (text, comment, annotation)
    triples. Productions whose name ends in 'List' are left untouched. In all
    other productions:

    * a parenthesised group (T1 | ... | Tn) is replaced by a new name
      <lhs>Alternative<index>, and a production with that name and the
      alternatives T1 ... Tn is generated;
    * a repetition T* or T+ is replaced by TList, and a production
      TList ::= T* is generated unless a production TList is already known.

    The generated productions are appended to the section in which they were
    created, in order of creation. The lists inside sections are modified in
    place; sections itself is also returned.
    """
    # An opening parenthesis that is not preceded by a quote, up to the first
    # closing parenthesis that is not part of the quoted terminal ')'.
    group_pattern = re.compile(r"(?<!')\((([^)]|('\)))*[^'])\)")
    # A word followed by '*' or '+', optionally separated by whitespace.
    repetition_pattern = re.compile(r'\b((\w+)\s*[*+])')

    # the names of all production left hand sides
    production_names = set()
    for (title, productions) in sections:
        for (lhs, rhs) in productions:
            production_names.add(lhs)

    for i, (title, productions) in enumerate(sections):
        # A list keeps the generated productions in a reproducible order;
        # the set is only used for the name clash test below.
        new_productions = []
        new_names = set()

        for j, (lhs, rhs) in enumerate(productions):
            if lhs.endswith('List'):
                continue

            for k, (text, comment, annotation) in enumerate(rhs):

                # Generate a new production T' ::= T for the pattern (T)
                m = group_pattern.search(text)
                while m is not None:
                    index = 1
                    new_lhs = lhs + 'Alternative' + str(index)
                    while new_lhs in new_names:
                        index = index + 1
                        new_lhs = lhs + 'Alternative' + str(index)
                    # Replace exactly the match that was just found.
                    text = text[:m.start()] + new_lhs + text[m.end():]
                    new_rhs = [(r.strip(), '', '') for r in re.split(r'\s*\|\s*', m.group(1))]
                    new_names.add(new_lhs)
                    new_productions.append((new_lhs, new_rhs))
                    m = group_pattern.search(text)

                # Generates new productions TList ::= T* for patterns T* and T+ (if TList does not yet exist)
                m = repetition_pattern.search(text)
                while m is not None:
                    new_lhs = m.group(2) + 'List'
                    text = text[:m.start()] + new_lhs + text[m.end():]
                    if new_lhs not in production_names:
                        production_names.add(new_lhs)
                        new_rhs = [(m.group(2) + '*', '', '')]
                        new_names.add(new_lhs)
                        new_productions.append((new_lhs, new_rhs))
                    m = repetition_pattern.search(text)

                rhs[k] = (text, comment, annotation)

            productions[j] = (lhs, rhs)

        sections[i] = (title, productions + new_productions)
    return sections

#---------------------------------------------------------------#
#                          my_print
#---------------------------------------------------------------#
def my_print(sections):
    """Print every alternative of every production as '<lhs> -> <text>'.

    sections is a sequence of (title, productions) pairs, productions a
    sequence of (lhs, rhs) pairs and rhs a sequence of
    (text, comment, annotation) triples. One line is printed per triple.
    """
    for (title, productions) in sections:
        for (lhs, rhs) in productions:
            for (text, comment, annotation) in rhs:
                # One formatted string keeps the call valid on Python 2 and 3.
                print('%s -> %s' % (lhs, text))


def generate_code(filename, production_mapping):
    """Print the generated parse functions for the grammar in filename.

    production_mapping is a multi-line string. A line with one word names a
    production whose return type is recorded as 'UNKNOWN'; a line with two
    words names a production and its return type. Lines with any other
    number of words are ignored. The result replaces the global
    production_return_types.

    The grammar is read with parse_mcrl2_syntax, rewritten by
    post_process_sections and printed section by section.
    """
    global production_return_types

    return_types = {}
    for entry in production_mapping.splitlines():
        fields = entry.split()
        if len(fields) not in (1, 2):
            continue
        return_types[fields[0]] = fields[1] if len(fields) == 2 else 'UNKNOWN'
    production_return_types = return_types

    for (title, productions) in post_process_sections(parse_mcrl2_syntax(filename)):
        print_section(title, productions)

#---------------------------------------------------------------#
#                          main
#---------------------------------------------------------------#
def main():
    """Parse the command line and print the generated code for the mCRL2 grammar."""
    # The parsed options and arguments are not used.
    OptionParser("usage: %prog [options]").parse_args()

    generate_code('../../../doc/specs/mcrl2-syntax.g', MCRL2_MAPPING)

    # Code generation for the FSM and DOT grammars is disabled:
    #generate_code('../../../doc/specs/fsm-syntax.g', FSM_MAPPING)
    #generate_code('../../../doc/specs/dot-syntax.g', DOT_MAPPING)

if __name__ == "__main__":
    main()