File: ebnf.py

package info (click to toggle)
pyparsing 3.3.2-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 12,200 kB
  • sloc: python: 30,867; ansic: 422; sh: 112; makefile: 24
file content (185 lines) | stat: -rw-r--r-- 4,605 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# This module tries to implement ISO 14977 standard with pyparsing.
# pyparsing version 1.1 or greater is required.
from typing import Any

# ISO 14977 standardize The Extended Backus-Naur Form(EBNF) syntax.
# You can read a final draft version here:
# https://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html
#
# Submitted 2004 by Seo Sanghyeon
# Updated to current pyparsing styles 2025 by Paul McGuire
#

import pyparsing as pp


all_names = """
integer
meta_identifier
terminal_string
optional_sequence
repeated_sequence
grouped_sequence
syntactic_primary
syntactic_factor
syntactic_term
single_definition
definitions_list
syntax_rule
syntax
""".split()

LBRACK, RBRACK, LBRACE, RBRACE, LPAR, RPAR, DASH, STAR, EQ, SEMI = pp.Suppress.using_each(
    "[]{}()-*=;"
)

integer = pp.common.integer()
meta_identifier = pp.common.identifier()
terminal_string = pp.Regex(
    r'"[^"]*"'
    r"|"
    r"'[^']*'"
).add_parse_action(pp.remove_quotes)

definitions_list = pp.Forward()
optional_sequence = LBRACK + definitions_list + RBRACK
repeated_sequence = LBRACE + definitions_list + RBRACE
grouped_sequence = LPAR + definitions_list + RPAR
syntactic_primary = (
    optional_sequence
    | repeated_sequence
    | grouped_sequence
    | meta_identifier
    | terminal_string
)
syntactic_factor = pp.Optional(integer + STAR) + syntactic_primary
syntactic_term = syntactic_factor + pp.Optional(DASH + syntactic_factor)
single_definition = pp.DelimitedList(syntactic_term, ",")
definitions_list <<= pp.DelimitedList(single_definition, "|")
syntax_rule = meta_identifier + EQ + definitions_list + SEMI

ebnfComment = (
    ("(*" + (pp.CharsNotIn("*") | ("*" + ~pp.Literal(")")))[...] + "*)")
    .streamline()
    .set_name("ebnfComment")
)

syntax = syntax_rule[1, ...]
syntax.ignore(ebnfComment)


def do_integer(toks):
    return int(toks[0])


def do_meta_identifier(toks):
    if toks[0] in symbol_table:
        return symbol_table[toks[0]]
    else:
        symbol_table[toks[0]] = pp.Forward()
        return symbol_table[toks[0]]


def do_terminal_string(toks):
    return pp.Literal(toks[0])


def do_optional_sequence(toks):
    return pp.Optional(toks[0])


def do_repeated_sequence(toks):
    return pp.ZeroOrMore(toks[0])


def do_grouped_sequence(toks):
    return pp.Group(toks[0])


def do_syntactic_primary(toks):
    return toks[0]


def do_syntactic_factor(toks):
    if len(toks) == 2 and toks[0] > 1:
        # integer * syntactic_primary
        return pp.And([toks[1]] * toks[0])
    else:
        # syntactic_primary
        return [toks[0]]


def do_syntactic_term(toks):
    if len(toks) == 2:
        # syntactic_factor - syntactic_factor
        return pp.NotAny(toks[1]) + toks[0]
    else:
        # syntactic_factor
        return [toks[0]]


def do_single_definition(toks):
    toks = toks.as_list()
    if len(toks) > 1:
        # syntactic_term , syntactic_term , ...
        return pp.And(toks)
    else:
        # syntactic_term
        return [toks[0]]


def do_definitions_list(toks):
    toks = toks.as_list()
    if len(toks) > 1:
        # single_definition | single_definition | ...
        return pp.Or(toks)
    else:
        # single_definition
        return [toks[0]]


def do_syntax_rule(toks):
    # meta_identifier = definitions_list ;
    assert toks[0].expr is None, "Duplicate definition"
    toks[0] <<= toks[1]
    return [toks[0]]


def do_syntax():
    # syntax_rule syntax_rule ...
    return symbol_table


for name in all_names:
    expr = vars()[name]
    action = vars()["do_" + name]
    expr.set_name(name)
    expr.add_parse_action(action)
    # expr.set_debug()


symbol_table: dict[str, pp.Forward] = {}


def parse(ebnf, given_table=None, *, enable_debug=False):
    given_table = given_table or {}
    symbol_table.clear()
    symbol_table.update(given_table)
    table = syntax.parse_string(ebnf, parse_all=True)[0]
    missing_definitions = [
        k for k, v in table.items()
        if k not in given_table and v.expr is None
    ]
    assert not missing_definitions, f"Missing definitions for {missing_definitions}"
    for name, expr in table.items():
        expr.set_name(name)
        expr.set_debug(enable_debug)
    return table


if __name__ == '__main__':
    try:
        syntax.create_diagram("ebnf_diagram.html")
    except Exception as e:
        print("Failed to create diagram for EBNF syntax parser"
              f" - {type(e).__name__}: {e}")