1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
|
from __future__ import annotations
import re
from itertools import chain
from tatsu import grammars as model
from tatsu.ast import AST
def camel2py(name):
    """Rewrite camelCase boundaries in ``name`` as ``snake_case`` ones.

    Wherever an ASCII lowercase letter or digit is immediately followed by an
    ASCII uppercase letter, an underscore is inserted between them and that
    uppercase letter is lowercased.  Everything else (including runs of
    consecutive uppercase letters) is returned unchanged.
    """

    def _split_boundary(match):
        # group 1: the lowercase letter/digit, group 2: the uppercase letter.
        before, upper = match.groups()
        return f'{before}_{upper.lower()}'

    return re.sub(r'([a-z0-9])([A-Z])', _split_boundary, name)
class ANTLRSemantics:
    """Semantic actions that build ``tatsu.grammars`` model objects.

    Each public method receives the AST produced for one construct of a
    parsed ANTLR grammar and returns either a ``model`` node, a regex
    fragment (``str``), or ``None`` when the construct is dropped.
    NOTE(review): the methods are presumably dispatched by grammar-rule name
    by the parser driving these semantics -- confirm against the caller.
    """

    def __init__(self, name):
        """Create the semantics for a grammar that will be called ``name``."""
        # Name given to the resulting model.Grammar.
        self.name = name
        # Token name -> model.Token, for tokens declared with a literal value.
        self.tokens = {}
        # Upper-cased token name -> expression that references to it resolve to.
        self.token_rules = {}
        # Rules created by token() for value-less tokens; added by grammar().
        self.synthetic_rules = []

    # -- private helpers ---------------------------------------------------

    def _bind_token_rule(self, name, exp):
        """Record ``exp`` as the expression token ``name`` resolves to.

        When an entry already exists it is updated in place through its
        ``exp`` attribute, so objects that were handed out earlier (see
        ``token_ref``) observe the new expression.
        """
        if name in self.token_rules:
            # The existing entry is expected to be the placeholder created by
            # token_ref().
            self.token_rules[name].exp = exp  # it is a model._Decorator
        else:
            self.token_rules[name] = exp

    @staticmethod
    def _checked_pattern(parts):
        """Join ``parts`` into one regex, validate it and wrap it in a Pattern.

        Raises:
            re.error: if the joined text is not a valid regular expression.
        """
        pattern = ''.join(parts)
        re.compile(pattern)  # validation only; the compiled object is unused
        return model.Pattern(pattern)

    # -- grammar and rules -------------------------------------------------

    def grammar(self, ast):
        """Return the ``model.Grammar`` made of parsed plus synthetic rules.

        ``None`` entries (``rule`` returns ``None`` for literal token rules)
        are skipped.
        """
        rules = [
            r
            for r in chain(ast.rules, self.synthetic_rules)
            if r is not None
        ]
        return model.Grammar(self.name, rules)

    def rule(self, ast):
        """Translate one rule definition.

        Names are converted with ``camel2py``.  A name that still starts with
        an uppercase letter is handled as a token rule:

        * if its expression is a plain ``model.Token``, the token is recorded
          in ``token_rules`` and no rule is emitted (``None`` is returned);
        * otherwise, unless it is a fragment or a ``model.Sequence``, the
          token name is bound to a reference to the lower-cased rule name and
          the rule itself is emitted under that lower-cased name.

        Every other case yields a ``model.Rule`` under the converted name.
        """
        name = camel2py(ast.name)
        exp = ast.exp
        if name[0].isupper():
            name = name.upper()
            if isinstance(exp, model.Token):
                self._bind_token_rule(name, exp)
                return None
            if not ast.fragment and not isinstance(exp, model.Sequence):
                self._bind_token_rule(name, model.RuleRef(name.lower()))
                name = name.lower()
        return model.Rule(ast, name, exp, ast.params, ast.kwparams)

    # -- expressions -------------------------------------------------------

    def alternatives(self, ast):
        """Return the single surviving option, or a ``Choice`` over them."""
        options = [o for o in ast.options if o is not None]
        if len(options) == 1:
            return options[0]
        return model.Choice([model.Option(o) for o in options])

    def elements(self, ast):
        """Collapse a run of elements into ``Void``, one element or a ``Sequence``."""
        elements = [e for e in ast if e is not None]
        if not elements:
            return model.Void()
        if len(elements) == 1:
            return elements[0]
        return model.Sequence(AST(sequence=elements))

    def predicate_or_action(self, ast):
        """Drop the construct: always returns ``None``."""
        return None

    def named(self, ast):
        """Return a ``NamedList`` when ``ast.force_list`` is set, else a ``Named``."""
        if ast.force_list:
            return model.NamedList(ast)
        return model.Named(ast)

    def syntactic_predicate(self, ast):
        """Drop the construct: always returns ``None``."""
        return None

    def optional(self, ast):
        """Wrap ``ast`` in ``Optional``, unwrapping a Group/Optional/Closure first."""
        if isinstance(ast, model.Group | model.Optional | model.Closure):
            ast = ast.exp
        return model.Optional(ast)

    def closure(self, ast):
        """Wrap ``ast`` in ``Closure``, unwrapping a Group/Optional first."""
        if isinstance(ast, model.Group | model.Optional):
            ast = ast.exp
        return model.Closure(ast)

    def positive_closure(self, ast):
        """Wrap ``ast`` in ``PositiveClosure``, unwrapping a Group first."""
        if isinstance(ast, model.Group):
            ast = ast.exp
        return model.PositiveClosure(ast)

    def negative(self, ast):
        """Return a sequence of a negative lookahead on ``ast`` and a ``'.'`` pattern."""
        lookahead = model.NegativeLookahead(ast)
        # Named any_char (not `any`) to avoid shadowing the builtin.
        any_char = model.Pattern('.')
        return model.Sequence(AST(sequence=[lookahead, any_char]))

    def subexp(self, ast):
        """Wrap a parenthesised sub-expression in a ``Group``."""
        return model.Group(ast)

    # -- character sets and regular expressions ----------------------------

    def regexp(self, ast):
        """Join the regex pieces in ``ast`` into a validated ``model.Pattern``.

        Raises:
            re.error: if the joined text is not a valid regular expression.
        """
        return self._checked_pattern(ast)

    def charset_optional(self, ast):
        """Return the regex fragment ``ast`` followed by ``?``."""
        return f'{ast}?'

    def charset_closure(self, ast):
        """Return the regex fragment ``ast`` followed by ``*``."""
        return f'{ast}*'

    def charset_positive_closure(self, ast):
        """Return the regex fragment ``ast`` followed by ``+``."""
        return f'{ast}+'

    def charset_or(self, ast):
        """Return a character class ``[...]`` holding the joined pieces of ``ast``."""
        members = ''.join(ast)
        return f'[{members}]'

    def charset_negative_or(self, ast):
        """Return a negated character class ``[^...]`` of the joined pieces of ``ast``."""
        members = ''.join(ast)
        return f'[^{members}]'

    @staticmethod
    def escape(s):
        """Backslash-escape each of the characters ``[]().*+{}^$`` found in ``s``."""
        return ''.join('\\' + c if c in '[]().*+{}^$' else c for c in s)

    def charset_atom(self, ast):
        """Return ``ast`` unchanged."""
        return ast

    def charset_char(self, ast):
        """Return ``ast`` unchanged."""
        return ast

    def charset_range(self, ast):
        """Return the range text ``first-last`` built from ``ast``."""
        return f'{ast.first}-{ast.last}'

    def newranges(self, ast):
        """Join the range fragments in ``ast`` into a validated ``model.Pattern``.

        Raises:
            re.error: if the joined text is not a valid regular expression.
        """
        return self._checked_pattern(ast)

    def newrange(self, ast):
        """Return the validated regex text ``[range]repeat`` built from ``ast``.

        A missing/empty ``ast.repeat`` contributes nothing.

        Raises:
            re.error: if the resulting text is not a valid regular expression.
        """
        repeat = ast.repeat or ''
        pattern = f'[{ast.range}]{repeat}'
        re.compile(pattern)  # validation only
        return pattern

    def negative_newrange(self, ast):
        """Return the validated regex text ``[^range]repeat`` built from ``ast``.

        A missing/empty ``ast.repeat`` contributes nothing.

        Raises:
            re.error: if the resulting text is not a valid regular expression.
        """
        repeat = ast.repeat or ''
        pattern = f'[^{ast.range}]{repeat}'
        re.compile(pattern)  # validation only
        return pattern

    # -- atoms -------------------------------------------------------------

    def rule_ref(self, ast):
        """Return a ``RuleRef`` to the ``camel2py``-converted rule name ``ast``.

        The name is asserted to start with a lowercase character.
        """
        assert ast[0].islower()
        return model.RuleRef(camel2py(ast))

    def any(self, ast):
        """Return a pattern matching a run of word or of non-space characters."""
        return model.Pattern(r'\w+|\S+')

    def string(self, ast):
        """Return a ``model.Token`` for the literal, joining list pieces first."""
        text = ast
        if isinstance(text, list):
            text = ''.join(text)
        return model.Token(text)

    def eof(self, ast):
        """Return the end-of-input expression."""
        return model.EOF()

    def token(self, ast):
        """Translate a token declaration.

        A token with a value becomes a ``model.Token`` remembered in
        ``self.tokens``.  A token without a value becomes ``model.Fail`` and a
        synthetic rule of that name is queued for ``grammar`` to append.
        The expression is returned in both cases.
        """
        name = ast.name
        if ast.value:
            exp = model.Token(ast.value)
            self.tokens[name] = exp
        else:
            exp = model.Fail()
            rule = model.Rule(ast, name, exp, [], {})
            self.synthetic_rules.append(rule)
        return exp

    def token_ref(self, ast):
        """Resolve a reference to a token name.

        The name is converted with ``camel2py`` and upper-cased.  A declared
        token value from ``self.tokens`` wins; otherwise the entry in
        ``self.token_rules`` is returned, creating a ``Decorator`` placeholder
        around a ``RuleRef`` when the token has not been seen yet.
        """
        name = camel2py(ast).upper()
        value = self.tokens.get(name)
        if value and isinstance(value, model.Model):
            return value
        if name in self.token_rules:
            exp = self.token_rules[name]
        else:
            # Placeholder so a later rule() definition can be patched in.
            exp = model.Decorator(model.RuleRef(name))
            self.token_rules[name] = exp
        return exp
|