# Copyright (c) 2017-2026 Juancarlo AƱez (apalala@gmail.com)
# SPDX-License-Identifier: BSD-4-Clause
from __future__ import annotations
from ast import literal_eval
from collections.abc import Iterable
from typing import Any
from . import grammars
from .builder import ModelBuilderSemantics
from .contexts import ParseContext
from .exceptions import FailedSemantics
from .leftrec import mark_left_recursion
from .util import eval_escapes, re, warning
from .util.abctools import flatten
class TatSuGrammarSemantics(ModelBuilderSemantics):
    """Semantic actions that build a grammar model from a parsed TatSu grammar.

    Each public method is named after a rule in the TatSu meta-grammar and
    converts that rule's AST into the corresponding :mod:`grammars` model
    node.  Validation failures are reported by raising
    :class:`FailedSemantics`, which the parser turns into a parse failure
    at the offending position.
    """

    def __init__(self, name: str | None = None, context: ParseContext | None = None):
        """Initialize the model builder with the grammar model's node classes.

        *name*, when given, overrides the grammar name derived from the
        ``@@grammar`` directive.  *context* is the active parse context,
        which may also be attached later via :meth:`set_context`.
        """
        super().__init__(
            basetype=grammars.Model,
            constructors=grammars.Model.classes(),  # ty:ignore[invalid-argument-type]
        )
        self.name = name
        self.context = context
        # Rules defined so far, keyed by name; used to detect duplicate
        # definitions, resolve @override, and look up base rules.
        self.rulemap: dict[str, grammars.Rule] = {}

    def set_context(self, context: ParseContext):
        """Attach the parse context that drives this semantics instance."""
        self.context = context

    @classmethod
    def _validate_literal(cls, ast: Any):
        """Raise ``FailedSemantics`` if *ast* does not round-trip as a string literal."""
        try:
            literal_eval(repr(str(ast)))
        except SyntaxError as e:
            raise FailedSemantics('literal string error: ' + str(e)) from e

    @classmethod
    def _validate_pattern(cls, ast: Any):
        """Raise ``FailedSemantics`` if *ast* is not a valid, compilable regex."""
        cls._validate_literal(ast)
        try:
            re.compile(str(ast))
        except (TypeError, re.error) as e:
            raise FailedSemantics('pattern error: ' + str(e)) from e

    def EMPTYLINE(self, ast: Any, *args) -> Any:
        """Pass empty lines through unchanged."""
        return ast

    def token(self, ast: str, *args: Any) -> grammars.Token:
        """Build a token (string-literal) expression; reject empty tokens."""
        token = ast
        if not token:
            raise FailedSemantics('empty token')
        # Use the shared validator so a malformed literal is reported as
        # FailedSemantics, consistent with pattern(), instead of letting a
        # bare SyntaxError escape the semantic action.
        self._validate_literal(token)
        return grammars.Token(ast=token)

    def pattern(self, ast: str, *args) -> grammars.Pattern:
        """Build a regular-expression pattern expression."""
        pattern = ast
        self._validate_literal(pattern)
        return grammars.Pattern(ast=pattern)

    def regexes(self, ast: Iterable[str], *args) -> Iterable[str]:
        """Validate that adjacent regex fragments compile as one pattern."""
        pattern = ''.join(ast)
        self._validate_pattern(pattern)
        # Return the original fragments; joining is only for validation.
        return ast

    def regex(self, ast: str, *args) -> str:
        """Validate and return a single regex fragment."""
        pattern = ast
        self._validate_pattern(pattern)
        return pattern

    def string(self, ast):
        """Interpret backslash escapes in a quoted string."""
        return eval_escapes(ast)

    def hex(self, ast):
        """Convert a hexadecimal literal to int."""
        return int(ast, 16)

    def float(self, ast):
        """Convert a floating-point literal to float."""
        return float(ast)

    def int(self, ast):
        """Convert an integer literal to int."""
        return int(ast)

    def null(self, ast):
        """Map the ``null`` literal to ``None``."""
        return None

    def cut_deprecated(self, ast, *args):
        """Accept the legacy ``>>`` cut syntax with a deprecation warning."""
        warning('The use of >> for cut is deprecated. Use the ~ symbol instead.')
        return grammars.Cut()

    def override_single_deprecated(self, ast, *args):
        """Accept the legacy ``@`` override syntax with a deprecation warning."""
        warning('The use of @ for override is deprecated. Use @: instead')
        return grammars.Override(ast)

    def sequence(self, ast, *args):
        """Build a sequence node, collapsing one-element sequences."""
        seq = ast
        assert isinstance(seq, list), str(seq)
        if len(seq) == 1:
            # A single-element sequence is just that element.
            return seq[0]
        return grammars.Sequence(ast=ast)

    def choice(self, ast):
        """Build a choice (alternation) node."""
        return grammars.Choice(ast=ast)

    def new_name(self, name):
        """Check that *name* is not already defined as a rule."""
        if name in self.rulemap:
            raise FailedSemantics(f'rule "{name!s}" already defined')
        return name

    def known_name(self, name) -> str:
        """Check that *name* refers to an already-defined rule."""
        if name not in self.rulemap:
            raise FailedSemantics(f'rule "{name!s}" not yet defined')
        return name

    def boolean(self, ast):
        """Interpret a directive value as a boolean."""
        return str(ast).lower() in {'true', 'yes', 'ok', '1'}

    def rule(self, ast, *args):
        """Build a ``Rule`` (or ``BasedRule``) and register it in the rulemap.

        Redefining an existing rule requires the ``@override`` decorator;
        overriding requires the rule to exist already.  A rule with a base
        (``name < base``) inherits from the previously defined base rule.
        """
        decorators = ast.decorators
        name = ast.name
        base = ast.base
        params = ast.params
        kwparams = dict(ast.kwparams) if ast.kwparams else {}

        if 'override' not in decorators and name in self.rulemap:
            # Guaranteed to raise: redefinition without @override is an error.
            self.new_name(name)
        elif 'override' in decorators:
            self.known_name(name)

        if not base:
            rule = grammars.Rule(
                ast=ast,
                name=name,
                params=params,
                kwparams=kwparams,
                decorators=decorators,
            )
        else:
            self.known_name(base)
            baserule = self.rulemap[base]
            rule = grammars.BasedRule(
                ast=ast,
                name=name,
                baserule=baserule,
                params=params,
                kwparams=kwparams,
                decorators=decorators,
            )

        self.rulemap[name] = rule
        return rule

    def rule_include(self, ast, *args):
        """Build a ``RuleInclude`` referencing an already-defined rule."""
        name = str(ast)
        self.known_name(name)
        rule = self.rulemap[name]
        return grammars.RuleInclude(ast=ast, rule=rule)

    def grammar(self, ast, *args):
        """Assemble the final ``Grammar`` from all collected rules.

        Gathers directives and keywords, normalizes the ``whitespace``
        directive, and marks left-recursive rules when left recursion is
        enabled in the grammar's configuration.
        """
        directives = {d.name: d.value for d in flatten(ast.directives)}
        for value in directives.values():
            literal_eval(repr(value))
        keywords = list(flatten(ast.keywords)) or []

        if directives.get('whitespace') in {'None', 'False'}:
            # NOTE: use '' because None will _not_ override defaults in configuration
            directives['whitespace'] = ''

        name = self.name or directives.get('grammar')
        grammar = grammars.Grammar(
            name,
            list(self.rulemap.values()),
            directives=directives,
            keywords=keywords,
        )
        if grammar.config.left_recursion:
            mark_left_recursion(grammar)
        return grammar