1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
|
# Copyright (c) 2017-2026 Juancarlo Añez (apalala@gmail.com)
# SPDX-License-Identifier: BSD-4-Clause
"""
Parse and translate a TatSu grammar into a Python parser for
the described language.
"""
from __future__ import annotations
from collections.abc import Callable
from typing import Any
from .. import grammars
from ..builder import BuilderConfig, Constructor, ModelBuilderSemantics, TypeContainer
from ..exceptions import ParseException
from ..infos import ParserConfig
from ..ngcodegen.walkgen_model import modelgen
from ..ngcodegen.walkgen_parser import pythongen
from ..objectmodel import Node
from ..parser import TatSuParserGenerator
from ..tokenizing import Tokenizer
from ..util.string import hashsha
# Public API of this module. Besides the functions defined below, it
# re-exports three imported names: `grammars`, `modelgen` and `pythongen`.
__all__ = [
'compile',
'gencode',
'genmodel',
'grammars',
'modelgen',
'parse',
'pythongen',
'to_python_model',
'to_python_sourcecode',
]
# Module-level cache of compiled grammar models, filled by `compile()`.
# Keys are `(name, hashsha(grammar), id(semantics))` tuples; values are the
# model objects returned by `TatSuParserGenerator.parse()`.
# NOTE(review): nothing in this module evicts entries, and `compile()` hands
# out (and mutates `.semantics` on) the cached object itself, not a copy.
__compiled_grammar_cache = {}
def compile(
    grammar: str | Tokenizer,
    name: str | None = None,
    *,
    config: ParserConfig | None = None,
    filename: str | None = None,
    basetype: type | None = None,
    semantics: Any = None,
    asmodel: bool = False,
    builderconfig: BuilderConfig | None = None,
    synthok: bool = True,
    typedefs: list[TypeContainer] | None = None,
    constructors: list[Constructor] | None = None,
    **settings: Any,
) -> grammars.Grammar:
    """
    Compile a TatSu grammar into a grammar model, reusing cached models.

    The model is looked up in (or added to) the module-level cache under
    the key ``(name, hashsha(grammar), id(semantics))``. The cached object
    itself is returned, and its ``semantics`` attribute is (re)assigned as
    described below, so callers sharing a key also share that object.

    Parameters:
        grammar: the grammar source (or a ``Tokenizer`` over it).
        name: name handed to ``TatSuParserGenerator``; part of the cache key.
        config, filename: only used to validate the call through
            ``ParserConfig.new()``; the resulting config is discarded.
        semantics: a semantics *instance* (or ``None``). When given, it is
            assigned to ``model.semantics`` and the model-building options
            below are ignored.
        asmodel, builderconfig, basetype, typedefs, constructors: when
            ``semantics`` is ``None`` and ``asmodel`` is true, or any of the
            other options is provided, ``model.semantics`` is set to a
            ``ModelBuilderSemantics`` built from them.
        synthok: forwarded to ``BuilderConfig.new()`` when model-building
            semantics are created.
        **settings: forwarded to ``ParserConfig.new()``, to
            ``TatSuParserGenerator`` and to its ``parse()``.

    Returns:
        The (possibly cached) grammar model.

    Raises:
        TypeError: if ``semantics`` is a class rather than an instance.
    """
    # Fail fast: reject a class passed as `semantics` before any other work,
    # so the caller always sees this message rather than an unrelated error
    # from building the configuration.
    if isinstance(semantics, type):
        raise TypeError(
            f'semantics must be an object instance or None, not class {semantics!r}',
        )

    # check parameters: the resulting config is intentionally discarded
    ParserConfig.new(
        config=config,
        semantics=semantics,
        name=name,
        filename=filename,
        **settings,
    )

    cache = __compiled_grammar_cache
    key = (name, hashsha(grammar), id(semantics))
    if key not in cache:
        gen = TatSuParserGenerator(name, **settings)
        cache[key] = gen.parse(grammar, **settings)
    model = cache[key]

    if semantics is not None:
        # explicit semantics always win over model-building options
        model.semantics = semantics
    elif (
        asmodel
        or isinstance(builderconfig, BuilderConfig)
        or basetype is not None
        or typedefs is not None
        or constructors is not None
    ):
        # HACK: cheating, but necessary for bw-compatibility
        builderconfig = BuilderConfig.new(
            config=builderconfig,
            synthok=synthok,
            basetype=basetype,
            typedefs=typedefs,
            constructors=constructors,
        )
        model.semantics = ModelBuilderSemantics(config=builderconfig)

    return model
def parse(
    grammar: str,
    text: str,
    /,
    *,
    config: ParserConfig | None = None,
    start: str | None = None,
    name: str | None = None,
    filename: str | None = None,
    semantics: Any | None = None,
    asmodel: bool = False,
    builderconfig: BuilderConfig | None = None,
    basetype: type | None = None,
    synthok: bool = True,
    typedefs: list[TypeContainer] | None = None,
    constructors: list[Constructor] | None = None,
    **settings: Any,
):
    """
    Compile `grammar` and use the resulting model to parse `text`.

    Semantics for the parse are chosen in this order:

    1. the `semantics` argument, when truthy;
    2. a fresh ``ModelBuilderSemantics`` built from `builderconfig`,
       `synthok`, `basetype`, `typedefs` and `constructors`, when `asmodel`
       is true or any of `builderconfig`, `basetype`, `typedefs`,
       `constructors` is provided;
    3. whatever semantics the compiled model already carries.

    Parameters:
        grammar: the grammar source (positional-only).
        text: the input to parse (positional-only).
        config, start, name, filename, **settings: merged into a
            ``ParserConfig`` that is passed on to ``model.parse()``.
        semantics: a semantics instance, or ``None``.
        asmodel, builderconfig, basetype, synthok, typedefs, constructors:
            model-building options, see above.

    Returns:
        The result of ``model.parse()`` for `text`.
    """
    config = ParserConfig.new(
        config=config,
        start=start,
        name=name,
        filename=filename,
        semantics=semantics,
        **settings,
    )
    model = compile(grammar, config=config, asmodel=asmodel)

    # Decide from the caller's arguments, not from `model.semantics`:
    # `compile(asmodel=True)` installs a default-configured builder on the
    # model, which previously made the builder options given here
    # unreachable whenever `asmodel` was set.
    wants_model = (
        asmodel
        or isinstance(builderconfig, BuilderConfig)
        or basetype is not None
        or typedefs is not None
        or constructors is not None
    )
    if semantics:
        config.semantics = semantics
    elif wants_model:
        builderconfig = BuilderConfig.new(
            config=builderconfig,
            synthok=synthok,
            basetype=basetype,
            typedefs=typedefs,
            constructors=constructors,
        )
        config.semantics = ModelBuilderSemantics(config=builderconfig)
    else:
        config.semantics = model.semantics

    return model.parse(text, start=start, semantics=semantics, config=config)
def to_python_sourcecode(
    grammar: str,
    /,
    *,
    name: str | None = None,
    filename: str | None = None,
    config: ParserConfig | None = None,
    **settings: Any,
):
    """
    Compile `grammar` and return the output of `pythongen` for its model.

    `name`, `filename` and `**settings` are merged into `config` through
    ``ParserConfig.new()``; the merged config, together with `name` and
    `filename`, is what ``compile()`` receives.
    """
    merged = ParserConfig.new(
        config=config,
        name=name,
        filename=filename,
        **settings,
    )
    return pythongen(
        compile(grammar, name=name, filename=filename, config=merged),
    )
def to_python_model(
    grammar: str,
    /,
    *,
    name: str | None = None,
    filename: str | None = None,
    basetype: type = Node,
    config: ParserConfig | None = None,
    **settings: Any,
):
    """
    Compile `grammar` and return the output of `modelgen` for its model.

    `name`, `filename` and `**settings` are merged into `config` through
    ``ParserConfig.new()`` before compiling. `basetype` (``Node`` by
    default) is forwarded to `modelgen` unchanged.
    """
    merged = ParserConfig.new(
        config=config,
        name=name,
        filename=filename,
        **settings,
    )
    grammar_model = compile(
        grammar,
        name=name,
        filename=filename,
        config=merged,
    )
    return modelgen(grammar_model, basetype=basetype)
# for backwards compatibility. Use `compile()` instead
def genmodel(
    *,
    name: str | None = None,
    grammar: str | None = None,
    semantics: Any = None,
    config: ParserConfig | None = None,
    **settings: Any,
) -> grammars.Grammar:
    """
    Backwards-compatible, keyword-only wrapper around `compile()`.

    Parameters:
        name: forwarded to `compile()`.
        grammar: the grammar source; required despite the ``None`` default.
        semantics: a semantics *instance* or ``None``. `compile()` raises
            ``TypeError`` for classes, so this is annotated like
            `compile()`'s own parameter rather than as ``type``.
        config, **settings: forwarded to `compile()`.

    Returns:
        The grammar model returned by `compile()`.

    Raises:
        ParseException: if `grammar` is ``None``.
    """
    if grammar is None:
        raise ParseException('grammar is None')

    return compile(
        grammar,
        name=name,
        semantics=semantics,
        config=config,
        **settings,
    )
def gencode(
    *,
    name: str | None = None,
    grammar: str,
    trace: bool = False,
    filename: str | None = None,
    codegen: Callable = pythongen,
    config: ParserConfig | None = None,
    **settings: Any,
):
    """
    Compile `grammar` and return ``codegen(model)`` for the resulting model.

    All arguments are keyword-only. `name`, `filename`, `trace`, `config`
    and `**settings` are handed to `compile()`; `codegen` defaults to
    `pythongen`.
    """
    return codegen(
        compile(
            grammar,
            name=name,
            filename=filename,
            trace=trace,
            config=config,
            **settings,
        ),
    )
|