1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
|
"""This is an experimental tool for reconstructing text from a shaped tree, based on a Lark grammar.
"""
from typing import Dict, Callable, Iterable, Optional
from .lark import Lark
from .tree import Tree, ParseTree
from .visitors import Transformer_InPlace
from .lexer import Token, PatternStr, TerminalDef
from .grammar import Terminal, NonTerminal, Symbol
from .tree_matcher import TreeMatcher, is_discarded_terminal
from .utils import is_id_continue
def is_iter_empty(i):
try:
_ = next(i)
return False
except StopIteration:
return True
class WriteTokensTransformer(Transformer_InPlace):
"Inserts discarded tokens into their correct place, according to the rules of grammar"
tokens: Dict[str, TerminalDef]
term_subs: Dict[str, Callable[[Symbol], str]]
def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
self.tokens = tokens
self.term_subs = term_subs
def __default__(self, data, children, meta):
if not getattr(meta, 'match_tree', False):
return Tree(data, children)
iter_args = iter(children)
to_write = []
for sym in meta.orig_expansion:
if is_discarded_terminal(sym):
try:
v = self.term_subs[sym.name](sym)
except KeyError:
t = self.tokens[sym.name]
if not isinstance(t.pattern, PatternStr):
raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
v = t.pattern.value
to_write.append(v)
else:
x = next(iter_args)
if isinstance(x, list):
to_write += x
else:
if isinstance(x, Token):
assert Terminal(x.type) == sym, x
else:
assert NonTerminal(x.data) == sym, (sym, x)
to_write.append(x)
assert is_iter_empty(iter_args)
return to_write
class Reconstructor(TreeMatcher):
"""
A Reconstructor that will, given a full parse Tree, generate source code.
Note:
The reconstructor cannot generate values from regexps. If you need to produce discarded
regexes, such as newlines, use `term_subs` and provide default values for them.
Parameters:
parser: a Lark instance
term_subs: a dictionary of [Terminal name as str] to [output text as str]
"""
write_tokens: WriteTokensTransformer
def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
TreeMatcher.__init__(self, parser)
self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
def _reconstruct(self, tree):
unreduced_tree = self.match_tree(tree, tree.data)
res = self.write_tokens.transform(unreduced_tree)
for item in res:
if isinstance(item, Tree):
# TODO use orig_expansion.rulename to support templates
yield from self._reconstruct(item)
else:
yield item
def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
x = self._reconstruct(tree)
if postproc:
x = postproc(x)
y = []
prev_item = ''
for item in x:
if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
y.append(' ')
y.append(item)
prev_item = item
return ''.join(y)
|