###{standalone
#
#
# Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser
#
# Git: https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
# >>> LICENSE
#
# This tool and its generated code use a separate license from Lark,
# and are subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
#
# If you wish to purchase a commercial license for this tool and its
# generated code, you may contact me via email or otherwise.
#
# If MPL2 is incompatible with your free or open-source project,
# contact me and we'll work it out.
#
#
from copy import deepcopy
from abc import ABC, abstractmethod
from types import ModuleType
from typing import (
TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Union, Iterable, IO, TYPE_CHECKING, overload, Sequence,
Pattern as REPattern, ClassVar, Set, Mapping
)
###}
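
# Everything between a '###{standalone' marker and its closing '###}' (like the
# header and import block above) is copied verbatim into the generated parser
# file; extract_sections() below implements this convention.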
import sys
import token, tokenize
import os
from os import path
from collections import defaultdict
from functools import partial
from argparse import ArgumentParser
import lark
from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
from lark.grammar import Rule
from lark.lexer import TerminalDef
_dir = path.dirname(__file__)
_larkdir = path.join(_dir, path.pardir)
EXTRACT_STANDALONE_FILES = [
'tools/standalone.py',
'exceptions.py',
'utils.py',
'tree.py',
'visitors.py',
'grammar.py',
'lexer.py',
'common.py',
'parse_tree_builder.py',
'parsers/lalr_analysis.py',
'parsers/lalr_parser_state.py',
'parsers/lalr_parser.py',
'parsers/lalr_interactive_parser.py',
'parser_frontends.py',
'lark.py',
'indenter.py',
]
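
# The 'standalone' sections of the files above are concatenated, in this
# order, to form the body of the generated parser.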
def extract_sections(lines):
    """Split an iterable of source lines into named sections delimited by
    '###{name' and '###}' markers, returning a {name: text} dict."""
section = None
text = []
sections = defaultdict(list)
for line in lines:
if line.startswith('###'):
if line[3] == '{':
section = line[4:].strip()
elif line[3] == '}':
sections[section] += text
section = None
text = []
else:
raise ValueError(line)
elif section:
text.append(line)
return {name: ''.join(text) for name, text in sections.items()}
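
# A minimal usage sketch for extract_sections(), with a hypothetical input:
#
#     src = ['###{standalone\n', 'x = 1\n', '###}\n', 'ignored = 2\n']
#     extract_sections(src)   # -> {'standalone': 'x = 1\n'}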
def strip_docstrings(line_gen):
    """Replace docstrings and comments in Python source with short placeholders.

    ``line_gen`` is a readline-style callable, as expected by
    ``tokenize.generate_tokens``.
    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
    """
res = []
prev_toktype = token.INDENT
last_lineno = -1
last_col = 0
tokgen = tokenize.generate_tokens(line_gen)
for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
if slineno > last_lineno:
last_col = 0
if scol > last_col:
res.append(" " * (scol - last_col))
        if toktype == token.STRING and prev_toktype == token.INDENT:
            # Docstring (a string immediately following an indent)
            res.append("#--")
        elif toktype == tokenize.COMMENT:
            # Comment: keep the newline so line numbers are preserved
            res.append("##\n")
else:
res.append(ttext)
prev_toktype = toktype
last_col = ecol
last_lineno = elineno
return ''.join(res)
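
# Usage sketch for strip_docstrings(), with a hypothetical input; tokenize
# expects a readline-style callable, hence the partial(next, ...) wrapper:
#
#     src = 'def f():\n    "doc"\n    return 1  # note\n'
#     strip_docstrings(partial(next, iter(src.splitlines(True))))
#     # the docstring is replaced with '#--' and the comment with '##'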
def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
    """Write a self-contained parser module for ``lark_inst`` to ``out``."""
if output is None:
output = partial(print, file=out)
    import pickle, zlib, base64
    def compressed_output(obj):
        # Pickle, compress and base64-encode obj for embedding in the output
        s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
        c = zlib.compress(s)
        output(repr(base64.b64encode(c)))
    def output_decompress(name):
        # Emit the matching decode-decompress-unpickle line into the generated file
        output('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % locals())
output('# The file was automatically generated by Lark v%s' % lark.__version__)
output('__version__ = "%s"' % lark.__version__)
output()
for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
with open(os.path.join(_larkdir, pyfile)) as f:
code = extract_sections(f)['standalone']
            if i: # if not this file (standalone.py itself keeps its docstrings)
                # tokenize expects a readline-style callable, so wrap the
                # line iterator in partial(next, ...)
                code = strip_docstrings(partial(next, iter(code.splitlines(True))))
output(code)
    # Serialize the parser tables (DATA), plus a memo of its terminals and rules (MEMO)
    data, m = lark_inst.memo_serialize([TerminalDef, Rule])
output('import pickle, zlib, base64')
if compress:
output('DATA = (')
compressed_output(data)
output(')')
output_decompress('DATA')
output('MEMO = (')
compressed_output(m)
output(')')
output_decompress('MEMO')
else:
output('DATA = (')
output(data)
output(')')
output('MEMO = (')
output(m)
output(')')
output('Shift = 0')
output('Reduce = 1')
output("def Lark_StandAlone(**kwargs):")
output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")
def main():
make_warnings_comments()
parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool",
parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options')
parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression")
if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
ns = parser.parse_args()
lark_inst, out = build_lalr(ns)
gen_standalone(lark_inst, out=out, compress=ns.compress)
ns.out.close()
ns.grammar_file.close()
if __name__ == '__main__':
main()
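
# Example invocation (file names are hypothetical):
#
#     python -m lark.tools.standalone my_grammar.lark > my_parser.py
#
# The generated module exposes Lark_StandAlone:
#
#     from my_parser import Lark_StandAlone
#     parser = Lark_StandAlone()
#     tree = parser.parse(...)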