1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
|
from __future__ import print_function
###{standalone
#
#
# Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser with a standard lexer
#
# Git: https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
# >>> LICENSE
#
# This tool and its generated code use a separate license from Lark,
# and are subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
#
# If you wish to purchase a commercial license for this tool and its
# generated code, you may contact me via email or otherwise.
#
# If MPL2 is incompatible with your free or open-source project,
# contact me and we'll work it out.
#
#
import os
from io import open
###}
import codecs
import sys
import token, tokenize
import os
from pprint import pprint
from os import path
from collections import defaultdict
from functools import partial
import lark
from lark import Lark
from lark.parsers.lalr_analysis import Reduce
from lark.grammar import RuleOptions, Rule
from lark.lexer import TerminalDef
_dir = path.dirname(__file__)
_larkdir = path.join(_dir, path.pardir)
EXTRACT_STANDALONE_FILES = [
'tools/standalone.py',
'exceptions.py',
'utils.py',
'tree.py',
'visitors.py',
'indenter.py',
'grammar.py',
'lexer.py',
'common.py',
'parse_tree_builder.py',
'parsers/lalr_parser.py',
'parsers/lalr_analysis.py',
'parser_frontends.py',
'lark.py',
]
def extract_sections(lines):
section = None
text = []
sections = defaultdict(list)
for l in lines:
if l.startswith('###'):
if l[3] == '{':
section = l[4:].strip()
elif l[3] == '}':
sections[section] += text
section = None
text = []
else:
raise ValueError(l)
elif section:
text.append(l)
return {name:''.join(text) for name, text in sections.items()}
def strip_docstrings(line_gen):
""" Strip comments and docstrings from a file.
Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
"""
res = []
prev_toktype = token.INDENT
last_lineno = -1
last_col = 0
tokgen = tokenize.generate_tokens(line_gen)
for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
if slineno > last_lineno:
last_col = 0
if scol > last_col:
res.append(" " * (scol - last_col))
if toktype == token.STRING and prev_toktype == token.INDENT:
# Docstring
res.append("#--")
elif toktype == tokenize.COMMENT:
# Comment
res.append("##\n")
else:
res.append(ttext)
prev_toktype = toktype
last_col = ecol
last_lineno = elineno
return ''.join(res)
def main(fobj, start, print=print):
lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
print('# The file was automatically generated by Lark v%s' % lark.__version__)
print('__version__ = "%s"' % lark.__version__)
print()
for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
with open(os.path.join(_larkdir, pyfile)) as f:
code = extract_sections(f)['standalone']
if i: # if not this file
code = strip_docstrings(partial(next, iter(code.splitlines(True))))
print(code)
data, m = lark_inst.memo_serialize([TerminalDef, Rule])
print( 'DATA = (' )
# pprint(data, width=160)
print(data)
print(')')
print( 'MEMO = (')
print(m)
print(')')
print('Shift = 0')
print('Reduce = 1')
print("def Lark_StandAlone(transformer=None, postlex=None):")
print(" return Lark._load_from_dict(DATA, MEMO, transformer=transformer, postlex=postlex)")
if __name__ == '__main__':
if len(sys.argv) < 2:
print("Lark Stand-alone Generator Tool")
print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]")
sys.exit(1)
if len(sys.argv) == 3:
fn, start = sys.argv[1:]
elif len(sys.argv) == 2:
fn, start = sys.argv[1], 'start'
else:
assert False, sys.argv
with codecs.open(fn, encoding='utf8') as f:
main(f, start)
|