1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
|
#!/usr/bin/env python3.8
"""pegen -- PEG Generator.
Search the web for PEG Parsers for reference.
"""
import argparse
import sys
import time
import token
import traceback
from typing import Tuple
from pegen.grammar import Grammar
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.tokenizer import Tokenizer
from pegen.validator import validate_grammar
def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate the C parser described by the parsed command line *args*.

    The work is delegated to ``pegen.build.build_c_parser_and_generator``,
    which receives the grammar and tokens file names, the output path, the
    ``--compile-extension``, ``--optimized`` and ``--skip-actions`` settings
    and the verbosity flags derived from ``-v``.

    Returns the ``(grammar, parser, tokenizer, generator)`` tuple produced by
    the builder.

    If the builder raises, the exception is re-raised when ``-v`` was given
    (so the full traceback is shown); otherwise only the exception itself is
    printed, followed by a hint, and the process exits with status 1.
    """
    from pegen.build import build_c_parser_and_generator

    verbosity = args.verbose
    # Tokenizer tracing starts at -vvv; parser tracing is enabled at exactly
    # -vv and again from -vvvv upwards.
    trace_tokenizer = verbosity >= 3
    trace_parser = verbosity == 2 or verbosity >= 4

    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            trace_tokenizer,
            trace_parser,
            verbosity,
            # Asserts are kept in the extension unless --optimized was given.
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
    except Exception as err:
        if verbosity:
            raise  # Show traceback
        # Passing None as the traceback prints the exception without a stack.
        traceback.print_exception(type(err), err, None)
        print("For full traceback, use -v", file=sys.stderr)
        sys.exit(1)
    else:
        return grammar, parser, tokenizer, gen
def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate the Python parser described by the parsed command line *args*.

    The work is delegated to
    ``pegen.build.build_python_parser_and_generator``, which receives the
    grammar file name, the output path, the tokenizer/parser verbosity flags
    derived from ``-v`` and the ``--skip-actions`` setting.

    Returns the ``(grammar, parser, tokenizer, generator)`` tuple produced by
    the builder.

    If the builder raises, the exception is re-raised when ``-v`` was given
    (so the full traceback is shown); otherwise only the exception itself is
    printed, followed by a hint, and the process exits with status 1.
    """
    from pegen.build import build_python_parser_and_generator

    verbosity = args.verbose
    # Tokenizer tracing starts at -vvv; parser tracing is enabled at exactly
    # -vv and again from -vvvv upwards.
    trace_tokenizer = verbosity >= 3
    trace_parser = verbosity == 2 or verbosity >= 4

    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            trace_tokenizer,
            trace_parser,
            skip_actions=args.skip_actions,
        )
    except Exception as err:
        if verbosity:
            raise  # Show traceback
        # Passing None as the traceback prints the exception without a stack.
        traceback.print_exception(type(err), err, None)
        print("For full traceback, use -v", file=sys.stderr)
        sys.exit(1)
    else:
        return grammar, parser, tokenizer, gen
def _add_skip_actions_option(target_parser: argparse.ArgumentParser) -> None:
    """Register the ``--skip-actions`` flag, which both sub-commands accept."""
    target_parser.add_argument(
        "--skip-actions",
        action="store_true",
        help="Suppress code emission for rule actions",
    )


# Options common to every invocation.
argparser = argparse.ArgumentParser(
    prog="pegen",
    description="Experimental PEG-like parser generator",
)
argparser.add_argument(
    "-q",
    "--quiet",
    action="store_true",
    help="Don't print the parsed grammar",
)
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)

# One sub-command per target language; each stores its generator function
# in ``args.func`` so that main() can dispatch on it.
subparsers = argparser.add_subparsers(help="target language for the generated code")

# "c" sub-command.
c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.c",
    help="Where to write the generated parser",
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized",
    action="store_true",
    help="Compile the extension in optimized mode",
)
_add_skip_actions_option(c_parser)

# "python" sub-command.
python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.py",
    help="Where to write the generated parser",
)
_add_skip_actions_option(python_parser)
def main() -> None:
    """Entry point of the ``pegen`` command line.

    Parses the command line, runs the generator function selected by the
    sub-command (``args.func``) while timing it, validates the resulting
    grammar and then reports:

    * unless ``-q`` is given, the grammar (``str()`` form, preceded by the
      ``repr()`` form when ``-v`` is given);
    * with ``-v``, the generator's first graph and first SCCs, timing
      figures, cache sizes and memory statistics.

    Exits through ``argparser.error`` when no sub-command was supplied.
    """
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    started = time.time()
    grammar, parser, tokenizer, gen = args.func(args)
    elapsed = time.time() - started

    validate_grammar(grammar)

    if not args.quiet:
        # Each entry is (heading, renderer); the raw dump goes first with -v.
        views = [("Clean Grammar:", str)]
        if args.verbose:
            views.insert(0, ("Raw Grammar:", repr))
        for heading, render in views:
            print(heading)
            for row in render(grammar).splitlines():
                print(" ", row)

    # Everything below is diagnostic output that is only shown with -v.
    if not args.verbose:
        return

    print("First Graph:")
    for rule, successors in gen.first_graph.items():
        print(f" {rule} -> {', '.join(successors)}")

    print("First SCCS:")
    for scc in gen.first_sccs:
        print(" ", scc, end="")
        if len(scc) <= 1:
            # A lone rule is flagged when it appears among its own
            # first-graph successors.
            only = next(iter(scc))
            print(" # Left-recursive" if only in gen.first_graph[only] else "")
        else:
            leaders = {name for name in scc if grammar.rules[name].leader}
            print(" # Indirectly left-recursive; leaders:", leaders)

    last_token = tokenizer.diagnose()
    line_count = last_token.end[0]
    # NOTE(review): presumably the end marker sits on a line past the last
    # source line, hence the adjustment -- confirm against the tokenizer.
    if last_token.type == token.ENDMARKER:
        line_count -= 1
    print(f"Total time: {elapsed:.3f} sec; {line_count} lines", end="")
    # The rate is omitted when the elapsed time is zero (avoids dividing by 0).
    print(f"; {line_count / elapsed:.0f} lines/sec" if elapsed else "")

    print("Caches sizes:")
    print(f" token array : {len(tokenizer._tokens):10}")
    print(f" cache : {len(parser._cache):10}")
    if not print_memstats():
        print("(Can't find psutil; install it for memory stats.)")
if __name__ == "__main__":
    # Only run on Python 3.8 or newer; otherwise report the requirement on
    # stderr and exit with a failure status.
    if sys.version_info >= (3, 8):
        main()
    else:
        print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
        sys.exit(1)
|