File: test_translate.py

package info (click to toggle)
pypy 7.0.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 107,216 kB
  • sloc: python: 1,201,787; ansic: 62,419; asm: 5,169; cpp: 3,017; sh: 2,534; makefile: 545; xml: 243; lisp: 45; awk: 4
file content (160 lines) | stat: -rw-r--r-- 6,052 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from rpython.rlib.parsing.lexer import *
# Unused, but needed for some obscure reason
from rpython.rlib.parsing.makepackrat import BacktrackException, Status
from rpython.rlib.parsing.regex import *
from rpython.rlib.parsing.parsing import *
from rpython.translator.c.test.test_genc import compile


class TestTranslateLexer(object):
    def get_lexer(self, rexs, names, ignore=None):
        return Lexer(rexs, names, ignore)

    def test_translate_simple(self):
        digits = RangeExpression("0", "9")
        lower = RangeExpression("a", "z")
        upper = RangeExpression("A", "Z")
        keywords = StringExpression("if") | StringExpression("else") | StringExpression("def") | StringExpression("class")
        underscore = StringExpression("_")
        atoms = lower + (upper | lower | digits | underscore).kleene()
        vars = underscore | (upper + (upper | lower | underscore | digits).kleene())
        integers = StringExpression("0") | (RangeExpression("1", "9") + digits.kleene())
        white = StringExpression(" ")
        l1 = self.get_lexer([keywords, atoms, vars, integers, white], ["KEYWORD", "ATOM", "VAR", "INT", "WHITE"])
        l2 = self.get_lexer([keywords, atoms, vars, integers, white], ["KEYWORD", "ATOM", "VAR", "INT", "WHITE"], ["WHITE"])
        def lex(s, ignore=False):
            if ignore:
                tokens = l2.tokenize(s)
            else:
                tokens = l1.tokenize(s)
            return "-%-".join([t.name for t in tokens])
        res = lex("if A a 12341 0 else").split("-%-")
        assert res == ("KEYWORD WHITE VAR WHITE ATOM WHITE INT WHITE "
                       "INT WHITE KEYWORD").split()
        res = lex("if A a 12341 0 else", True).split("-%-")
        assert res == "KEYWORD VAR ATOM INT INT KEYWORD".split()
        func = compile(lex, [str, bool])
        res = lex("if A a 12341 0 else", False).split("-%-")
        assert res == ("KEYWORD WHITE VAR WHITE ATOM WHITE INT WHITE "
                       "INT WHITE KEYWORD").split()
        res = lex("if A a 12341 0 else", True).split("-%-")
        assert res == "KEYWORD VAR ATOM INT INT KEYWORD".split()


def test_translate_parser():
    r0 = Rule("expression", [["additive", "EOF"]])
    r1 = Rule("additive", [["multitive", "+", "additive"], ["multitive"]])
    r2 = Rule("multitive", [["primary", "*", "multitive"], ["primary"]])
    r3 = Rule("primary", [["(", "additive", ")"], ["decimal"]])
    r4 = Rule("decimal", [[symb] for symb in "0123456789"])
    p = PackratParser([r0, r1, r2, r3, r4], "expression")
    tree = p.parse([Token(c, i, SourcePos(i, 0, i))
                        for i, c in enumerate(list("2*(3+4)") + ["EOF"])])
    data = [Token(c, i, SourcePos(i, 0, i))
                for i, c in enumerate(list("2*(3+4)") + ["EOF"])]
    print tree
    def parse(choose):
        tree = p.parse(data, lazy=False)
        return tree.symbol + " " + "-%-".join([c.symbol for c in tree.children])
    func = compile(parse, [bool])
    res1 = parse(True)
    res2 = func(True)
    assert res1 == res2


def test_translate_compiled_parser():
    r0 = Rule("expression", [["additive", "EOF"]])
    r1 = Rule("additive", [["multitive", "+", "additive"], ["multitive"]])
    r2 = Rule("multitive", [["primary", "*", "multitive"], ["primary"]])
    r3 = Rule("primary", [["(", "additive", ")"], ["decimal"]])
    r4 = Rule("decimal", [[symb] for symb in "0123456789"])
    p = PackratParser([r0, r1, r2, r3, r4], "expression")
    compiler = ParserCompiler(p)
    kls = compiler.compile()
    p = kls()
    tree = p.parse([Token(c, i, SourcePos(i, 0, i))
                        for i, c in enumerate(list("2*(3+4)") + ["EOF"])])
    data = [Token(c, i, SourcePos(i, 0, i))
               for i, c in enumerate(list("2*(3+4)") + ["EOF"])]
    print tree
    p = kls()
    def parse(choose):
        tree = p.parse(data)
        return tree.symbol + " " + "-%-".join([c.symbol for c in tree.children])
    func = compile(parse, [bool])
    res1 = parse(True)
    res2 = func(True)
    assert res1 == res2


def test_translate_ast_visitor():
    from rpython.rlib.parsing.ebnfparse import parse_ebnf, make_parse_function
    regexs, rules, ToAST = parse_ebnf("""
DECIMAL: "0|[1-9][0-9]*";
IGNORE: " ";
additive: multitive ["+!"] additive | <multitive>;
multitive: primary ["*!"] multitive | <primary>; #nonsense!
primary: "(" <additive> ")" | <DECIMAL>;
""")
    parse = make_parse_function(regexs, rules)
    def f():
        tree = parse("(0 +! 10) *! (999 +! 10) +! 1")
        tree = ToAST().visit_additive(tree)
        assert len(tree) == 1
        tree = tree[0]
        return tree.symbol + " " + "-&-".join([c.symbol for c in tree.children])
    res1 = f()
    func = compile(f, [])
    res2 = func()
    assert res1 == res2


def test_translate_pypackrat():
    from rpython.rlib.parsing.pypackrat import PackratParser
    class parser(PackratParser):
        """
        expr:
            additive;
        additive:
            a = additive
            '-'
            b = multitive
            return {'(%s - %s)' % (a, b)}
          | multitive;
        multitive:
            a = multitive
            '*'
            b = simple
            return {'(%s * %s)' % (a, b)}
          | simple;
        simple:
            ('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9');
        """
    print parser._code
    def parse(s):
        p = parser(s)
        return p.expr()
    res = parse("5-5-5")
    assert res == '((5 - 5) - 5)'
    func = compile(parse, [str])
    res = func("5-5-5")
    assert res == '((5 - 5) - 5)'

def test_translate_pypackrat_regex():
    from rpython.rlib.parsing.pypackrat import PackratParser
    class parser(PackratParser):
        """
        num:
            `([1-9][0-9]*)|0`;
        """
    print parser._code
    def parse(s):
        p = parser(s)
        return p.num()
    res = parse("1234")
    assert res == '1234'
    func = compile(parse, [str])
    res = func("12345")
    assert res == '12345'
    res = func("0")
    assert res == '0'