File: test_lexer.py

package info (click to toggle)
pypy 7.0.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 107,216 kB
  • sloc: python: 1,201,787; ansic: 62,419; asm: 5,169; cpp: 3,017; sh: 2,534; makefile: 545; xml: 243; lisp: 45; awk: 4
file content (158 lines) | stat: -rw-r--r-- 7,646 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import py
from rpython.rlib.parsing.lexer import *
from rpython.rlib.parsing.regex import *
from rpython.rlib.parsing import deterministic

class TestDirectLexer(object):
    """Exercise ``Lexer`` on a small keyword grammar.

    Every test obtains its lexer through ``get_lexer`` and, except for
    ``test_pro``, uses the rule set returned by ``_keyword_rules``.  The
    ``SourcePos`` arguments in the expected tokens appear to be
    (offset, line, column), judging by the values asserted in the position
    tests.
    """

    def get_lexer(self, rexs, names, ignore=None):
        """Return the lexer under test.

        rexs   -- list of regular expression objects, one per token kind
        names  -- token names, parallel to ``rexs``
        ignore -- optional list of token names passed through to ``Lexer``
        """
        return Lexer(rexs, names, ignore)

    def _keyword_rules(self, with_newline=False):
        """Return fresh ``(rexs, names)`` lists for the keyword grammar.

        The base grammar knows "if", "else", "while", ":" and a single space.
        With ``with_newline`` set, a "\\n" rule named "NL" is appended.
        """
        rexs = [StringExpression("if"), StringExpression("else"),
                StringExpression("while"), StringExpression(":"),
                StringExpression(" ")]
        names = ["IF", "ELSE", "WHILE", "COLON", "WHITE"]
        if with_newline:
            rexs.append(StringExpression("\n"))
            names.append("NL")
        return rexs, names

    def test_simple(self):
        """Step a runner token by token, then tokenize a second input."""
        rexs, names = self._keyword_rules()
        lexer = self.get_lexer(rexs, names)
        runner = lexer.get_runner("if: else: while:")
        expected = [
            Token("IF", "if", SourcePos(0, 0, 0)),
            Token("COLON", ":", SourcePos(2, 0, 2)),
            Token("WHITE", " ", SourcePos(3, 0, 3)),
            Token("ELSE", "else", SourcePos(4, 0, 4)),
            Token("COLON", ":", SourcePos(8, 0, 8)),
            Token("WHITE", " ", SourcePos(9, 0, 9)),
            Token("WHILE", "while", SourcePos(10, 0, 10)),
            Token("COLON", ":", SourcePos(15, 0, 15)),
        ]
        for token in expected:
            assert runner.find_next_token() == token
        # The runner was created without eof=True, so once the input is
        # consumed the next request must raise StopIteration.
        py.test.raises(StopIteration, runner.find_next_token)
        found = [t.name for t in lexer.tokenize("if if if: else while")]
        assert found == ("IF WHITE IF WHITE IF COLON WHITE ELSE "
                         "WHITE WHILE").split()

    def test_pro(self):
        """Tokenize with overlapping rules built from regex combinators.

        "if" and "else" also match the ATOM pattern, yet the expected result
        classifies them as KEYWORD.
        """
        digits = RangeExpression("0", "9")
        lower = RangeExpression("a", "z")
        upper = RangeExpression("A", "Z")
        keywords = (StringExpression("if") | StringExpression("else") |
                    StringExpression("def") | StringExpression("class"))
        underscore = StringExpression("_")
        atoms = lower + (upper | lower | digits | underscore).kleene()
        # Named ``variables`` rather than ``vars`` to avoid shadowing the
        # builtin of that name.
        variables = underscore | (
            upper + (upper | lower | underscore | digits).kleene())
        integers = StringExpression("0") | (
            RangeExpression("1", "9") + digits.kleene())
        white = StringExpression(" ")
        lexer = self.get_lexer(
            [keywords, atoms, variables, integers, white],
            ["KEYWORD", "ATOM", "VAR", "INT", "WHITE"])
        found = [t.name for t in lexer.tokenize("if A a 12341 0 else")]
        assert found == ("KEYWORD WHITE VAR WHITE ATOM WHITE INT WHITE "
                         "INT WHITE KEYWORD").split()

    def test_ignore(self):
        """Tokens whose name is in the ignore list are left out of the result."""
        rexs, names = self._keyword_rules()
        lexer = self.get_lexer(rexs, names, ["WHITE"])
        found = [t.name for t in lexer.tokenize("if if if: else while")]
        assert found == "IF IF IF COLON ELSE WHILE".split()

    def test_errors(self):
        """An unmatched character raises LexerError pointing at that character."""
        rexs, names = self._keyword_rules()
        lexer = self.get_lexer(rexs, names, ["WHITE"])
        info = py.test.raises(deterministic.LexerError,
                              lexer.tokenize, "if if if: a else while")
        exc = info.value
        # The error carries the input and a source position; indexing the
        # input at that position must yield the offending "a".
        assert exc.input[exc.source_pos.i] == "a"

    def test_eof(self):
        """With eof=True the last token is an EOF token placed at len(input)."""
        rexs, names = self._keyword_rules()
        lexer = self.get_lexer(rexs, names, ["WHITE"])
        s = "if if if: else while"
        eof_token = Token("EOF", "EOF", SourcePos(len(s), 0, len(s)))
        # Both entry points are checked: iterating a runner and tokenize().
        tokens = list(lexer.get_runner(s, eof=True))
        # Diagnostic output; the parenthesised single-argument form prints
        # the same text under the Python 2 print statement.
        print(tokens)
        assert tokens[-1] == eof_token
        tokens = lexer.tokenize(s, eof=True)
        print(tokens)
        assert tokens[-1] == eof_token

    def test_position(self):
        """Line and column tracking across newlines, with NL tokens kept."""
        rexs, names = self._keyword_rules(with_newline=True)
        lexer = self.get_lexer(rexs, names, ["WHITE"])
        s = "if\nif if:\nelse while\n"
        tokens = list(lexer.get_runner(s, eof=True))
        assert tokens[0] == Token("IF", "if", SourcePos(0, 0, 0))
        assert tokens[1] == Token("NL", "\n", SourcePos(2, 0, 2))
        assert tokens[2] == Token("IF", "if", SourcePos(3, 1, 0))
        assert tokens[3] == Token("IF", "if", SourcePos(6, 1, 3))
        assert tokens[4] == Token("COLON", ":", SourcePos(8, 1, 5))
        assert tokens[5] == Token("NL", "\n", SourcePos(9, 1, 6))
        assert tokens[6] == Token("ELSE", "else", SourcePos(10, 2, 0))
        assert tokens[7] == Token("WHILE", "while", SourcePos(15, 2, 5))
        assert tokens[8] == Token("NL", "\n", SourcePos(20, 2, 10))
        assert tokens[9] == Token("EOF", "EOF", SourcePos(21, 3, 0))

    def test_position_ignore(self):
        """Positions stay correct when the newline tokens are ignored too."""
        rexs, names = self._keyword_rules(with_newline=True)
        lexer = self.get_lexer(rexs, names, ["WHITE", "NL"])
        s = "if\nif if:\nelse while\n"
        tokens = list(lexer.get_runner(s, eof=True))
        assert tokens[0] == Token("IF", "if", SourcePos(0, 0, 0))
        assert tokens[1] == Token("IF", "if", SourcePos(3, 1, 0))
        assert tokens[2] == Token("IF", "if", SourcePos(6, 1, 3))
        assert tokens[3] == Token("COLON", ":", SourcePos(8, 1, 5))
        assert tokens[4] == Token("ELSE", "else", SourcePos(10, 2, 0))
        assert tokens[5] == Token("WHILE", "while", SourcePos(15, 2, 5))
        assert tokens[6] == Token("EOF", "EOF", SourcePos(21, 3, 0))

    def test_left_stuff_at_eof(self):
        """Input ending in an incomplete keyword raises LexerError.

        "whi" is only a prefix of "while"; after the three complete tokens
        the next request must fail even though the runner uses eof=True.
        """
        rexs, names = self._keyword_rules(with_newline=True)
        lexer = self.get_lexer(rexs, names)
        runner = lexer.get_runner("if: whi", eof=True)
        for expected_name in ("IF", "COLON", "WHITE"):
            tok = runner.find_next_token()
            assert tok.name == expected_name
        py.test.raises(deterministic.LexerError, runner.find_next_token)

class TestSourcePos(object):
    """Checks on ``SourcePos.copy()``."""

    def test_copy(self):
        """A copy equals its original until any one attribute is changed."""
        original = SourcePos(1, 2, 3)
        # (attribute name, replacement value) pairs; each one is applied to
        # its own fresh copy, so the order they are tried in is irrelevant.
        changes = (('i', 4), ('lineno', 5), ('columnno', 6))
        for name, replacement in changes:
            duplicate = original.copy()
            assert original == duplicate
            # Alter exactly one attribute on the copy only.
            setattr(duplicate, name, replacement)
            assert original != duplicate

class TestToken(object):
    """Checks on ``Token.copy()``."""

    def test_copy(self):
        """A copy equals its original until one attribute is rebound.

        Also checks that the copy is shallow: ``source_pos`` is shared
        between the original and the copy.
        """
        original = Token('test', 'spource', SourcePos(1, 2, 3))
        # (attribute name, replacement value) pairs; each one is applied to
        # its own fresh copy, so the order they are tried in is irrelevant.
        changes = (
            ('name', 'xxx'),
            ('source', 'yyy'),
            ('source_pos', SourcePos(4, 5, 6)),
        )
        for name, replacement in changes:
            duplicate = original.copy()
            assert original == duplicate
            # Rebind exactly one attribute on the copy only.
            setattr(duplicate, name, replacement)
            assert original != duplicate

        # copy() is not deep: mutating the position object through the copy
        # changes the original as well, so the two tokens still compare equal.
        duplicate = original.copy()
        duplicate.source_pos.i = 0
        assert original == duplicate