File: test_automata.py

package info (click to toggle)
pypy3 7.0.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 111,848 kB
  • sloc: python: 1,291,746; ansic: 74,281; asm: 5,187; cpp: 3,017; sh: 2,533; makefile: 544; xml: 243; lisp: 45; csh: 21; awk: 4
file content (47 lines) | stat: -rw-r--r-- 1,487 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# coding: utf-8
import pytest

from pypy.interpreter.pyparser.automata import DFA, NonGreedyDFA, DEFAULT, NON_ASCII

def test_states():
    d = DFA([{"\x00": 1}, {"\x01": 0}], [False, True])
    assert d.states == "\x01\xff\xff\x00"
    assert d.defaults == "\xff\xff"
    assert d.max_char == 2

    d = DFA([{"\x00": 1}, {DEFAULT: 0}], [False, True])
    assert d.states == "\x01\x00"
    assert d.defaults == "\xff\x00"
    assert d.max_char == 1

def test_recognize():
    d = DFA([{"a": 1}, {"b": 0}], [False, True])
    assert d.recognize("ababab") == 5
    assert d.recognize("c") == -1

    d = DFA([{"a": 1}, {DEFAULT: 0}], [False, True])
    assert d.recognize("a,a?ab") == 5
    assert d.recognize("c") == -1

    d = NonGreedyDFA([{"a": 1}, {"b": 0}], [False, True])
    assert d.recognize("ababab") == 1
    assert d.recognize("c") == -1

    d = NonGreedyDFA([{"a": 1}, {DEFAULT: 0}], [False, True])
    assert d.recognize("a,a?ab") == 1
    assert d.recognize("c") == -1

def test_nonascii():
    d = DFA([{"a": 1}, {NON_ASCII: 1}], [False, True])
    input = u"aüüüü".encode("utf-8")
    assert d.recognize(input) == len(input)
    assert d.recognize("c") == -1
    assert d.recognize("ü") == -1

    d = NonGreedyDFA([{NON_ASCII: 0, "b": 1}, {"b": 0}], [False, True])
    input = u"üübbbb".encode("utf-8")
    assert d.recognize(input) == len(u"üüb".encode("utf-8"))
    assert d.recognize("c") == -1

    pytest.raises(ValueError, DFA, [{"\x81": 2}], [True])