File: test_automata.py

package info (click to toggle)
pypy3 7.3.19%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 212,236 kB
  • sloc: python: 2,098,316; ansic: 540,565; sh: 21,462; asm: 14,419; cpp: 4,451; makefile: 4,209; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 12; awk: 4
file content (47 lines) | stat: -rw-r--r-- 1,487 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# coding: utf-8
import pytest

from pypy.interpreter.pyparser.automata import DFA, NonGreedyDFA, DEFAULT, NON_ASCII

def test_states():
    """Check the tables a DFA exposes after construction.

    Two automata are built from per-state transition dicts and their
    ``states``, ``defaults`` and ``max_char`` attributes are compared
    against the expected packed values.
    """
    # Every transition is keyed by an explicit character ("\x00" / "\x01").
    explicit_dfa = DFA([{"\x00": 1}, {"\x01": 0}], [False, True])
    assert explicit_dfa.states == "\x01\xff\xff\x00"
    assert explicit_dfa.defaults == "\xff\xff"
    assert explicit_dfa.max_char == 2

    # The second state only carries a DEFAULT entry; the expected tables are
    # shorter and ``defaults`` now records 0 for that state.
    # NOTE(review): "\xff" presumably encodes "no transition" -- confirm
    # against automata.py.
    default_dfa = DFA([{"\x00": 1}, {DEFAULT: 0}], [False, True])
    assert default_dfa.states == "\x01\x00"
    assert default_dfa.defaults == "\xff\x00"
    assert default_dfa.max_char == 1

def test_recognize():
    """Check ``recognize`` on DFA and NonGreedyDFA with the same automata.

    For both inputs that start with "a", DFA is expected to return 5 and
    NonGreedyDFA 1; an input starting with "c" must give -1 in every case.
    """
    # (automaton class, value expected from recognize() on a matching input)
    expectations = ((DFA, 5), (NonGreedyDFA, 1))

    for automaton_cls, expected in expectations:
        # Explicit "a" / "b" transitions only.
        explicit = automaton_cls([{"a": 1}, {"b": 0}], [False, True])
        assert explicit.recognize("ababab") == expected
        assert explicit.recognize("c") == -1

        # The second state falls back on DEFAULT instead of an explicit "b".
        fallback = automaton_cls([{"a": 1}, {DEFAULT: 0}], [False, True])
        assert fallback.recognize("a,a?ab") == expected
        assert fallback.recognize("c") == -1

def test_nonascii():
    """Check how the automata treat NON_ASCII transitions on UTF-8 input."""
    # DFA: "a" followed by any number of non-ASCII bytes.
    greedy = DFA([{"a": 1}, {NON_ASCII: 1}], [False, True])
    encoded = u"aüüüü".encode("utf-8")
    # The whole encoded input is expected to be consumed.
    assert greedy.recognize(encoded) == len(encoded)
    assert greedy.recognize("c") == -1
    # A non-ASCII start is rejected: state 0 only has an "a" transition.
    assert greedy.recognize("ü") == -1

    # NonGreedyDFA: non-ASCII bytes loop on state 0, "b" moves to state 1.
    lazy = NonGreedyDFA([{NON_ASCII: 0, "b": 1}, {"b": 0}], [False, True])
    encoded = u"üübbbb".encode("utf-8")
    shortest_match = u"üüb".encode("utf-8")
    # Expected result equals the byte length of the prefix up to the first "b".
    assert lazy.recognize(encoded) == len(shortest_match)
    assert lazy.recognize("c") == -1

    # Building a DFA with the key "\x81" must raise ValueError.
    with pytest.raises(ValueError):
        DFA([{"\x81": 2}], [True])