File: automata.py

package info (click to toggle)
pypy3 7.0.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 111,848 kB
  • sloc: python: 1,291,746; ansic: 74,281; asm: 5,187; cpp: 3,017; sh: 2,533; makefile: 544; xml: 243; lisp: 45; csh: 21; awk: 4
file content (131 lines) | stat: -rw-r--r-- 4,406 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# ______________________________________________________________________
"""Module automata

THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED
TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists
instead of a unique nested one)

$Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $
"""
# ______________________________________________________________________
# Module level definitions

# PYPY Modification: removed the EMPTY class as it's not needed here


# PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython
# dicts (see pytokenize.py).  Then DFA.__init__() turns these dicts into
# more compact strings.
DEFAULT = object()

# PYPY Modification : removed all automata functions (any, maybe,
#                     newArcPair, etc.)

ERROR_STATE = chr(255)

# NB: all non-ascii bytes (>= 128) will be turned into 128
NON_ASCII = chr(128)


class DFA:
    # ____________________________________________________________
    def __init__(self, states, accepts, start = 0):
        """ NOT_RPYTHON """
        assert len(states) < 255 # no support for huge amounts of states
        # construct string for looking up state transitions
        string_states = [] * len(states)
        # compute maximum
        maximum = 0
        for state in states:
            for key in state:
                if key == DEFAULT:
                    continue
                ordkey = ord(key)
                if ordkey > 128:
                    raise ValueError("DFA does not support matching of specific non-ASCII character %r. Use NON_ASCII instead" % key)
                maximum = max(ordkey, maximum)
        self.max_char = maximum + 1

        defaults = []
        for i, state in enumerate(states):
            default = ERROR_STATE
            if DEFAULT in state:
                default = chr(state[DEFAULT])
            defaults.append(default)
            string_state = [default] * self.max_char
            for key, value in state.iteritems():
                if key == DEFAULT:
                    continue
                assert len(key) == 1
                assert ord(key) < self.max_char
                string_state[ord(key)] = chr(value)
            string_states.extend(string_state)
        self.states = "".join(string_states)
        self.defaults = "".join(defaults)
        self.accepts = accepts
        self.start = start

    # ____________________________________________________________

    def _next_state(self, item, crntState):
        if ord(item) >= self.max_char:
            return self.defaults[crntState]
        else:
            return self.states[crntState * self.max_char + ord(item)]

    def recognize(self, inVec, pos = 0):
        crntState = self.start
        lastAccept = False
        i = pos
        for i in range(pos, len(inVec)):
            item = inVec[i]
            if ord(item) > 0x80:
                item = NON_ASCII
            accept = self.accepts[crntState]
            crntState = self._next_state(item, crntState)
            if crntState != ERROR_STATE:
                pass
            elif accept:
                return i
            elif lastAccept:
                # This is now needed b/c of exception cases where there are
                # transitions to dead states
                return i - 1
            else:
                return -1
            crntState = ord(crntState)
            lastAccept = accept
        # if self.states[crntState][1]:
        if self.accepts[crntState]:
            return i + 1
        elif lastAccept:
            return i
        else:
            return -1

# ______________________________________________________________________

class NonGreedyDFA (DFA):

    def recognize(self, inVec, pos = 0):
        crntState = self.start
        i = pos
        for i in range(pos, len(inVec)):
            item = inVec[i]
            if ord(item) > 0x80:
                item = NON_ASCII
            accept = self.accepts[crntState]
            if accept:
                return i
            crntState = self._next_state(item, crntState)
            if crntState == ERROR_STATE:
                return -1
            crntState = ord(crntState)
            i += 1
        if self.accepts[crntState]:
            return i
        else:
            return -1

# ______________________________________________________________________
# End of automata.py