1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
# Generated by re2py
# re2py $INPUT -o $OUTPUT
from enum import Enum
import os
# Size in bytes of the lexer's input buffer (State.yyinput is allocated with
# this length, and fill() never reads past it).
BUFSIZE = 4096
# Number of NUL sentinel bytes that fill() appends after the last input byte
# once the file is exhausted; State reserves this much room at the buffer end.
YYMAXFILL = 1
class State:
    """Buffered input state shared between lex() and fill().

    Attributes (all positions are indices into ``yyinput``):
        file: the input file, opened in binary mode.
        yyinput: bytearray holding the currently buffered chunk of input.
        yylimit: index one past the last valid byte in ``yyinput``.
        yycursor: index of the next byte the lexer will read.
        yymarker: extra position that fill() shifts together with the
            others; the visible lexer code does not otherwise use it.
        token: index where the lexeme currently being matched starts.
        eof: set to True by fill() once the file has been exhausted.
    """

    def __init__(self, fname):
        """Open *fname* for binary reading and set up an empty buffer.

        Raises:
            OSError: if *fname* cannot be opened.
        """
        self.file = open(fname, "rb")
        self.yyinput = bytearray(BUFSIZE)
        # Every position starts at the limit: the buffer holds no valid data
        # yet, so the first bounds check in lex() calls fill(), which then
        # discards the whole (empty) prefix and reads the first chunk.
        self.yylimit = BUFSIZE - YYMAXFILL
        self.yycursor = self.yylimit
        self.yymarker = self.yylimit
        self.token = self.yylimit
        self.eof = False

    def __del__(self):
        """Close the input file when the object is finalized."""
        # __del__ also runs when __init__ failed before `file` was assigned
        # (e.g. open() raised), so the attribute may not exist.
        file = getattr(self, "file", None)
        if file is not None:
            file.close()
class Status(Enum):
    """Result codes returned by fill().

    OK (0): the buffer was refilled (or the end-of-input sentinel appended).
    EOF (1): fill() was called again after end of file had been reached.
    LONG_LEXEME (2): not enough buffer space could be freed for the request.
    """

    # Members are numbered consecutively from zero, in declaration order.
    OK, EOF, LONG_LEXEME = range(3)
def fill(st, need):
    """Refill ``st.yyinput`` with more data from ``st.file``.

    Everything before the current token is discarded, the remaining bytes
    are moved to the start of the buffer, all positions are adjusted
    accordingly, and freshly read data is appended. When the file has no
    more data, YYMAXFILL NUL sentinel bytes are appended instead and
    ``st.eof`` is set.

    Args:
        st: the State to refill (modified in place).
        need: number of bytes the lexer needs; at least this many bytes
            must be reclaimable in front of the current token.

    Returns:
        Status.OK on success, Status.EOF if end of file was already reached
        by an earlier call, Status.LONG_LEXEME if fewer than *need* bytes
        can be freed.
    """
    if st.eof:
        return Status.EOF

    # Error: lexeme too long. In real life could reallocate a larger buffer.
    if st.token < need:
        return Status.LONG_LEXEME

    # Shift buffer contents (discard everything up to the current token).
    shift = st.token
    st.yyinput = st.yyinput[shift:st.yylimit]
    st.yycursor -= shift
    st.yymarker -= shift
    st.yylimit -= shift
    st.token = 0

    # Fill free space at the end of buffer with new data from file, keeping
    # YYMAXFILL bytes in reserve for the end-of-input sentinel. Because at
    # least `need` bytes were just freed, the request size is never zero, so
    # an empty result really does mean end of file.
    chunk = st.file.read(BUFSIZE - st.yylimit - YYMAXFILL)
    if not chunk:
        st.eof = True  # end of file
        st.yylimit += YYMAXFILL
        st.yyinput += b"\0" * YYMAXFILL
    else:
        st.yylimit += len(chunk)
        st.yyinput += chunk

    return Status.OK
def lex(yyrecord):
count = 0
while True:
yyrecord.token = yyrecord.yycursor
yystate = 0
while True:
match yystate:
case 0:
if yyrecord.yylimit <= yyrecord.yycursor:
if fill(yyrecord, 1) != Status.OK: return -1
yych = yyrecord.yyinput[yyrecord.yycursor]
yyrecord.yycursor += 1
if yych <= 0x20:
if yych <= 0x00:
yystate = 1
continue
if yych <= 0x1F:
yystate = 2
continue
yystate = 3
continue
else:
if yych == 0x27:
yystate = 4
continue
yystate = 2
continue
case 1:
# Check that it is the sentinel, not some unexpected null.
return count if yyrecord.token == yyrecord.yylimit - YYMAXFILL else -1
case 2:
return -1
case 3:
if yyrecord.yylimit <= yyrecord.yycursor:
if fill(yyrecord, 1) != Status.OK: return -1
yych = yyrecord.yyinput[yyrecord.yycursor]
if yych == 0x20:
yyrecord.yycursor += 1
yystate = 3
continue
break
case 4:
if yyrecord.yylimit <= yyrecord.yycursor:
if fill(yyrecord, 1) != Status.OK: return -1
yych = yyrecord.yyinput[yyrecord.yycursor]
yyrecord.yycursor += 1
if yych == 0x27:
yystate = 5
continue
if yych == 0x5C:
yystate = 6
continue
yystate = 4
continue
case 5:
count += 1
break
case 6:
if yyrecord.yylimit <= yyrecord.yycursor:
if fill(yyrecord, 1) != Status.OK: return -1
yyrecord.yycursor += 1
yystate = 4
continue
case _:
raise "internal lexer error"
def main():
    """Self-test: write a sample input file, lex it, and check the count.

    The input consists of BUFSIZE repetitions of three quoted strings (one
    containing a NUL byte, one containing an escaped quote), so the lexer
    has to refill its buffer many times and must report 3 * BUFSIZE strings.
    The temporary file is removed even if lexing or the check fails.

    Raises:
        AssertionError: if the lexer reports a different number of strings.
    """
    fname = "input"

    # Prepare input file.
    with open(fname, "w") as f:
        f.write("'qu\0tes' 'are' 'fine: \\'' " * BUFSIZE)

    try:
        # Run lexer on the prepared file.
        st = State(fname)
        try:
            count = lex(st)
        finally:
            # Close the handle before the file is removed: an open file
            # cannot be deleted on Windows. Closing twice is harmless, so
            # State.__del__ may still call close() later.
            st.file.close()
        assert count == 3 * BUFSIZE, f"expected {3 * BUFSIZE} strings, got {count}"
    finally:
        # Cleanup.
        os.remove(fname)


# Run the self-test only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|