File: 01_fill.re

package info (click to toggle)
re2c 4.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 51,512 kB
  • sloc: cpp: 34,160; ml: 8,494; sh: 5,311; makefile: 1,014; haskell: 611; python: 431; ansic: 234; javascript: 113
file content (90 lines) | stat: -rw-r--r-- 2,034 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# re2py $INPUT -o $OUTPUT

from enum import Enum
import os

BUFSIZE = 4096

class State:
    def __init__(self, fname):
        self.file = open(fname, "rb")
        self.yyinput = bytearray(BUFSIZE)
        self.yylimit = BUFSIZE - 1 # exclude terminating null
        self.yycursor = self.yylimit
        self.yymarker = self.yylimit
        self.token = self.yylimit
        self.eof = False

    def __del__(self):
        self.file.close()

class Status(Enum):
    OK = 0
    EOF = 1
    LONG_LEXEME = 2

def fill(st):
    if st.eof:
        return Status.EOF

    # Error: lexeme too long. In real life could reallocate a larger buffer.
    if st.token < 1:
        return Status.LONG_LEXEME

    # Shift buffer contents (discard everything up to the current token).
    st.yyinput = st.yyinput[st.token:st.yylimit]
    st.yycursor -= st.token;
    st.yymarker -= st.token;
    st.yylimit -= st.token;
    st.token = 0;

    # Fill free space at the end of buffer with new data from file.
    bytes = st.file.read(BUFSIZE - st.yylimit - 1) # -1 for sentinel
    if not bytes:
        st.eof = True # end of file
    else:
        st.yylimit += len(bytes);
        st.yyinput += bytes

    st.yyinput += b'\0' # append sentinel

    return Status.OK

def lex(yyrecord, count):
    while True:
        yyrecord.token = yyrecord.yycursor
    %{
        re2c:api = record;
        re2c:define:YYFILL = "fill(yyrecord) == Status.OK";
        re2c:eof = 0;
        re2c:indent:top = 2;

        str = ['] ([^'\\] | [\\][^])* ['];

        *    { return -1 }
        $    { return count }
        [ ]+ { break }
        str  {
            count += 1
            break
        }
    %}

def main():
    fname = "input"

    # Prepare input file.
    f = open(fname, "w")
    for i in range(BUFSIZE):
        f.write("'qu\0tes' 'are' 'fine: \\'' ")
    f.close()

    # Run lexer on the prepared file.
    st = State(fname)
    assert lex(st, 0) == 3 * BUFSIZE

    # Cleanup.
    os.remove(fname)

if __name__ == '__main__':
    main()