File: 02_fill.re

package info (click to toggle)
re2c 4.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 51,512 kB
  • sloc: cpp: 34,160; ml: 8,494; sh: 5,311; makefile: 1,014; haskell: 611; python: 431; ansic: 234; javascript: 113
file content (97 lines) | stat: -rw-r--r-- 3,201 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
// re2java $INPUT -o $OUTPUT

import java.io.*;
import java.nio.file.*;
import java.util.Arrays;

/**
 * Buffered lexer that counts single-quoted strings in a file.
 *
 * <p>The input is read in chunks into a fixed-size buffer that is refilled on
 * demand by {@link #fill(int)}. The end of input is marked by a run of
 * {@code YYMAXFILL} zero bytes appended after the last byte read from the file,
 * so that the generated lexer can tell the sentinel from a zero byte that is
 * part of the input.
 *
 * <p>The lexer owns the underlying stream; call {@link #close()} (or use
 * try-with-resources) when done.
 */
class Lexer implements Closeable {
    // re2c directive: the YYMAXFILL constant used below is expected to be
    // generated in place of this comment.
    /*!max:re2c*/
    public static final int BUFSIZE = 4096;

    private final BufferedInputStream stream;
    private byte[] yyinput;  // buffer: BUFSIZE bytes of data + YYMAXFILL bytes of padding
    private int yycursor;    // index of the next byte to be examined
    private int yylimit;     // index one past the last valid byte in the buffer
    private int token;       // index where the current lexeme starts
    private boolean eof;     // set once the end of the file has been reached

    /**
     * Opens {@code file} for lexing.
     *
     * @param file the input file
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public Lexer(File file) throws FileNotFoundException {
        stream = new BufferedInputStream(new FileInputStream(file));
        // Prepare lexer state: all offsets are at the end of buffer.
        // This immediately triggers YYFILL, as the YYLESSTHAN condition is true.
        yyinput = new byte[BUFSIZE + YYMAXFILL];
        yycursor = yylimit = token = BUFSIZE;
        eof = false;
    }

    /**
     * Closes the underlying input stream.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        stream.close();
    }

    /**
     * Discards the already-lexed prefix of the buffer and refills the freed
     * space with data from the file.
     *
     * @param need number of bytes the lexer requires
     * @return 0 on success, -1 if the end of input was already reached,
     *         -2 if the current lexeme leaves less than {@code need} bytes to discard
     * @throws IOException if reading from the file fails
     */
    private int fill(int need) throws IOException {
        if (eof) { return -1; } // unexpected EOF

        // Error: lexeme too long. In real life can reallocate a larger buffer.
        if (token < need) { return -2; }

        // Shift buffer contents (discard everything up to the current token).
        System.arraycopy(yyinput, token, yyinput, 0, yylimit - token);
        yycursor -= token;
        yylimit -= token;
        token = 0;

        // Fill free space at the end of buffer with new data from file.
        // readNBytes() keeps reading until the requested length is available or
        // the stream ends, and it returns 0 (never -1) at end of stream. A plain
        // read() may return -1, which would move yylimit backwards, or return a
        // short count before the end of the file.
        yylimit += stream.readNBytes(yyinput, yylimit, BUFSIZE - yylimit);
        yyinput[yylimit] = 0; // append sentinel symbol

        // If read less than expected, this is the end of input.
        if (yylimit < BUFSIZE) {
            eof = true;
            // Pad with YYMAXFILL zero bytes; lex() recognizes the end of input
            // by a zero byte located exactly at yylimit - YYMAXFILL.
            Arrays.fill(yyinput, yylimit, yylimit + YYMAXFILL, (byte)0);
            yylimit += YYMAXFILL;
        }

        return 0;
    }

    /**
     * Lexes the whole input, which must consist of single-quoted strings
     * (backslash escapes the following byte) separated by spaces.
     *
     * @return the number of quoted strings on success, -1 on a lexical error
     *         (unrecognized input or a zero byte outside of a string that is
     *         not the end-of-input sentinel), -2 if refilling the buffer failed
     * @throws IOException if reading from the file fails
     */
    public int lex() throws IOException {
        int count = 0;
        loop: while (true) {
            token = yycursor;
            /*!re2c
                re2c:YYCTYPE = "int";
                re2c:YYPEEK = "Byte.toUnsignedInt(yyinput[yycursor])";
                re2c:YYFILL = "if (fill(@@) != 0) { return -2; }";

                str = ['] ([^'\\] | [\\][^])* ['];

                [\x00] {
                    // Check that it is the sentinel, not some unexpected null.
                    return (token == yylimit - YYMAXFILL) ? count : -1;
                }
                str  { count += 1; continue loop; }
                [ ]+ { continue loop; }
                *    { return -1; }
            */
        }
    }

    /**
     * Self-test: writes an input file several times larger than the buffer,
     * lexes it and checks the number of strings found (run with {@code -ea}).
     */
    public static void main(String[] args) throws IOException {
        String fname = "input";
        String content = "'qu\0tes' 'are' 'fine: \\'' ".repeat(Lexer.BUFSIZE);

        // Prepare input file: a few times the size of the buffer, containing
        // strings with zeroes and escaped quotes.
        Files.writeString(Paths.get(fname), content);

        int count = 3 * Lexer.BUFSIZE; // number of quoted strings written to file

        File file = new File(".", fname);
        // The lexer is closed before the finally block runs, so the file is no
        // longer open when it gets deleted.
        try (Lexer lexer = new Lexer(file)) {
            // Run the lexer.
            int n = lexer.lex();
            assert n == count;
        } finally {
            // Cleanup: remove input file, even if lexing failed.
            file.delete();
        }
    }
}