// re2java $INPUT -o $OUTPUT -f
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.Pipe;
import java.nio.charset.StandardCharsets;
/**
 * Push-model lexer example: input arrives through a {@link Pipe} packet by
 * packet, and the lexer hands control back to its caller whenever the buffer
 * runs dry instead of blocking inside the lexing routine.
 *
 * <p>The body of {@code lex} is a re2c specification; this file is meant to be
 * run through re2java (see the command in the header comment) before it is
 * compiled.
 */
class Lexer {
    /** Result codes produced by {@code lex} and {@code fill}. */
    enum Status {
        /** The end-of-input rule ({@code $}) matched. */
        END,
        /** {@code fill} finished and lexing may be resumed. */
        READY,
        /** The lexer needs more input; the caller should call {@code fill}. */
        WAITING,
        /** The current lexeme already occupies the whole buffer. */
        BIG_PACKET,
        /** The input did not match the {@code packet} rule. */
        BAD_PACKET
    }

    // Use a small buffer to cover the case when a lexeme doesn't fit.
    // In real world use a larger buffer.
    public static final int BUFSIZE = 10;

    /** Set to {@code true} to print a trace of the test driver to stdout. */
    private static final boolean VERBOSE = false;

    /** Mutable lexer state that survives across calls to {@code lex}. */
    public static class State {
        /** Read end of the pipe that {@code fill} pulls bytes from. */
        final Pipe.SourceChannel source;
        /** Input buffer; holds up to BUFSIZE data bytes plus one sentinel byte. */
        byte[] yyinput;
        /** Offset of the next byte to be examined. */
        int yycursor;
        /** Offset shifted together with the cursor by {@code fill}; otherwise left to the lexer. */
        int yymarker;
        /** Offset one past the last valid data byte; the sentinel lives here. */
        int yylimit;
        /** Offset where the lexeme currently being matched starts. */
        int token;
        /** Lexer state selector; starts at -1 and is otherwise left to the lexer. */
        int yystate;
        /** Number of packets matched so far. */
        int received;

        /**
         * Creates an empty state that reads from the source end of {@code pipe}.
         *
         * @param pipe pipe whose source channel supplies the input
         */
        public State(Pipe pipe) {
            source = pipe.source();
            // Sentinel at `yylimit` offset is set to zero, which triggers YYFILL.
            yyinput = new byte[BUFSIZE + 1];
            // All offsets start at the end of the (empty) buffer, so the very
            // first fill() discards everything and starts reading at offset 0.
            yycursor = yymarker = yylimit = token = BUFSIZE;
            yystate = -1;
            received = 0;
        }
    }

    /**
     * Prints a formatted trace line when {@link #VERBOSE} is enabled.
     *
     * @param format {@code printf}-style format string (a newline is appended)
     * @param args   format arguments
     */
    private static void log(String format, Object... args) {
        if (VERBOSE) {
            System.out.printf(format + "\n", args);
        }
    }

    /**
     * Discards already-lexed bytes, then reads more input from the pipe into
     * the freed space and re-appends the zero sentinel.
     *
     * @param st lexer state whose buffer is refilled
     * @return {@code BIG_PACKET} if the unfinished lexeme leaves no free space,
     *         otherwise {@code READY} (also when the pipe has been closed)
     * @throws IOException if reading from the pipe fails
     */
    private static Status fill(State st) throws IOException {
        final int shift = st.token;             // bytes before the current lexeme
        final int used = st.yylimit - st.token; // bytes that must be kept
        final int free = BUFSIZE - used;        // room available after shifting

        // Error: lexeme too long. In real life can reallocate a larger buffer.
        // The check is on free space rather than on the shift distance: a
        // partially filled buffer whose lexeme starts at offset 0 can still
        // accept more data and must not be reported as too big.
        if (free < 1) {
            return Status.BIG_PACKET;
        }

        // Shift buffer contents (discard everything up to the current token).
        System.arraycopy(st.yyinput, shift, st.yyinput, 0, used);
        st.yycursor -= shift;
        st.yymarker -= shift;
        st.yylimit -= shift;
        st.token = 0;

        // Fill free space at the end of buffer with new data from the pipe.
        ByteBuffer buffer = ByteBuffer.wrap(st.yyinput, st.yylimit, BUFSIZE - st.yylimit);
        int have = st.source.read(buffer);
        if (have != -1) { // -1 means that pipe is closed
            st.yylimit += have;
        }
        st.yyinput[st.yylimit] = 0; // append sentinel symbol
        return Status.READY;
    }

    /**
     * Lexes packets from the buffer in {@code yyrecord}. The matching code is
     * generated by re2c from the specification below; per that specification
     * it returns {@code WAITING} when more input is needed (YYFILL),
     * {@code END} on the end-of-input rule, {@code BAD_PACKET} on the default
     * rule, and increments {@code received} for every matched packet.
     *
     * @param yyrecord lexer state (name fixed by the {@code re2c:api = record} setting)
     * @return the status that stopped lexing
     */
    private static Status lex(State yyrecord) {
        int yych; // current input symbol; referenced by the generated code

        loop: while (true) {
            // A new lexeme starts where the previous one ended.
            yyrecord.token = yyrecord.yycursor;
            /*!re2c
                re2c:api = record;
                re2c:YYCTYPE = "int";
                re2c:YYPEEK = "Byte.toUnsignedInt(yyrecord.yyinput[yyrecord.yycursor])";
                re2c:YYFILL = "return Status.WAITING;";
                re2c:eof = 0;

                packet = [a-z]+[;];

                *      { return Status.BAD_PACKET; }
                $      { return Status.END; }
                packet { yyrecord.received += 1; continue loop; }
            */
        }
    }

    /**
     * Feeds {@code packets} to the lexer one at a time through a fresh pipe
     * and asserts that lexing stops with the {@code expect} status.
     *
     * @param packets packets to send, in order
     * @param expect  status the run is expected to finish with
     * @throws IOException if pipe I/O fails
     */
    public static void test(String[] packets, Status expect) throws IOException {
        // Create a pipe. Both ends are closed on every exit path, including
        // the error statuses and failed assertions.
        Pipe pipe = Pipe.open();
        try (Pipe.SourceChannel source = pipe.source();
             Pipe.SinkChannel sink = pipe.sink()) {
            // Initialize lexer state
            Lexer.State st = new Lexer.State(pipe);

            // Main loop. The buffer contains incomplete data which appears packet by
            // packet. When the lexer needs more input it saves its internal state and
            // returns to the caller which should provide more input and resume lexing.
            int send = 0;
            Status status;
            while (true) {
                status = lex(st);
                if (status == Status.END) {
                    log("done: got %d packets", st.received);
                    break;
                } else if (status == Status.WAITING) {
                    log("waiting...");
                    if (send < packets.length) {
                        log("sent packet %d: %s", send, packets[send]);
                        // Explicit charset: the lexer works on raw bytes, so the
                        // encoding must not depend on the platform default.
                        ByteBuffer buffer =
                            ByteBuffer.wrap(packets[send].getBytes(StandardCharsets.UTF_8));
                        sink.write(buffer);
                        send += 1;
                    } else {
                        // No more packets: closing the sink makes the next read
                        // in fill() report end of stream.
                        sink.close();
                    }
                    status = fill(st);
                    if (status == Status.BIG_PACKET) {
                        log("error: packet too big");
                        break;
                    }
                    assert status == Status.READY;
                } else {
                    assert status == Status.BAD_PACKET;
                    log("error: ill-formed packet");
                    break;
                }
            }

            // Check results.
            assert status == expect;
            if (status == Status.END) {
                assert send == st.received;
            }
        }
    }

    /**
     * Runs the example scenarios: empty input, several well-formed packets,
     * an ill-formed packet, and a packet that does not fit into the buffer.
     *
     * @param args unused
     * @throws IOException if pipe I/O fails
     */
    public static void main(String []args) throws IOException {
        test(new String[]{}, Status.END);
        test(new String[]{"zero;", "one;", "two;", "three;", "four;"}, Status.END);
        test(new String[]{"zer0;"}, Status.BAD_PACKET);
        test(new String[]{"goooooooooogle;"}, Status.BIG_PACKET);
    }
}
|