// re2zig $INPUT -o $OUTPUT -f
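// This example shows a lexer with a storable state (the -f option): when the
// input buffer runs out of data, the lexer saves its state and returns
// Status.waiting to the caller, which refills the buffer and resumes lexing
// from where it stopped.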
const std = @import("std");
const Status = enum {
    end, // end of input reached
    ready, // buffer refilled successfully, lexing can continue
    waiting, // lexer needs more input
    bad_packet, // syntax error in the input
    big_packet, // lexeme is too long to fit in the buffer
};
// Use a small buffer to cover the case when a lexeme doesn't fit.
// In real-world code, use a larger buffer.
const bufsize = 10;
const State = struct {
    file: std.fs.File.Reader,
    yyinput: [bufsize + 1]u8, // input buffer (+1 byte for the sentinel)
    yycursor: usize, // current position in the buffer
    yymarker: usize, // position of the last backtracking point
    yylimit: usize, // end of valid data in the buffer
    token: usize, // start of the current lexeme
    yystate: i32, // saved lexer state (-1 is the initial state)
    received: usize, // number of packets recognized so far
};
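// Called when the lexer needs more input: discard everything before the current
// lexeme, shift the remaining data to the start of the buffer and read more
// bytes from the file into the freed space.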
fn fill(st: *State) Status {
    // Error: lexeme too long. In real life one could reallocate a larger buffer.
    if (st.token < 1) { return Status.big_packet; }
    // Shift buffer contents (discard everything up to the current token).
    std.mem.copyForwards(
        u8, st.yyinput[0 .. st.yylimit - st.token], st.yyinput[st.token..st.yylimit]);
    st.yycursor -= st.token;
    // Wrapping subtraction: yymarker may be less than token if no backtracking
    // point was saved for the current lexeme.
    st.yymarker = @subWithOverflow(st.yymarker, st.token)[0];
    st.yylimit -= st.token;
    st.token = 0;
    // Fill free space at the end of the buffer with new data from the file.
    st.yylimit += st.file.interface.readSliceShort(st.yyinput[st.yylimit..bufsize]) catch 0;
    st.yyinput[st.yylimit] = 0; // append sentinel symbol
    return Status.ready;
}
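// The lexer. re2zig replaces the block between %{ and %} with the generated
// lexer code (see the command at the top of the file). With storable state (-f)
// the generated code saves the current state in `yyrecord.yystate` before
// returning Status.waiting, so that the next call to lex() resumes from the
// same point.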
fn lex(yyrecord: *State) Status {
    var yych: u8 = 0;
    loop: while (true) {
        yyrecord.token = yyrecord.yycursor;
        %{
            // Record API: the generated code accesses the lexer state
            // (yyinput, yycursor, yystate, ...) as fields of `yyrecord`.
            re2c:api = record;
            // Zero byte is the sentinel that marks the end of buffered data.
            re2c:eof = 0;
            // Out of input: suspend lexing and return control to the caller.
            re2c:YYFILL = "return Status.waiting;";

            packet = [a-z]+[;];

            * { return Status.bad_packet; }
            $ { return Status.end; }
            packet { yyrecord.received += 1; continue :loop; }
        %}
    }
}
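// Test driver: write packets to a file and read them back through the lexer,
// simulating input that arrives in chunks. Fails if the final status or the
// number of received packets does not match the expectation.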
fn run(expect: Status, packets: []const []const u8) !void {
    // Create a "pipe" (open the same file for reading and writing).
    const fname = "input";
    const fw = try std.fs.cwd().createFile(fname, .{});
    const fr = try std.fs.cwd().openFile(fname, .{ .mode = .read_only });
    // Initialize lexer state: `yystate` is -1 (the initial state), all offsets
    // are at the end of the buffer. Use an unbuffered reader - the lexer does
    // its own buffering.
    const zerobuf: [0]u8 = undefined;
    var st = State{
        .file = fr.reader(&zerobuf),
        .yyinput = undefined,
        .yycursor = bufsize,
        .yymarker = bufsize,
        .yylimit = bufsize,
        .token = bufsize,
        .yystate = -1,
        .received = 0,
    };
    // Sentinel at `yylimit` offset is set to zero, which triggers YYFILL.
    st.yyinput[st.yylimit] = 0;
    // Main loop. The buffer contains incomplete data which appears packet by
    // packet. When the lexer needs more input it saves its internal state and
    // returns to the caller, which should provide more input and resume lexing.
    var status = Status.ready;
    var send: usize = 0;
    while (true) {
        status = lex(&st);
        if (status == Status.end) {
            break;
        } else if (status == Status.waiting) {
            if (send < packets.len) {
                std.log.debug("sending packet {}", .{send});
                try fw.writeAll(packets[send]);
                send += 1;
            }
            status = fill(&st);
            std.log.debug("filled buffer [{s}], status {}", .{ st.yyinput, status });
            if (status != Status.ready) {
                break;
            }
        } else if (status == Status.bad_packet) {
            break;
        }
    }
    // Check results.
    try std.testing.expectEqual(status, expect);
    if (status == Status.end) { try std.testing.expectEqual(st.received, send); }
    // Cleanup: remove the input file.
    fw.close();
    fr.close();
    try std.fs.cwd().deleteFile(fname);
}
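// Each test case feeds a list of packets and checks the expected final status.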
test {
    try run(Status.end, &[_][]const u8{});
    try run(Status.end, &[_][]const u8{ "zero;", "one;", "two;", "three;", "four;" });
    try run(Status.bad_packet, &[_][]const u8{"??;"});
    try run(Status.big_packet, &[_][]const u8{"looooooooooooong;"});
}