File: advanced.re

package info (click to toggle)
re2c 4.1-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 50,052 kB
  • sloc: cpp: 32,477; ml: 8,279; sh: 5,265; makefile: 968; haskell: 612; python: 428; ansic: 227; javascript: 111; java: 3
file content (247 lines) | stat: -rw-r--r-- 6,016 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
//go:generate re2go $INPUT -o $OUTPUT -cf --recursive-functions -Wno-nondeterministic-tags
package main

import (
	"fmt"
	"os"
)

/*!conditions:re2c*/

// Compile-time settings and the special values used by the m-tag machinery.
const (
	// debug turns on printing of the extracted components in the lexer
	// actions.
	debug bool = false

	// SIZE is the number of input bytes the buffer can hold; one extra byte
	// is allocated for the sentinel. Intentionally small to trigger buffer
	// refill.
	SIZE int = 4095

	// mtagRoot is the index that stands for the root of the m-tag tree,
	// that is, an empty history.
	mtagRoot int = -1

	// tagNone is the tag value recorded when a tag did not take part in the
	// match.
	tagNone int = -1
)

// Storage for m-tag histories.
//
// Histories share a common start and fork at some point, so together they
// form a tree. The tree is kept as a flat array of nodes, and a history is
// identified by the array index of its most recent node, which makes copying
// a history an O(1) operation.
type (
	// mtagElem is one node of the tree.
	mtagElem struct {
		elem int // tag value stored in this node
		pred int // index of the parent node (the previous value in the history)
	}

	// mtagTrie is the array that holds all nodes of the tree.
	mtagTrie = []mtagElem
)

// add_mtag extends the history identified by mtag with one more value.
//
// A new node holding value and pointing back at mtag is appended to the
// array behind trie. The returned index of that node identifies the extended
// history; the history identified by mtag itself is left untouched.
func add_mtag(trie *mtagTrie, mtag int, value int) int {
	// The new node lands at the current end of the array.
	idx := len(*trie)
	*trie = append(*trie, mtagElem{elem: value, pred: mtag})
	return idx
}

// unwind walks two m-tag histories back to the root in lockstep and collects
// the substrings of str that each pair of tag values delimits.
//
// x and y identify the two histories: each is either an index into trie or
// mtagRoot for an empty history. For every step, the value from the x history
// is the start offset and the value from the y history is the end offset of
// one component in str. Steps where both values are tagNone contribute
// nothing. Components are returned oldest first; two empty histories yield an
// empty, non-nil slice.
//
// Panics if the histories have different length, or if exactly one value of
// a pair is tagNone.
func unwind(trie mtagTrie, x int, y int, str []byte) []string {
	// Reached the root of the m-tag tree in both histories, stop recursion.
	if x == mtagRoot && y == mtagRoot {
		return []string{}
	}

	// Tag histories must have equal length. This has to be checked before
	// trie is indexed: mtagRoot is not a valid index, so indexing first would
	// fail with an index-out-of-range error instead of this message.
	if x == mtagRoot || y == mtagRoot {
		panic("tag histories have different length")
	}

	// Unwind the older part of both histories first, so that the result is
	// ordered from the oldest component to the newest.
	result := unwind(trie, trie[x].pred, trie[y].pred, str)

	ex := trie[x].elem
	ey := trie[y].elem

	if ex != tagNone && ey != tagNone {
		// Both tags are valid string indices, extract component.
		result = append(result, string(str[ex:ey]))
	} else if !(ex == tagNone && ey == tagNone) {
		panic("both tags should be tagNone")
	}
	return result
}

// State is the lexer record: the input source, the input buffer with the
// positions into it, the resumable lexer state, and all tag variables. It is
// passed to lex and fill as st (see re2c:variable:yyrecord).
type State struct {
	// Input source that fill reads new data from.
	file     *os.File
	// Input buffer. test allocates SIZE+1 bytes so that the sentinel byte
	// written by fill always fits.
	yyinput  []byte
	// Positions in yyinput used by the generated lexer. fill moves all of
	// them back when it discards processed data. yycursor is the value
	// recorded for positive m-tags and is copied to token after a match.
	// NOTE(review): yymarker is presumably the backtracking position of the
	// generated code; it is only shifted in the visible code - confirm.
	yycursor int
	yymarker int
	// End of the valid data in yyinput; fill writes the sentinel 0 there.
	yylimit  int
	// Start of the data that has not been fully processed yet. fill discards
	// everything before it; the rule actions advance it to yycursor.
	token    int
	// Current lexer condition (re2c:variable:yycond).
	yyc      int
	// Stored lexer state (re2c:variable:yystate); test initializes it to -1.
	yyst     int
	// Storage for m-tag histories, appended to by add_mtag.
	trie     mtagTrie
	// The two directives below are expanded by re2c into declarations of the
	// s-tag and m-tag variables it generates, one int field per variable.
	/*!stags:re2c format = '\n\t@@ int'; */
	/*!mtags:re2c format = '\n\t@@ int'; */
	// s-tags around the type/subtype part of media_type.
	l1, l2   int
	// m-tags around each obs_fold in a folded header value.
	f1, f2   int
	// m-tags around each parameter name (p1, p2) and value (p3, p4).
	p1, p2   int
	p3, p4   int
	// NOTE(review): not referenced in the visible code, presumably used by
	// the generated lexer - confirm.
	yyaccept int
}

// Status codes returned by lex and fill.
const (
	// Returned by the end-of-input rule: all input has been consumed.
	lexEnd = iota
	// Returned by fill after the buffer has been shifted and refilled.
	lexReady
	// Returned by lex (through YYFILL) when it needs more input to continue.
	lexWaiting
	// Returned by the default rule: the input does not match the grammar.
	lexBadPacket
	// Returned by fill when the unprocessed data leaves no free space in the
	// buffer.
	lexBigPacket
)

// fill discards the already processed part of the input buffer and reads
// more data from st.file into the space that becomes free.
//
// Everything before st.token counts as processed. The remaining bytes are
// moved to the start of the buffer and every stored position is moved back
// by the same distance. Returns lexBigPacket if the unprocessed data leaves
// no free space, and lexReady otherwise (also when the read delivers no
// data).
func fill(st *State) int {
	// The name shift is also used by the code that re2c generates from the
	// stags directive below, so it must not be renamed.
	shift := st.token
	pending := st.yylimit - shift

	// Error: no space. In real life can reallocate a larger buffer.
	if room := SIZE - pending; room < 1 {
		return lexBigPacket
	}

	// Slide the unprocessed bytes to the front of the buffer, then rebase
	// every stored position by the same amount.
	copy(st.yyinput, st.yyinput[shift:st.yylimit])
	st.token -= shift
	st.yylimit -= shift
	st.yymarker -= shift
	st.yycursor -= shift
	/*!stags:re2c format = '\n\tst.@@ -= shift'; */

	// Read new data into the free space at the end of the buffer. The error
	// returned by Read is discarded; only the byte count is used.
	n, _ := st.file.Read(st.yyinput[st.yylimit:SIZE])
	st.yylimit += n

	// Terminate the valid data with the sentinel symbol.
	st.yyinput[st.yylimit] = 0

	return lexReady
}

// Lexer specification. re2c replaces the block below with the generated
// function lex, which takes st *State and returns an int status (see YYFN).
//
// Configuration: the lexer works on the record st, treats the byte 0 as the
// end-of-input sentinel, and returns lexWaiting whenever it needs more input
// (YYFILL). Tags are enabled; a tag that did not match gets the value
// tagNone, and every m-tag update appends a node to st.trie through add_mtag.
//
// Rules:
//   - condition media_type: a media type with optional parameters, followed
//     by optional whitespace and CRLF. The action unwinds the parameter names
//     (p1, p2) and values (p3, p4), advances st.token and continues lexing.
//   - condition header: a folded header field value followed by CRLF. The
//     action unwinds the folds (f1, f2), advances st.token and continues.
//   - any condition: end of input returns lexEnd, anything else returns
//     lexBadPacket.
//
// NOTE(review): vchar starts at \x1f, while RFC 5234 defines VCHAR as
// %x21-7E - confirm that the wider range is intended.
// NOTE(review): no visible code switches yyc to the header condition (test
// starts in media_type) - confirm whether the header rule is reachable.
/*!re2c
	re2c:api = record;
	re2c:eof = 0;
	re2c:tags = 1;
	re2c:tags:negative = "tagNone";
	re2c:variable:yyrecord = st;
	re2c:variable:yycond = yyc;
	re2c:variable:yystate = yyst;
	re2c:define:YYFN = ["lex;int", "st;*State"];
	re2c:define:YYCTYPE = "byte";
	re2c:define:YYFILL = "return lexWaiting";
	re2c:define:YYMTAGP = "@@ = add_mtag(&st.trie, @@, st.yycursor)";
	re2c:define:YYMTAGN = "@@ = add_mtag(&st.trie, @@, tagNone)";

	crlf  = '\r\n';
	sp    = ' ';
	htab  = '\t';
	ows   = (sp | htab)*;
	digit = [0-9];
	alpha = [a-zA-Z];
	vchar = [\x1f-\x7e];
	tchar = [-!#$%&'*+.^_`|~] | digit | alpha;

	obs_fold            = #f1 crlf (sp | htab)+ #f2;
	obs_text            = [\x80-\xff];
	field_name          = tchar+;
	field_vchar         = vchar | obs_text;
	field_content       = field_vchar ((sp | htab)+ field_vchar)?;
	field_value_folded  = (field_content* obs_fold field_content*)+;
	header_field_folded = field_value_folded ows;
	token               = tchar+;
	qdtext
		= htab
		| sp
		| [\x21-\x5B\x5D-\x7E] \ '"'
		| obs_text;
	quoted_pair         = '\\' ( htab | sp | vchar | obs_text );
	quoted_string       = '"' ( qdtext | quoted_pair )* '"';
	parameter           = #p1 token #p2 '=' #p3 ( token | quoted_string ) #p4;
	media_type          = @l1 token '/' token @l2 ( ows ';' ows parameter )*;

	<media_type> media_type ows crlf {
		if debug {fmt.Printf("media type: %v\n", string(st.yyinput[st.l1:st.l2]))}

		pnames := unwind(st.trie, st.p1, st.p2, st.yyinput)
		if debug {fmt.Printf("pnames: %v\n", pnames)}

		pvals := unwind(st.trie, st.p3, st.p4, st.yyinput)
		if debug {fmt.Printf("pvals: %v\n", pvals)}

		st.token = st.yycursor
		return lex(st)
	}

	<header> header_field_folded crlf {
		folds := unwind(st.trie, st.f1, st.f2, st.yyinput)
		if debug {fmt.Printf("folds: %v\n", folds)}

		st.token = st.yycursor
		return lex(st)
	}

	<*> $ { return lexEnd }
	<*> * { return lexBadPacket }
*/

// test feeds packets to the lexer one at a time and returns the final lexer
// status.
//
// A file named pipe in the current directory connects the two sides: it is
// opened once for writing (the producer) and once for reading (the handle
// that fill reads from). Each time lex reports lexWaiting, the next packet,
// if any is left, is written to the file and fill is called to pull the new
// data into the buffer.
//
// Returns lexEnd when all input was consumed, lexBadPacket when the input
// does not match, or the status reported by fill when the buffer cannot be
// refilled. Panics if the file cannot be created, opened or written, or if
// lex returns a status that is not handled here. The file is closed and
// removed on every exit path, including panics.
func test(packets []string) int {
	fname := "pipe"

	fw, err := os.Create(fname)
	if err != nil {
		panic(fmt.Errorf("creating %q: %w", fname, err))
	}
	// Deferred calls run in reverse order: the reader is closed first, then
	// the writer, then the file is removed. Cleanup is best effort, so the
	// errors of Close and Remove are not inspected.
	defer os.Remove(fname)
	defer fw.Close()

	fr, err := os.Open(fname)
	if err != nil {
		panic(fmt.Errorf("opening %q: %w", fname, err))
	}
	defer fr.Close()

	// All positions start at the end of the empty buffer, so the first call
	// to lex immediately asks for more input.
	st := &State{
		file:     fr,
		yyinput:  make([]byte, SIZE+1),
		yycursor: SIZE,
		yymarker: SIZE,
		yylimit:  SIZE,
		token:    SIZE,
		yyc:      yycmedia_type,
		yyst:     -1,
		trie:     make([]mtagElem, 0),
		/*!stags:re2c format = '\n\t\t@@: tagNone,'; */
		/*!mtags:re2c format = '\n\t\t@@: mtagRoot,'; */
		l1:       0,
		l2:       0,
		f1:       0,
		f2:       0,
		p1:       0,
		p2:       0,
		p3:       0,
		p4:       0,
		yyaccept: 0,
	}
	// yyinput is zero-initialized, no need to write sentinel

	send := 0
	for {
		switch status := lex(st); status {
		case lexEnd, lexBadPacket:
			// Either all input was consumed or it was rejected: done.
			return status
		case lexWaiting:
			// The lexer needs more input: send the next packet, if any.
			if send < len(packets) {
				if _, err := fw.WriteString(packets[send]); err != nil {
					panic(fmt.Errorf("writing packet %d to %q: %w", send, fname, err))
				}
				send++
			}
			// Refill even when no packet is left, so that the lexer finds
			// the sentinel at the end of the data and can finish.
			if refill := fill(st); refill != lexReady {
				return refill
			}
		default:
			panic("unexpected status")
		}
	}
}

// main runs the lexer on a media type header that is split into several
// packets at arbitrary points and panics unless the whole input is accepted.
func main() {
	input := []string{"ap", "plication/j", "son;", " charset=\"", "utf\\\"-8\"\r", "\n", ""}
	if status := test(input); status != lexEnd {
		panic("error")
	}
}