File: xml_parser.y

package info (click to toggle)
eglade 0.3.6-1
  • links: PTS
  • area: main
  • in suites: woody
  • size: 572 kB
  • ctags: 849
  • sloc: yacc: 429; makefile: 183; sh: 24; ansic: 9
file content (462 lines) | stat: -rw-r--r-- 10,410 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
-- Copyright 1999 Daniel Elphick and others
-- Licensed under Eiffel Forum Freeware License, version 1;
-- (see forum.txt)

%{
-- Eiffel declarations section of the grammar file: opens class XML_PARSER.
-- The features of the class are written after the second %% further down.
indexing

    description: "XML parsing class"
	author: "Daniel Elphick <de397@ecs.soton.ac.uk>"

class XML_PARSER

inherit

    YY_PARSER_SKELETON [ANY]
		rename
				-- Frees the name `make' for this class's own creation
				-- procedure, which calls `make_skeleton'.
			make as make_skeleton
		redefine
				-- Replaced in this class by a version that also prints the
				-- line number, last token, last character and scanner state.
			report_error
		end
		-- NOTE(review): presumably supplies the token codes (TEXTUAL,
		-- PI_START, ...) used by `read_token' -- confirm against XML_TOKENS.
	XML_TOKENS

creation

    make

%}

-- Terminals. `read_token' delivers TEXTUAL with its text in `last_value';
-- the literal characters '<' and '>' are used directly as tokens as well.
%token	<STRING>				TEXTUAL
-- Declared, but no grammar rule and no branch of `read_token' in this file
-- refers to WHITE_SPACE.
%token	<STRING>				WHITE_SPACE
-- PI_START is returned for '?' following '<'; PI_END for the '>' that closes
-- a processing instruction.
%token 							PI_START
%token 							PI_END
-- Returned for '/' following '<'.
%token							END_TAG_START


-- Types of the semantic values carried by the non-terminals.
%type	<LINKED_LIST[ANY]>		tags
%type	<TAG_TREE>				tag
%type	<TAG_TREE>				xml_file
%type	<STRING>				start_tag
%type	<STRING>				end_tag
%type	<STRING>				pi_tag

%% -- Grammar rules and actions follow.

-- A document is one processing instruction followed by any number of tags.
-- The result is a node named "ROOT" holding the instruction text as first
-- child and the parsed tags after it; the same node is stored in `tree'.
xml_file	: 	pi_tag tags
				{
					!!$$.make("ROOT")
					tree := $$
					$$.add_child($1)
					$$.add_children($2)
				}
			;

-- An element is a start tag, then either nested tags or a single piece of
-- text, then an end tag. The start and end names must be equal; otherwise a
-- message is written to the error stream and a parse error is raised.
tag			:	start_tag tags end_tag
				{
					if not $1.is_equal($3) then
							-- Mismatched names: report and abort this rule.
						std.error.put_string("Start tag '")
						std.error.put_string($1)
						std.error.put_string("' does not match end tag '")
						std.error.put_string($3)
						std.error.put_string("'%N")
						raise_error
					else
							-- Node named after the tag, owning the nested tags.
						!!$$.make($1)
						$$.add_children($2)
					end
				}
			|	start_tag TEXTUAL end_tag
				{
					if not $1.is_equal($3) then
							-- Mismatched names: report and abort this rule.
						std.error.put_string("Start tag '")
						std.error.put_string($1)
						std.error.put_string("' does not match end tag '")
						std.error.put_string($3)
						std.error.put_string("'%N")
						raise_error
					else
							-- Node named after the tag, with the text as only child.
						!!$$.make($1)
						$$.add_child($2)
					end
				}
			;

-- A possibly empty sequence of tags, collected left to right in one list.
tags		:	tags tag
				{
						-- Keep using the list built so far and append the new tag.
					$$ := $1
					$$.add_last($2)
				}
			|	
				{
						-- Empty sequence: start a fresh list.
					!!$$.make
				}
			;

-- '<' text '>'. The value is the text between the brackets; `read_token'
-- collects everything up to '>' into that single TEXTUAL token.
start_tag	:	'<' TEXTUAL '>'
				{
					$$ := $2
				}
			;

-- END_TAG_START text '>'. The value is the text between them, which the
-- `tag' rule compares with the start tag's text.
end_tag		:	END_TAG_START TEXTUAL '>' 
				{
					$$ := $2
				}
			;

-- PI_START text PI_END. The value is the instruction text; `read_token'
-- removes the trailing '?' before returning it.
pi_tag		:	PI_START TEXTUAL PI_END 
				{
					$$ := $2
				}
			;
			
%%

feature

	tree: TAG_TREE
			-- Root node ("ROOT") assigned by the `xml_file' rule
	input: INPUT_STREAM
			-- Character source passed to `make' and consumed by `read_token'
	line_number: INTEGER
			-- Starts at 1 in `make'; incremented each time `read_token'
			-- reads a '%N'. Printed by `report_error'.
	
	make(stream: INPUT_STREAM) is
			-- Prepare to parse the characters of `stream': line counting
			-- starts at 1 and the scanner starts in the `initial' state.
		do
			line_number := 1
			input := stream
				-- Inherited creation procedure, renamed in the inherit clause.
			make_skeleton
			state := initial
		end

	expand_last_value is
			-- Replace `last_value' by a re-encoded copy of itself:
			-- a newline becomes the two characters percent + 'N',
			-- a tab becomes percent + 'T', a percent sign is doubled,
			-- and the entities "&gt;" and "&lt;" become '>' and '<'.
			-- Any other '&' and every other character is copied unchanged.
		local
			pos: INTEGER
			source: STRING
			c: CHARACTER
			decoded: BOOLEAN
		do
			source := last_value
			!!last_value.make(source.count)
			from
				pos := 1
			until
				pos > source.count
			loop
				c := source @ pos
				if c = '%N' then
					last_value.append("%%N")
				elseif c = '%T' then
					last_value.append("%%T")
				elseif c = '%%' then
					last_value.append("%%%%")
				elseif c = '&' then
					decoded := False
						-- An entity needs three more characters: letter, 't', ';'.
					if pos + 3 <= source.count and then
					   (source @ (pos + 2)) = 't' and then
					   (source @ (pos + 3)) = ';' then
						if (source @ (pos + 1)) = 'g' then
							last_value.append_character('>')
							decoded := True
						elseif (source @ (pos + 1)) = 'l' then
							last_value.append_character('<')
							decoded := True
						end
					end
					if decoded then
							-- Skip the rest of the entity; the common
							-- increment below steps past the ';'.
						pos := pos + 3
					else
						last_value.append_character('&')
					end
				else
					last_value.append_character(c)
				end
				pos := pos + 1
			end
		end

	state: INTEGER
			-- Current scanner state: one of the state constants below
			-- (`initial' .. `weird'), or `yyEOF_token' once `read_token'
			-- has hit the end of input
	pending_character: CHARACTER
			-- Character recorded by `read_token' when it pushes a
			-- character back for the following call
	has_pending_character: BOOLEAN
			-- Must the next `read_token' reuse the pushed-back character
			-- instead of reading a new one from `input'?
	last_character: CHARACTER
			-- Character currently being examined by `read_token'
	last_value: STRING
			-- Text of the token being built; recreated empty at the start
			-- of every `read_token'
	last_token: INTEGER
			-- Code of the token found by the last `read_token';
			-- `error' after a lexical error, `yyEOF_token' at end of input

		-- Special values stored in `last_token' by `read_token'.
	error: INTEGER is -1
			-- Lexical error
	yyEOF_token: INTEGER is 0
			-- End of input; also stored in `state' when input is exhausted

		-- Values of `state', as handled by `read_token'.
	initial: INTEGER is 1
			-- Skipping blanks, tabs and newlines until a '<'
	expect: INTEGER is 2
			-- Looking at the character after '<': '?', '/' or anything else
	expect_pi_end: INTEGER is 3
			-- Expecting the '>' that closes a processing instruction
	expect_start_end: INTEGER is 4
			-- Expecting the '>' that closes a start tag
	expect_end_end: INTEGER is 5
			-- Expecting the '>' that closes an end tag
	in_pi: INTEGER is 6
			-- Collecting the text of a processing instruction
	in_start: INTEGER is 7
			-- Collecting the text of a start tag
	in_end: INTEGER is 8
			-- Collecting the text of an end tag
	weird: INTEGER is 9
			-- After a start tag: collecting white space or text up to the
			-- next '<'

	report_error(a_message: STRING) is
			-- Write `a_message' to the error stream, followed by the current
			-- `line_number', `last_token', `last_character' and `state'.
			-- Redefinition of the inherited feature (see the inherit clause).
		do
			std.error.put_string(a_message)
			std.error.put_string(" at line ")
			std.error.put_integer(line_number)
			std.error.put_character('%N')
			std.error.put_string("last_token = ")
			std.error.put_integer(last_token)
			std.error.put_character('%N')
			std.error.put_string("last_character = '")
			std.error.put_character(last_character)
			std.error.put_string("'%N")
				-- `state' is printed as its integer code (see the state constants).
			std.error.put_string("state = ")
			std.error.put_integer(state)
			std.error.put_string("%N")
		end

	read_token is
			-- Scan the next token from `input'.
			-- The token code is left in `last_token' and its text, if any,
			-- in `last_value'. `last_token' is `error' after a lexical error
			-- and `yyEOF_token' at end of input. `state' selects the branch
			-- to run and is updated for the following call; `line_number'
			-- is incremented for every newline read.
		do
			!!last_value.make(0)
			if has_pending_character then
					-- Reuse the character pushed back by the previous call.
				last_character := pending_character
				has_pending_character := False
			else
				input.read_character
				if not input.end_of_input then
					last_character := input.last_character
					if last_character = '%N' then
						line_number := line_number + 1
					end
				else
						-- `state' becomes 0, so the `when 0' branch below
						-- runs and nothing else is scanned.
					last_token := yyEOF_token
					state := yyEOF_token
				end
			end
			inspect
				state
			when initial then
					-- Skip white space up to the next '<'; anything else is
					-- an error.
				last_token := 9999 -- not a token but > yyEOF_token
				from
				until
					last_character = '<' or else last_token <= yyEOF_token
				loop
					if not ("%T%N <").has(last_character) then
						std.error.put_string("Error in INITIAL state%N")
						last_token := error
					else
						input.read_character
						if not input.end_of_input then
							last_character := input.last_character
							if last_character = '%N' then
								line_number := line_number + 1
							end
						else
							last_token := yyEOF_token
						end
					end
						
				end
				if last_token /= error and last_token /= yyEOF_token then
						-- '<' found: the token is decided by the next character.
					state := expect
					read_token
				end
			when weird then
					-- Content following a start tag: either white space only
					-- (`last_token' stays 0) or text (TEXTUAL), up to the
					-- next '<'.
				from
					last_token := 0
				until
					last_character = '<' or else last_token = error
				loop
					if last_character = '>' then
						std.error.put_string("Error in WEIRD state%N")
						last_token := error
					else
						if not (" %T%N").has(last_character) then
								-- First non-blank character: the rest, up to
								-- '<', is text.
							last_token := TEXTUAL
							from
							until
								last_character = '<' or else last_token = error
							loop
								if last_character = '>' then
									last_token := error
								else
									last_value.append_character(last_character)
									input.read_character
									if input.end_of_input then
										std.error.put_string("Error in WEIRD state%N")
										last_token := error
									else
										last_character := input.last_character
										if last_character = '%N' then
											line_number := line_number + 1
										end
									end
								end
							end
									
						else
								-- Leading white space is kept in `last_value'.
							last_value.append_character(last_character)
							input.read_character
							if input.end_of_input then
								std.error.put_string("Error in WEIRD state%N")
								last_token := error
							else
								last_character := input.last_character
								if last_character = '%N' then
									line_number := line_number + 1
								end
							end
						end
					end
				end
				state := expect
				if last_token = 0 then
						-- Only white space before '<'. Peek at the next
						-- character: before an end tag the white space is
						-- returned as TEXTUAL, otherwise it is dropped and
						-- the following token is scanned instead.
					input.read_character
					if not input.end_of_input then
						if input.last_character = '/' then
							last_token := TEXTUAL
							expand_last_value
							input.unread_character
						else
							input.unread_character
							read_token
						end
					end
				elseif last_token = TEXTUAL then
					expand_last_value
				end
			when expect then
					-- `last_character' is the character right after '<'.
				inspect
					last_character
				when '?' then
					last_token := PI_START
					state := in_pi
				when '/' then
					last_token := END_TAG_START
					state := in_end
				else
						-- Start tag: return '<' now and keep this character,
						-- which is the first one of the tag text.
					last_token := ('<').code
					state := in_start
					pending_character := last_character
					has_pending_character := True
				end
			when expect_pi_end then
				if last_character = '>' then
					last_token := PI_END
					state := initial
				else
					std.error.put_string("Error in EXPECT_PI_END state%N")
					last_token := error
				end
			when expect_start_end then
				if last_character = '>' then
					last_token := ('>').code
					state := weird
				else
					std.error.put_string("Error in EXPECT_START_END state%N")
					last_token := error
				end
			when expect_end_end then
				if last_character = '>' then
					last_token := ('>').code
					state := initial
				else
					std.error.put_string("Error in EXPECT_END_END state%N")
					last_token := error
				end
			when in_pi then
					-- Collect the instruction text up to '>'. A '<' or the
					-- end of input inside the instruction is an error, which
					-- is reported once, after the loop.
				from
				until
					last_character = '>' or else last_token = error
				loop
					if last_character /= '<' then
						last_value.append_character(last_character)
						input.read_character
						if not input.end_of_input then
							last_character := input.last_character
							if last_character = '%N' then
								line_number := line_number + 1
							end
						else
							last_token := error
						end
					else
						last_token := error
					end
				end
					-- The text must end with '?'. The count test keeps `item'
					-- from being called on an empty string (input "<?>").
				if last_token /= error and then last_value.count > 0 and then last_value.item(last_value.count) = '?' then
					last_value.remove_last(1) -- strip off '?'
					pending_character := '>'
					has_pending_character := True
					last_token := TEXTUAL
					state := expect_pi_end
				else
					std.error.put_string("Error in IN_PI state%N")
					last_token := error
				end
			when in_start then
					-- Collect the start tag text up to '>'.
				from
				until
					last_character = '>' or else last_token = error
				loop
					if last_character /= '<' then
						last_value.append_character(last_character)
						input.read_character
						if not input.end_of_input then
							last_character := input.last_character
							if last_character = '%N' then
								line_number := line_number + 1
							end
						else
							std.error.put_string("Error in IN_START state%N")
							last_token := error
						end
					else
						std.error.put_string("Error in IN_START state%N")
						last_token := error
					end
				end
				if last_token /= error then
						-- Return the text now; the '>' is handled by the
						-- next call.
					pending_character := '>'
					has_pending_character := True
					last_token := TEXTUAL
					state := expect_start_end
				end
			when in_end then
					-- Collect the end tag text up to '>'.
				from
				until
					last_character = '>' or else last_token = error
				loop
					if last_character /= '<' then
						last_value.append_character(last_character)
						input.read_character
						if not input.end_of_input then
							last_character := input.last_character
							if last_character = '%N' then
								line_number := line_number + 1
							end
						else
							std.error.put_string("Error in IN_END state%N")
							last_token := error
						end
					else
						std.error.put_string("Error in IN_END state%N")
						last_token := error
					end
				end
				if last_token /= error then
						-- Return the text now; the '>' is handled by the
						-- next call.
					pending_character := '>'
					has_pending_character := True
					last_token := TEXTUAL
					state := expect_end_end
				end
			when 0 then
					-- End of input was reached above; `last_token' is
					-- already `yyEOF_token'.
			else
				print("How did we get here?%N")
				die_with_code(exit_failure_code)
			end
		end


	
	
end