File: pt_peg_interp.tcl

package info (click to toggle)
tcllib 1.14-dfsg-3%2Bdeb7u1
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 33,036 kB
  • sloc: tcl: 148,302; ansic: 14,067; sh: 10,320; xml: 1,766; yacc: 753; pascal: 551; makefile: 129; perl: 84; f90: 84; python: 33; ruby: 13; php: 11
file content (386 lines) | stat: -rw-r--r-- 10,271 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
# -*- tcl -*-
#
# Copyright (c) 2009 by Andreas Kupries <andreas_kupries@users.sourceforge.net>

# Interpreter for parsing expression grammars. In essence a recursive
# descent parser configurable with a parsing expression grammar.

# ### ### ### ######### ######### #########
## Package description

## The instances of this class parse a text provided through a channel
## based on a parsing expression grammar provided by a peg container
## object. The parsing process is interpretative, i.e. the parsing
## expressions are decoded and checked on the fly and possibly
## multiple times, as they are encountered.

## The interpreter operates in pull-push mode, i.e. the interpreter
## object is in charge and reads the characters from the channel as
## needed, and returns with the result of the parse, either when
## encountering an error, or when the parse was successful.

# ### ### ### ######### ######### #########
## Requisites

package require Tcl 8.5
package require pt::rde ; # Virtual machine geared to the parsing of PEGs.
package require snit

# ### ### ### ######### ######### #########
## Implementation

snit::type ::pt::peg::interp {

    # ### ### ### ######### ######### #########
    ## Instance API

    constructor {} {}

    method use {grammar} {}

    method parse {channel} {} ; # Parse the contents of the channel
				# against the configured grammar.

    method parset {text}   {} ; # Parse the text against the
                                # configured grammar.

    # ### ### ### ######### ######### #########
    ## Options

    ## None

    # ### ### ### ######### ######### #########
    ## Instance API Implementation.

    constructor {} {
	# Create the runtime supporting the parsing process.
	set myparser [pt::rde ${selfns}::ENGINE]
	return
    }

    method use {grammar} {
	# Release the information of any previously used grammar.

	array unset myrhs  *
	array unset mymode *
	set mystart epsilon

	# Copy the grammar into internal tables.

	# Note how the grammar is not used in any way, shape, or form
	# afterward.

	# Note also that it is not required to verify the
	# grammar. This was done while it was loaded into the grammar
	# object, be it incrementally or at once.

	array set myrhs  [$grammar rules]
	array set mymode [$grammar modes]
	set mystart      [$grammar start]
	return
    }

    method parse {channel} {
	$myparser reset $channel
	$self {*}$mystart
	return [$myparser complete]
    }

    method parset {text} {
	$myparser reset
	$myparser data $text
	$self {*}$mystart
	return [$myparser complete]
    }

    # ### ### ### ######### ######### #########
    ## Parse operator implementation

    # No input to parse, nor consume. Ok, always.

    method epsilon {} {
	$myparser i_status_ok
	return
    }

    # Parse and consume one character. No matter which character. This
    # fails only when reaching EOF. Does not consume input on failure.

    method dot {} {
	$self Next
	return
    }

    # Parse and consume one specific character. This fails if the
    # character at the location is not in the specified character
    # class. Does not consume input on failure.

    foreach operator {
	alnum alpha ascii ddigit digit  graph
	lower print punct space  upper  wordchar
	xdigit
    } {
	method $operator {} [string map [list @ $operator] {
	    $self Next
	    $myparser i:fail_return
	    $myparser i_test_@
	    return
	}]
    }

    # Parse and consume one specific character. This fails if the
    # character at the location is not the expected character. Does
    # not consume input on failure.
    
    method t {char} {
	$self Next
	$myparser i:fail_return
	$myparser i_test_char $char
	return
    }

    # Parse and consume one character, if in the specified range. This
    # fails if the read character is outside of the range. Does not
    # consume input on failure.

    method .. {chstart chend} {
	$self Next
	$myparser i:fail_return
	$myparser i_test_range $chstart $chend
	return
    }

    # To parse a nonterminal symbol in the input we execute its
    # parsing expression, i.e its right-hand side. This can be cut
    # short if the necessary information can be obtained from the
    # nonterminal cache. Does not consume input on failure.

    method n {symbol} {
	set savemode      $mycurrentmode
	set mycurrentmode $mymode($symbol)

	# Query NC, and shortcut
	if {[$myparser i_symbol_restore $symbol]} {
	    $self ASTFinalize
	    return
	}

	# Save location and AST construction state
	$myparser i_loc_push ; # (i)
	$myparser i_ast_push ; # (1)

	# Run the right hand side.
	$self {*}$myrhs($symbol)

	# Generate a semantic value, based on the currently active
	# semantic mode.
	switch -exact -- $mycurrentmode {
	    value   { $myparser i_value_clear/reduce $symbol }
	    leaf    { $myparser i_value_clear/leaf   $symbol }
	    void    { $myparser i_value_clear }
	}

	$myparser i_symbol_save $symbol

	# Drop ARS. Unconditional as any necessary reduction was done
	# already (See (a)), and left the result in SV
	$myparser i_ast_pop_rewind ; # (Ad 1)
	$self ASTFinalize

	# Even if parse is ok.
	$myparser i_error_nonterminal $symbol
	$myparser i_loc_pop_discard ; # (Ad i)
	return
    }

    # And lookahead predicate. We parse the expression against the
    # input and return the parse result. No input is consumed.

    method & {expression} {
	$myparser i_loc_push

	    $self {*}$expression

	$myparser i_loc_pop_rewind
	return
    }

    # Negated lookahead predicate. We parse the expression against the
    # input and returns the negated parse result. No input is
    # consumed.

    method ! {expression} {
	$myparser i_loc_push
	$myparser i_ast_push

	$self {*}$expression

	$myparser i_ast_pop_discard/rewind ;# -- fail/ok
	$myparser i_loc_pop_rewind
	$myparser i_status_negate
	return
    }

    # Parsing an optional expression. This tries to parse the sub
    # expression. It will never fail, even if the sub expression
    # itself is not succesful. Consumes only input if it could parse
    # the sub expression. Like *, but without the repetition.

    method ? {expression} {
	$myparser i_loc_push
	$myparser i_error_push

	$self {*}$expression

	$myparser i_error_pop_merge
	$myparser i_loc_pop_rewind/discard ;# -- fail/ok
	$myparser i_status_ok
	return
    }

    # Parse zero or more repetitions of an expression (Kleene
    # closure).  This consumes as much input as we were able to parse
    # the sub expression. The expresion as a whole is always
    # succesful, even if the sub expression fails (zero repetitions).

    method * {expression} {
	# do { ... } while ok.
	while {1} {
	    $myparser i_loc_push
	    $myparser i_error_push

	    $self {*}$expression

	    $myparser i_error_pop_merge
	    $myparser i_loc_pop_rewind/discard ;# -- fail/ok
	    $myparser i:ok_continue
	    break
	}
	$myparser i_status_ok
	return
    }

    # Parse one or more repetitions of an expression (Positive kleene
    # closure). This is similar to *, except for one round at the
    # front which has to parse for success of the whole. This
    # expression can fail. It will consume only as much input as it
    # was able to parse.

    method + {expression} {
	$myparser i_loc_push

	$self {*}$expression

	$myparser i_loc_pop_rewind/discard ;# -- fail/ok
	$myparser i:fail_return

	$self * $expression
	return
    }

    # Parsing a sequence of expressions. This parses each sub
    # expression in turn, each consuming input. In the case of failure
    # by one of the sequence's elements nothing is consumed at all.

    method x {args} {
	$myparser i_loc_push
	$myparser i_ast_push
	$myparser i_error_clear

	foreach expression $args {
	    $myparser i_error_push

	    $self {*}$expression

	    $myparser i_error_pop_merge
	    # Branch failed, track back and report to caller.
	    $myparser i:fail_ast_pop_rewind
	    $myparser i:fail_loc_pop_rewind
	    $myparser i:fail_return         ; # Stop trying on element failure
	}

	# All elements OK, squash backtracking state
	$myparser i_loc_pop_discard
	$myparser i_ast_pop_discard
	return
    }

    # Parsing a series of alternatives (Choice). This parses each
    # alternative in turn, always starting from the current
    # location. Nothing is consumed if all alternatives fail. Consumes
    # as much as was consumed by the succesful branch.

    method / {args} {
	$myparser i_error_clear

	foreach expression $args {
	    $myparser i_loc_push
	    $myparser i_ast_push
	    $myparser i_error_push

	    $self {*}$expression

	    $myparser i_error_pop_merge
	    $myparser i_ast_pop_rewind/discard
	    $myparser i_loc_pop_rewind/discard
	    $myparser i:fail_continue
	    return ; # Stop trying on finding a successful branch.
	}

	# All branches FAIL
	$myparser i_status_fail
	return
    }

    # ### ### ### ######### ######### #########

    method Next {} {
	# We are processing the outer method call into an atomic
	# parsing expression for error messaging.
	$myparser i_input_next [regsub {^.*Snit_method} [lreplace [info level -1] 1 4] {}]
	return
    }

    method ASTFinalize {} {
	if {$mycurrentmode ne "void"} {
	    $myparser i:ok_ast_value_push
	}
	upvar 1 savemode savemode
	set mycurrentmode $savemode
	return
    }

    # ### ### ### ######### ######### #########
    ## State Interpreter data structures.

    variable myparser      {}    ; # Our PARAM instantiation.
    variable myrhs  -array {}    ; # Dictionary mapping nonterminal
				   # symbols to parsing expressions
				   # describing their sentence
				   # structure.
    variable mymode -array {}    ; # Dictionary mapping nonterminal
				   # symbols to semantic modes
				   # (controlling AST generation).
    variable mystart  epsilon    ; # The parsing expression to start
				   # the parse process with.
    variable mycurrentmode value ; # The currently active semantic mode.

    # ### ### ### ######### ######### #########
    ## Debugging helper. To activate
    ## string map {{self {*}} {self TRACE {*}}}

    method TRACE {args} {
	puts |$args|enter
	set res [$self {*}$args]
	puts |$args|return
	return $res
    }

    ##
    # ### ### ### ######### ######### #########
}

# ### ### ### ######### ######### #########
## Package Management

package provide pt::peg::interp 1