File: peg_interp.tcl

package info (click to toggle)
tcllib 2.0%2Bdfsg-4
  • links: PTS
  • area: main
  • in suites: trixie
  • size: 83,572 kB
  • sloc: tcl: 306,798; ansic: 14,272; sh: 3,035; xml: 1,766; yacc: 1,157; pascal: 881; makefile: 124; perl: 84; f90: 84; python: 33; ruby: 13; php: 11
file content (350 lines) | stat: -rw-r--r-- 9,529 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
# -*- tcl -*-
#
# Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
# Grammar / Parsing Expression Grammar / Interpreter (Namespace based)

# ### ### ### ######### ######### #########
## Package description

## The instances of this class match an input provided by a buffer to
## a parsing expression grammar provided by a peg container. The
## matching process is interpretative, i.e. expressions are matched on
## the fly and multiple as they are encountered. The interpreter
## operates in pull-push mode, i.e. the interpreter object is in
## charge and reads the character stream from the buffer as it needs,
## and returns with the result of the match either when encountering
## an error, or when the match was successful.

# ### ### ### ######### ######### #########
## Requisites

package require grammar::me::tcl

# ### ### ### ######### ######### #########
## Implementation

namespace eval ::grammar::peg::interp {
    # Import the virtual machine for matching.

    namespace import ::grammar::me::tcl::*
    upvar #0 ::grammar::me::tcl::ok ok
}

# ### ### ### ######### ######### #########
## Instance API Implementation.

proc ::grammar::peg::interp::setup {peg} {
    variable ru
    variable mo
    variable se

    if {![$peg is valid]} {
        return -code error "Cannot initialize interpreter for invalid grammar"
    }
    set se [$peg start]
    foreach s [$peg nonterminals] {
        set ru($s) [$peg nonterminal rule $s]
        set mo($s) [$peg nonterminal mode $s]
    }

    #parray mo
    return
}

proc ::grammar::peg::interp::parse {nxcmd emvar astvar} {
    variable ok
    variable se

    upvar 1 $emvar emsg $astvar ast

    init $nxcmd

    MatchExpr $se
    isv_nonterminal_reduce ALL -1
    set ast [sv]
    if {!$ok} {
        foreach {l m} [ier_get] break
        lappend l [lc $l]
        set emsg [list $l $m]
    }

    return $ok
}

# ### ### ### ######### ######### #########
## Internal helper methods

proc ::grammar::peg::interp::MatchExpr {e} {
    variable ok
    variable mode
    variable mo
    variable ru

    set op [lindex $e 0]
    set ar [lrange $e 1 end]

    switch -exact -- $op {
        epsilon {
            # No input to match, nor consume. Match always.
            iok_ok
        }
        dot {
            # Match and consume one character. No matter which
            # character. Fails only when reaching eof. Does not
            # consume input on failure.
            
            ict_advance "Expected any character (got EOF)"
            if {$ok && ($mode eq "value")} {isv_terminal}
        }
        alnum - alpha {
            ict_advance            "Expected <$op> (got EOF)"
            if {!$ok} return

            ict_match_tokclass $op "Expected <$op>"
            if {$ok && ($mode eq "value")} {isv_terminal}
        }
        t {
            # Match and consume one specific character. Fails if
            # the character at the location is not what was
            # expected. Does not consume input on failure.

            set ch [lindex $ar 0]

            ict_advance     "Expected $ch (got EOF)"
            if {!$ok} return

            ict_match_token $ch "Expected $ch"
            if {$ok && ($mode eq "value")} {isv_terminal}
        }
        .. {
            # Match and consume one character, if in the specified
            # range. Fails if the read character is outside of the
            # range. Does not consume input on failure.

            foreach {chbegin chend} $ar break

            ict_advance                        "Expected \[$chbegin .. $chend\] (got EOF)"
            if {!$ok} return

            ict_match_tokrange $chbegin $chend "Expected \[$chbegin .. $chend\]"
            if {$ok && ($mode eq "value")} {isv_terminal}
        }
        n {
            # To match a nonterminal in the input we match its
            # parsing expression. This can be cut short if the
            # necessary information can be obtained from the memo
            # cache. Does not consume input on failure.

            set nt [lindex $ar 0]
            set savemode $mode
            set mode $mo($nt)

            if {[inc_restore $nt]} {
                if {$ok && ($mode ne "discard")} ias_push
                set mode $savemode
                return
            }

            set pos [icl_get]
            set mrk [ias_mark]

            MatchExpr $ru($nt)

            # Generate semantic value, based on mode.
            if {$mode eq "value"} {
                isv_nonterminal_reduce $nt $pos $mrk
            } elseif {$mode eq "match"} {
                isv_nonterminal_range  $nt $pos
            } elseif {$mode eq "leaf"} {
                isv_nonterminal_leaf   $nt $pos
            } else {
                # mode eq "discard"
                isv_clear
            }
            inc_save $nt $pos

            # AST operations ...
            ias_pop2mark $mrk
            if {$ok && ($mode ne "discard")} ias_push

            set mode $savemode
            # Even if match is ok.
	    ier_nonterminal "Expected $nt" $pos
        }
        & {
            # Lookahead predicate. And. Matches the expression
            # against the input and returns match result. Never
            # consumes any input.

            set pos [icl_get]

            MatchExpr [lindex $ar 0]

            icl_rewind $pos
            return
        }
        ! {
            # Negated lookahead predicate. Matches the expression
            # against the input and returns the negated match
            # result. Never consumes any input.

            set pos [icl_get]
            set mrk [ias_mark]
            
            MatchExpr [lindex $ar 0]

            if {$ok} {ias_pop2mark $mrk}
            icl_rewind $pos

            iok_negate
            return
        }
        * {
            # Zero or more repetitions. This consumes as much
            # input as it was able to match the sub
            # expression. The expresion as a whole always matches,
            # even if the sub expression fails (zero repetition).

            set sub [lindex $ar 0]

            while {1} {
                set pos [icl_get]

                set old [ier_get]
                MatchExpr $sub
                ier_merge $old

                if {$ok} continue
		break
            }

	    icl_rewind $pos
	    iok_ok
	    return
        }
        + {
            # One or more repetition. Like *, except for one match
            # at the front which has to match for success. This
            # expression can fail. It will consume only as much
            # input as it was able to match.

            set sub [lindex $ar 0]

            set pos [icl_get]

            MatchExpr $sub
            if {!$ok} {
                icl_rewind $pos
                return
            }

            while {1} {
                set pos [icl_get]

                set old [ier_get]
                MatchExpr $sub
                ier_merge $old

                if {$ok} continue
		break
            }

	    icl_rewind $pos
	    iok_ok
	    return
        }
        ? {
            # Optional matching. Tries to match the sub
            # expression. Will never fail, even if the sub
            # expression is not matching. Consumes only input as
            # it could match in the sub expression. Like *, but
            # without the repetition.

            set pos [icl_get]

	    set old [ier_get]
            MatchExpr [lindex $ar 0]
	    ier_merge $old

            if {!$ok} {
                icl_rewind $pos
                iok_ok
            }
            return
        }
        x {
            # Sequence. Matches each sub expression in turn, each
            # consuming input. In case of failure by one of the
            # sequence elements nothing is consumed at all.

            set pos [icl_get]
            set mrk [ias_mark]
            ier_clear

            foreach e $ar {

                set old [ier_get]
                MatchExpr $e
                ier_merge $old

                if {!$ok} {
                    ias_pop2mark $mrk
                    icl_rewind $pos
                    return
                }
            }
            # OK
            return
        }
        / {
            # Choice. Matches each sub expression in turn, always
            # starting from the current location. Nothing is
            # consumed if all branches fail. Consumes as much as
            # was consumed by the matching branch.

            set pos [icl_get]
            set mrk [ias_mark]

            ier_clear
            foreach e $ar {

                set old [ier_get]
                MatchExpr $e
                ier_merge $old

                if {!$ok} {
                    ias_pop2mark $mrk
                    icl_rewind $pos
                    continue
                }
                return
            }
            # FAIL
            iok_fail
            return
        }
    }
}

# ### ### ### ######### ######### #########
## Interpreter data structures.

namespace eval ::grammar::peg::interp {
    ## Start expression.
    ## Map from nonterminals to their expressions.
    ## Reference to internal memo cache.

    variable se {} ; # Start expression.
    variable ru    ; # Nonterminals and rule map.
    variable mo    ; # Nonterminal modes.

    variable mode value ; # Matching mode.

    array set ru {}
    array set mo {}
}

# ### ### ### ######### ######### #########
## Package Management

package provide grammar::peg::interp 0.1.2