1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
|
# -*- tcl -*-
#
# Copyright (c) 2009 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
# Interpreter for parsing expression grammars. In essence a recursive
# descent parser configurable with a parsing expression grammar.
# ### ### ### ######### ######### #########
## Package description
## The instances of this class parse a text provided through a channel
## based on a parsing expression grammar provided by a peg container
## object. The parsing process is interpretative, i.e. the parsing
## expressions are decoded and checked on the fly and possibly
## multiple times, as they are encountered.
## The interpreter operates in pull-push mode, i.e. the interpreter
## object is in charge and reads the characters from the channel as
## needed, and returns with the result of the parse, either when
## encountering an error, or when the parse was successful.
# ### ### ### ######### ######### #########
## Requisites
package require Tcl 8.5
package require pt::rde ; # Virtual machine geared to the parsing of PEGs.
package require snit
# ### ### ### ######### ######### #########
## Implementation
snit::type ::pt::peg::interp {
# ### ### ### ######### ######### #########
## Instance API
constructor {} {}
method use {grammar} {}
method parse {channel} {} ; # Parse the contents of the channel
# against the configured grammar.
method parset {text} {} ; # Parse the text against the
# configured grammar.
# ### ### ### ######### ######### #########
## Options
## None
# ### ### ### ######### ######### #########
## Instance API Implementation.
constructor {} {
# Create the runtime supporting the parsing process.
set myparser [pt::rde ${selfns}::ENGINE]
return
}
method use {grammar} {
# Release the information of any previously used grammar.
array unset myrhs *
array unset mymode *
set mystart epsilon
# Copy the grammar into internal tables.
# Note how the grammar is not used in any way, shape, or form
# afterward.
# Note also that it is not required to verify the
# grammar. This was done while it was loaded into the grammar
# object, be it incrementally or at once.
array set myrhs [$grammar rules]
array set mymode [$grammar modes]
set mystart [$grammar start]
return
}
method parse {channel} {
$myparser reset $channel
$self {*}$mystart
return [$myparser complete]
}
method parset {text} {
$myparser reset
$myparser data $text
$self {*}$mystart
return [$myparser complete]
}
# ### ### ### ######### ######### #########
## Parse operator implementation
# No input to parse, nor consume. Ok, always.
method epsilon {} {
$myparser i_status_ok
return
}
# Parse and consume one character. No matter which character. This
# fails only when reaching EOF. Does not consume input on failure.
method dot {} {
$self Next
return
}
# Parse and consume one specific character. This fails if the
# character at the location is not in the specified character
# class. Does not consume input on failure.
foreach operator {
alnum alpha ascii ddigit digit graph
lower print punct space upper wordchar
xdigit
} {
method $operator {} [string map [list @ $operator] {
$self Next
$myparser i:fail_return
$myparser i_test_@
return
}]
}
# Parse and consume one specific character. This fails if the
# character at the location is not the expected character. Does
# not consume input on failure.
method t {char} {
$self Next
$myparser i:fail_return
$myparser i_test_char $char
return
}
# Parse and consume one character, if in the specified range. This
# fails if the read character is outside of the range. Does not
# consume input on failure.
method .. {chstart chend} {
$self Next
$myparser i:fail_return
$myparser i_test_range $chstart $chend
return
}
# To parse a nonterminal symbol in the input we execute its
# parsing expression, i.e its right-hand side. This can be cut
# short if the necessary information can be obtained from the
# nonterminal cache. Does not consume input on failure.
method n {symbol} {
set savemode $mycurrentmode
set mycurrentmode $mymode($symbol)
# Query NC, and shortcut
if {[$myparser i_symbol_restore $symbol]} {
$self ASTFinalize
return
}
# Save location and AST construction state
$myparser i_loc_push ; # (i)
$myparser i_ast_push ; # (1)
# Run the right hand side.
$self {*}$myrhs($symbol)
# Generate a semantic value, based on the currently active
# semantic mode.
switch -exact -- $mycurrentmode {
value { $myparser i_value_clear/reduce $symbol }
leaf { $myparser i_value_clear/leaf $symbol }
void { $myparser i_value_clear }
}
$myparser i_symbol_save $symbol
# Drop ARS. Unconditional as any necessary reduction was done
# already (See (a)), and left the result in SV
$myparser i_ast_pop_rewind ; # (Ad 1)
$self ASTFinalize
# Even if parse is ok.
$myparser i_error_nonterminal $symbol
$myparser i_loc_pop_discard ; # (Ad i)
return
}
# And lookahead predicate. We parse the expression against the
# input and return the parse result. No input is consumed.
method & {expression} {
$myparser i_loc_push
$self {*}$expression
$myparser i_loc_pop_rewind
return
}
# Negated lookahead predicate. We parse the expression against the
# input and returns the negated parse result. No input is
# consumed.
method ! {expression} {
$myparser i_loc_push
$myparser i_ast_push
$self {*}$expression
$myparser i_ast_pop_discard/rewind ;# -- fail/ok
$myparser i_loc_pop_rewind
$myparser i_status_negate
return
}
# Parsing an optional expression. This tries to parse the sub
# expression. It will never fail, even if the sub expression
# itself is not succesful. Consumes only input if it could parse
# the sub expression. Like *, but without the repetition.
method ? {expression} {
$myparser i_loc_push
$myparser i_error_push
$self {*}$expression
$myparser i_error_pop_merge
$myparser i_loc_pop_rewind/discard ;# -- fail/ok
$myparser i_status_ok
return
}
# Parse zero or more repetitions of an expression (Kleene
# closure). This consumes as much input as we were able to parse
# the sub expression. The expresion as a whole is always
# succesful, even if the sub expression fails (zero repetitions).
method * {expression} {
# do { ... } while ok.
while {1} {
$myparser i_loc_push
$myparser i_error_push
$self {*}$expression
$myparser i_error_pop_merge
$myparser i_loc_pop_rewind/discard ;# -- fail/ok
$myparser i:ok_continue
break
}
$myparser i_status_ok
return
}
# Parse one or more repetitions of an expression (Positive kleene
# closure). This is similar to *, except for one round at the
# front which has to parse for success of the whole. This
# expression can fail. It will consume only as much input as it
# was able to parse.
method + {expression} {
$myparser i_loc_push
$self {*}$expression
$myparser i_loc_pop_rewind/discard ;# -- fail/ok
$myparser i:fail_return
$self * $expression
return
}
# Parsing a sequence of expressions. This parses each sub
# expression in turn, each consuming input. In the case of failure
# by one of the sequence's elements nothing is consumed at all.
method x {args} {
$myparser i_loc_push
$myparser i_ast_push
$myparser i_error_clear
foreach expression $args {
$myparser i_error_push
$self {*}$expression
$myparser i_error_pop_merge
# Branch failed, track back and report to caller.
$myparser i:fail_ast_pop_rewind
$myparser i:fail_loc_pop_rewind
$myparser i:fail_return ; # Stop trying on element failure
}
# All elements OK, squash backtracking state
$myparser i_loc_pop_discard
$myparser i_ast_pop_discard
return
}
# Parsing a series of alternatives (Choice). This parses each
# alternative in turn, always starting from the current
# location. Nothing is consumed if all alternatives fail. Consumes
# as much as was consumed by the succesful branch.
method / {args} {
$myparser i_error_clear
foreach expression $args {
$myparser i_loc_push
$myparser i_ast_push
$myparser i_error_push
$self {*}$expression
$myparser i_error_pop_merge
$myparser i_ast_pop_rewind/discard
$myparser i_loc_pop_rewind/discard
$myparser i:fail_continue
return ; # Stop trying on finding a successful branch.
}
# All branches FAIL
$myparser i_status_fail
return
}
# ### ### ### ######### ######### #########
method Next {} {
# We are processing the outer method call into an atomic
# parsing expression for error messaging.
$myparser i_input_next [regsub {^.*Snit_method} [lreplace [info level -1] 1 4] {}]
return
}
method ASTFinalize {} {
if {$mycurrentmode ne "void"} {
$myparser i:ok_ast_value_push
}
upvar 1 savemode savemode
set mycurrentmode $savemode
return
}
# ### ### ### ######### ######### #########
## State Interpreter data structures.
variable myparser {} ; # Our PARAM instantiation.
variable myrhs -array {} ; # Dictionary mapping nonterminal
# symbols to parsing expressions
# describing their sentence
# structure.
variable mymode -array {} ; # Dictionary mapping nonterminal
# symbols to semantic modes
# (controlling AST generation).
variable mystart epsilon ; # The parsing expression to start
# the parse process with.
variable mycurrentmode value ; # The currently active semantic mode.
# ### ### ### ######### ######### #########
## Debugging helper. To activate
## string map {{self {*}} {self TRACE {*}}}
method TRACE {args} {
puts |$args|enter
set res [$self {*}$args]
puts |$args|return
return $res
}
##
# ### ### ### ######### ######### #########
}
# ### ### ### ######### ######### #########
## Package Management
package provide pt::peg::interp 1
|