File: pt_peg_from_peg.tcl

package info (click to toggle)
tcllib 1.16-dfsg-2
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 50,040 kB
  • ctags: 18,603
  • sloc: tcl: 156,708; ansic: 14,098; sh: 10,783; xml: 1,766; yacc: 1,114; pascal: 551; makefile: 89; perl: 84; f90: 84; python: 33; ruby: 13; php: 11
file content (394 lines) | stat: -rw-r--r-- 10,504 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
# pt_peg_from_peg.tcl --
#
#	Conversion from PEG (Human readable text) to PEG.
#
# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# 
# RCS: @(#) $Id: pt_peg_from_peg.tcl,v 1.1 2010/03/26 05:07:24 andreas_kupries Exp $

# This package takes text for a human-readable PEG and produces the
# canonical serialization of a parsing expression grammar.

# TODO :: APIs for reading from arbitrary channel.

# ### ### ### ######### ######### #########
## Requisites

package require Tcl 8.5
package require pt::peg  ; # Verification that the input is proper.
#package require pt::peg::interp
#package require pt::peg::container::peg
package require pt::parse::peg
package require pt::ast
package require pt::pe
package require pt::pe::op

# ### ### ### ######### ######### #########
##

namespace eval ::pt::peg::from::peg {
    namespace export   convert convert-file
    namespace ensemble create
}

# ### ### ### ######### ######### #########
## API.

proc ::pt::peg::from::peg::convert {text} {
    # Initialize data for the pseudo-channel
    variable input $text
    variable loc   0
    variable max   [expr { [string length $text] - 1 }]

    return [Convert]
}

proc ::pt::peg::from::peg::convert-file {path} {
    # Initialize data for the pseudo-channel
    variable input [fileutil::cat $path]
    variable loc   0
    variable max   [expr { [string length $input] - 1 }]

    return [Convert]
}

# ### ### ### ######### ######### #########

proc ::pt::peg::from::peg::Convert {} {
    # Create the runtime ...
    set c [chan create read pt::peg::from::peg::CHAN] ; # pseudo-channel for input

    #set g [pt::peg::container::peg %AUTO]             ; # load peg grammar
    #set i [pt::peg::interp         %AUTO% $g]         ; # grammar interpreter / parser
    #$g destroy
    set i [pt::parse::peg]

    # Parse input.
    set fail [catch {
	set ast [$i parse $c]
    } msg]
    if {$fail} {
	set ei $::errorInfo
	set ec $::errorCode
    }

    $i destroy
    close $c

    if {$fail} {
	variable input {}
	return -code error -errorinfo $ei -errorcode $ec $msg
    }

    # Now convert the AST to the grammar serial.
    set serial [pt::ast bottomup \
		    pt::peg::from::peg::GEN \
		    $ast]

    variable input {}
    return $serial

    # ### ### ### ######### ######### #########
}

# ### ### ### ######### ######### #########
## Internals - Pseudo channel to couple the in-memory text with the
## RDE.

namespace eval ::pt::peg::from::peg::CHAN {
    namespace export   initialize finalize read watch
    namespace ensemble create
}

proc pt::peg::from::peg::CHAN::initialize {c mode} {
    return {initialize finalize watch read}
}

proc pt::peg::from::peg::CHAN::finalize {c}        {}
proc pt::peg::from::peg::CHAN::watch    {c events} {}

proc pt::peg::from::peg::CHAN::read {c n} {
    # Note: Should have binary string of the input, to properly handle
    # encodings ...
    variable ::pt::peg::from::peg::input
    variable ::pt::peg::from::peg::loc
    variable ::pt::peg::from::peg::max

    if {$loc >= $max} { return {} }

    set end [expr {$loc + $n - 1}]
    set res [string range $input $loc $end]

    incr loc $n

    return $res
}

# ### ### ### ######### ######### #########
## Internals - Bottom up walk converting AST to PEG serialization.
## Pseudo-ensemble

namespace eval ::pt::peg::from::peg::GEN {}

proc pt::peg::from::peg::GEN {ast} {
    # The reason for not being an ensemble, an additional param
    # (8.6+ can code that as ensemble).
    return [namespace eval GEN $ast]
}

proc pt::peg::from::peg::GEN::ALNUM {s e} {
    return [pt::pe alnum]    
}

proc pt::peg::from::peg::GEN::ALPHA {s e} {
    return [pt::pe alpha]    
}

proc pt::peg::from::peg::GEN::AND {s e} {
    return [pt::pe ahead [pt::pe dot]] ; # -> Prefix
}

proc pt::peg::from::peg::GEN::ASCII {s e} {
    return [pt::pe ascii]    
}

proc pt::peg::from::peg::GEN::Attribute {s e args} {
    return [lindex $args 0] ; # -> Definition
}

proc pt::peg::from::peg::GEN::Char {s e args} {
    return [lindex $args 0]
}

proc pt::peg::from::peg::GEN::CharOctalFull {s e} {
    variable ::pt::peg::from::peg::input
    return [pt::pe terminal [char unquote [string range $input $s $e]]]
}

proc pt::peg::from::peg::GEN::CharOctalPart {s e} {
    variable ::pt::peg::from::peg::input
    return [pt::pe terminal [char unquote [string range $input $s $e]]]
}

proc pt::peg::from::peg::GEN::CharSpecial {s e} {
    variable ::pt::peg::from::peg::input
    return [pt::pe terminal [char unquote [string range $input $s $e]]]
}

proc pt::peg::from::peg::GEN::CharUnescaped {s e} {
    variable ::pt::peg::from::peg::input
    return [pt::pe terminal [string range $input $s $e]]
}

proc pt::peg::from::peg::GEN::CharUnicode {s e} {
    variable ::pt::peg::from::peg::input
    return [pt::pe terminal [char unquote [string range $input $s $e]]]
}

proc pt::peg::from::peg::GEN::Class {s e args} {
    if {[llength $args] == 1} { ; # integrated pe::op flatten
	return [lindex $args 0]
    } else {
	return [pt::pe choice {*}$args] ; # <- Chars and Ranges
    }
}

proc pt::peg::from::peg::GEN::CONTROL {s e} {
    return [pt::pe ddigit]
}

proc pt::peg::from::peg::GEN::DDIGIT {s e} {
    return [pt::pe ddigit]
}

proc pt::peg::from::peg::GEN::Definition {s e args} {
    # args = list/2 (symbol pe)      | <-           Ident(ifier) Expression
    # args = list/3 (mode symbol pe) | <- Attribute Ident(ifier) Expression
    if {[llength $args] == 3} {
	lassign $args mode sym pe
    } else {
	lassign $args sym pe
	set mode value
    }
    # sym = list/2 ('n' name)
    return [list [lindex $sym 1] $mode [pt::pe::op flatten $pe]]
}

proc pt::peg::from::peg::GEN::DIGIT {s e} {
    return [pt::pe digit]
}

proc pt::peg::from::peg::GEN::DOT {s e} {
    return [pt::pe dot]
}

proc pt::peg::from::peg::GEN::Expression {s e args} {
    if {[llength $args] == 1} { ; # integrated pe::op flatten
	return [lindex $args 0]
    } else {
	return [pt::pe choice {*}$args] ; # <- Primary
    }
}

proc pt::peg::from::peg::GEN::Grammar {s e args} {
    # args = list (start, list/3(symbol, mode, rule)...) <- Header Definition*
    array set symbols {}
    set rules {}
    foreach def [lsort -index 0 -dict [lassign $args startexpr]] {
	lassign $def sym mode rhs
	if {[info exists symbol($sym)]} {
	    return -code error "Double declaration of symolb '$sym'"
	}
	set symbols($sym) .
	lappend rules $sym [list is $rhs mode $mode]
    }
    # Full grammar
    return [list pt::grammar::peg [list rules $rules start $startexpr]]
}

proc pt::peg::from::peg::GEN::GRAPH {s e} {
    return [pt::pe graph]
}

proc pt::peg::from::peg::GEN::Header {s e args} {
    # args = list/2 (list/2 ('n', name), pe) <- Ident(ifier) StartExpr
    return [lindex $args 1] ; # StartExpr passes through
}

proc pt::peg::from::peg::GEN::Ident {s e} {
    variable ::pt::peg::from::peg::input
    return [pt::pe nonterminal [string range $input $s $e]]
}

proc pt::peg::from::peg::GEN::Identifier {s e args} {
    return [lindex $args 0] ; # <- Ident, passes through
}

proc pt::peg::from::peg::GEN::LEAF {s e} {
    return leaf
}

proc pt::peg::from::peg::GEN::LOWER {s e} {
    return [pt::pe lower]
}

proc pt::peg::from::peg::GEN::Literal {s e args} {
    set n [llength $args]
    if {$n == 1} {
	# integrated pe::op flatten, return just the char.
	return [lindex $args 0]
    } elseif {$n == 0} {
	# No chars, empty string, IOW epsilon.
	return [pt::pe epsilon]
    } else {
	# Series of chars -> Primary
	return [pt::pe sequence {*}$args]
    }
}

proc pt::peg::from::peg::GEN::NOT {s e} {
    return [pt::pe notahead [pt::pe dot]] ; # -> Prefix (dot is placeholder)
}

proc pt::peg::from::peg::GEN::PLUS {s e} {
    return [pt::pe repeat1 [pt::pe dot]] ; # -> Suffix (dot is placeholder)
}

proc pt::peg::from::peg::GEN::Primary {s e args} {
    return [lindex $args 0] ; # -> Expression, pass through
}

proc pt::peg::from::peg::GEN::Prefix {s e args} {
    # args = list/1 (pe)            | <- AND/NOT, Expression
    # args = list/2 (pe/prefix, pe) | <- Expression
    if {[llength $args] == 2} {
	# Prefix operator present ... Replace its child (dot,
	# placeholder) with our second, the actual expression.
	return [lreplace [lindex $args 0] 1 1 [lindex $args 1]]
    } else {
	# Pass the sub-expression
	return [lindex $args 0]
    }
}

proc pt::peg::from::peg::GEN::PRINTABLE {s e} {
    return [pt::pe printable]
}

proc pt::peg::from::peg::GEN::PUNCT {s e} {
    return [pt::pe punct]    
}

proc pt::peg::from::peg::GEN::QUESTION {s e} {
    return [pt::pe optional [pt::pe dot]] ; # -> Suffix (dot is placeholder)
}

proc pt::peg::from::peg::GEN::Range {s e args} {
    # args = list/1 (pe/t)       | <- Char (pass through)
    # args = list/2 (pe/t, pe/t) | <- Char, Char
    if {[llength $args] == 2} {
	# Convert two terminals to range
	return [pt::pe range [lindex $args 0 1] [lindex $args 1 1]]
    } else {
	# Pass the char ...
	return [lindex $args 0]
    }
}

proc pt::peg::from::peg::GEN::Sequence {s e args} {
    if {[llength $args] == 1} { ; # integrated pe::op flatten
	return [lindex $args 0]
    } else {
	return [pt::pe sequence {*}$args] ; # <- Prefix+
    }
}

proc pt::peg::from::peg::GEN::SPACE {s e} {
    return [pt::pe space]
}

proc pt::peg::from::peg::GEN::STAR {s e} {
    return [pt::pe repeat0 [pt::pe dot]] ; # -> Suffix (dot is placeholder)
}

proc pt::peg::from::peg::GEN::StartExpr {s e args} {
    # args = list/1 (pe) | <- Expression, -> Header
    return [pt::pe::op flatten [lindex $args 0]]
}
proc pt::peg::from::peg::GEN::Suffix {s e args} {
    # args = list/1 (pe)            | <- Expression 
    # args = list/2 (pe, pe/suffix) | <- Expression */+/?
    if {[llength $args] == 2} {
	# Suffix operator present ... Replace its child (dot,
	# placeholder) with our first, the actual expression.
	return [lreplace [lindex $args 1] 1 1 [lindex $args 0]]
    } else {
	# Pass the sub-expression
	return [lindex $args 0]
    }
}

proc pt::peg::from::peg::GEN::UPPER {s e} {
    return [pt::pe upper]   
}

proc pt::peg::from::peg::GEN::VOID {s e} {
    return void
}

proc pt::peg::from::peg::GEN::WORDCHAR {s e} {
    return [pt::pe wordchar]
}

proc pt::peg::from::peg::GEN::XDIGIT {s e} {
    return [pt::pe xdigit]  
}

# ### ### ### ######### ######### #########
## Ready

package provide pt::peg::from::peg 1.0.2
return