File: pt_pegrammar.tcl

package info (click to toggle)
tcllib 1.14-dfsg-3%2Bdeb7u1
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 33,036 kB
  • sloc: tcl: 148,302; ansic: 14,067; sh: 10,320; xml: 1,766; yacc: 753; pascal: 551; makefile: 129; perl: 84; f90: 84; python: 33; ruby: 13; php: 11
file content (380 lines) | stat: -rw-r--r-- 9,278 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
# -*- tcl -*-
# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net>

# Verification of serialized PEGs, and conversion between
# serializations and other data structures.

# # ## ### ##### ######## ############# #####################
## Requirements

package require Tcl 8.5                 ; # Required runtime.
package require pt::pe

# # ## ### ##### ######## ############# #####################
##

namespace eval ::pt::peg {
    namespace export \
	verify verify-as-canonical canonicalize print merge equal
    namespace ensemble create
}

# # ## ### ##### ######## #############
## Public API

# Check that the proposed serialization of a keyword index is
# indeed such.

proc ::pt::peg::verify {serial {canonvar {}}} {
    variable ourprefix
    variable ourshort
    variable ourtag
    variable ourcbadlen
    variable ourmiss
    variable ourbadpe
    variable ourcode

    # Basic syntax: Length and outer type code
    if {[llength $serial] != 2} {
	return -code error $ourprefix$ourshort
    }

    lassign $serial tag contents

    if {$tag ne $ourcode} {
	return -code error $ourprefix[format $ourtag $tag]
    }

    # contents = dict (rules, start -> ...)

    if {[llength $contents] != 4} {
	return -code error $ourprefix$ourcbadlen
    }

    # Unpack the contents, then check that all necessary keys are
    # present. Together with the length check we can then also be
    # sure that no other key is present either.
    array set peg $contents
    foreach k {rules start} {
	if {[info exists peg($k)]} continue
	return -code error $ourprefix[format $ourmiss $k]
    }

    if {[catch {
	pt::pe verify $peg(start) canon
    } msg]} {
	return -code error \
	    [string map \
		 [list \
		      {error in serialization:} \
		      $ourprefix[format $ourbadpe start]] \
		 $msg]
    }

    if {$canonvar eq {}} {
	VerifyRules $peg(rules)
    } else {
	upvar 1 $canonvar iscanonical
	set iscanonical $canon

	VerifyRules $peg(rules) iscanonical

	# Quick exit if the inner structure was already
	# non-canonical.
	if {!$iscanonical} return

	# Now various checks if the keys and identifiers are
	# properly sorted to make this a canonical serialization.

	lassign $contents a _ b _
	if {[list $a $b] ne {rules start}} {
	    set iscanonical 0
	}

	if {$serial ne [list {*}$serial]} {
	    set iscanonical 0
	}

	if {$contents ne [list {*}$contents]} {
	    set iscanonical 0
	}
    }

    # Everything checked out.
    return
}

proc ::pt::peg::verify-as-canonical {serial} {
    verify $serial iscanonical
    if {!$iscanonical} {
	variable ourprefix
	variable ourdupsort
	return -code error $ourprefix$ourdupsort
    }
    return
}

proc ::pt::peg::canonicalize {serial} {
    variable ourcode

    verify $serial iscanonical
    if {$iscanonical} { return $serial }

    # Unpack the serialization.
    array set peg $serial
    array set peg $peg($ourcode)
    unset     peg($ourcode)

    # Construct result, inside out
    set rules {}
    array set r $peg(rules)
    foreach symbol [lsort -dict [array names r]] {
	array set sd $r($symbol)
	lappend rules \
	    $symbol [list \
			 is   [pt::pe \
				   canonicalize $sd(is)] \
			 mode $sd(mode)]
	unset sd
    }

    set serial [list $ourcode \
		    [list \
			 rules  $rules \
			 start  [pt::pe \
				     canonicalize $peg(start)]]]
    return $serial
}

# Converts a PEG serialization into a human readable string for
# test results. It assumes that the serialization is at least
# structurally sound.

proc ::pt::peg::print {serial} {
    variable ourcode

    # Unpack the serialization.
    array set peg $serial
    array set peg $peg($ourcode)
    unset     peg($ourcode)
    # Print
    set lines {}
    lappend lines $ourcode
    lappend lines "    start := [join [split [pt::pe print $peg(start)] \n] "\n             "]"
    lappend lines {    rules}
    foreach {symbol value} $peg(rules) {
	array set sd $value
	# keys :: is, mode
	lappend lines "        $symbol :: <$sd(mode)> :="
	lappend lines "            [join [split [pt::pe print $sd(is)] \n] "\n            "]"
	unset sd
    }
    return [join $lines \n]
}

# # ## ### ##### ######## #############

proc ::pt::peg::merge {seriala serialb} {
    variable ourcode

    verify $seriala
    verify $serialb

    array set pega $seriala
    array set pega $pega($ourcode)
    unset     pega($ourcode)

    array set pegb $serialb
    array set pegb $pegb($ourcode)
    unset     pegb($ourcode)

    array set ra $pega(rules)
    array set rb $pegb(rules)

    foreach symbol [array names rb] {
	if {![info exists ra($symbol)]} {
	    # No conflict possible, copy over
	    set ra($symbol) $rb($symbol)
	} else {
	    # unpack definitions, check for conflicts
	    array set sda $ra($symbol)
	    array set sdb $rb($symbol)

	    if {$sda(mode) ne $sdb(mode)} {
		return -code "Merge error for nonterminal \"$symbol\", semantic mode mismatch"
	    }

	    # Merge parsing expressions, if not identical ...
	    if {![pt::pe equal \
		      $sda(is) \
		      $sdb(is)]} {
		set sda(is) [pt::pe choice \
				 $sda(is) \
				 $sdb(is)]
		set ra($symbol) [array get sda]
	    }

	    unset sda
	    unset sdb
	}
    }

    # Construct result, inside out

    set rules {}
    foreach symbol [lsort -dict [array names ra]] {
	array set sd $ra($symbol)
	lappend rules \
	    $symbol [list \
			 is   $sd(is) \
			 mode $sd(mode)]
	unset sd
    }

    if {![pt::pe equal \
	      $pega(start) \
	      $pegb(start)]} {
	set start [pt::pe choice \
		       $pega(start) \
		       $pegb(start)]
    } else {
	set start $pega(start)
    }

    set serial [list $ourcode \
		    [list \
			 rules  $rules \
			 start  $start]]
    return $serial

}

# # ## ### ##### ######## #############

proc ::pt::peg::equal {seriala serialb} {
    # syntactical (intensional) grammar equality.
    string equal \
	[canonicalize $seriala] \
	[canonicalize $serialb]
}

# # ## ### ##### ######## #############


proc ::pt::peg::VerifyRules {rules {canonvar {}}} {
    variable ourprefix
    variable ourrbadlen
    variable oursdup
    variable oursempty
    variable oursbadlen
    variable oursmiss
    variable ourbadpe
    variable ourbadmode
    variable ourmode

    if {$canonvar ne {}} {
	upvar 1 $canonvar iscanonical
    }

    if {[llength $rules] % 2 == 1} {
	return -code error $ourprefix$ourrbadlen
    }

    if {$rules ne [list {*}$rules]} {
	set iscanonical 0
    }

    array set r $rules

    if {([array size r]*2) < [llength $rules]} {
	return -code error $ourprefix$oursdup
    }

    foreach symbol [array names r] {
	if {$symbol eq {}} {
	    return -code error $ourprefix$oursempty
	}

	set def $r($symbol)

	if {[llength $def] != 4} {
	    return -code error $ourprefix[format $oursbadlen $symbol]
	}

	if {$def ne [list {*}$def]} {
	    set iscanonical 0
	}

	array set sd $def
	foreach k {is mode} {
	    if {[info exists sd($k)]} continue
	    return -code error $ourprefix[format $oursmiss $symbol $k]
	}

	if {[catch {
	    pt::pe verify $sd(is) canon
	} msg]} {
	    return -code error \
		[string map \
		     [list \
			  {error in serialization:} \
			  $ourprefix[format $ourbadpe ($symbol)]] \
		     $msg]
	}

	if {![info exists ourmode($sd(mode))]} {
	    return -code error $ourprefix[format $ourbadmode $symbol $sd(mode)]
	}

	# Now various checks if the keys and identifiers are
	# properly sorted to make this a canonical serialization.

	if {!$canon} {
	    set iscanonical 0
	    continue
	}

	lassign $def a _ b _
	if {[list $a $b] ne {is mode}} {
	    set iscanonical 0
	}
    }
    return
}

namespace eval ::pt::peg {
    # # ## ### ##### ######## #############

    variable ourcode      pt::grammar::peg
    variable ourprefix    {error in serialization:}
    #                                                                                  # Test cases (grammar-peg-structure-)
    variable ourshort     { dictionary too short, expected exactly one key}      ; # 
    variable ourtag       { bad type tag "%s"}                                   ; # 
    variable ourcbadlen   { dictionary of bad length, expected exactly two keys} ; # 
    variable ourmiss      { missing expected key "%s"}                           ; # 
    variable oursmiss     { symbol "%s", missing expected key "%s"}                           ; # 
    variable ourbadpe     { bad %s parsing expression:}                      ; # 
    variable ourbadmode   { symbol "%s", bad nonterminal mode "%s"}                           ; # 
    variable ourrbadlen   { rule dictionary of bad length, not a dictionary}     ; # 
    variable oursempty    { expected symbol name, got empty string}
    variable oursbadlen   { symbol dictionary for "%s" of bad length, expected exactly two keys} ; # 
    variable oursdup      { duplicate nonterminal keywords}                                  ; # 
    # Message for non-canonical serialization when expecting canonical form
    variable ourdupsort   { duplicate and/or unsorted keywords and/or irrelevant whitespace}                ; #

    variable  ourmode
    array set ourmode {
	value .
	leaf  .
	void  .
    }

    ##
    # # ## ### ##### ######## #############
}

# # ## ### ##### ######## ############# #####################
## Ready

package provide pt::peg 1
return