File: XmlParse.tcl

package info (click to toggle)
transcriber 1.5.1.1-10
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, buster, jessie, jessie-kfreebsd, sid, stretch, trixie
  • size: 4,448 kB
  • ctags: 2,220
  • sloc: tcl: 13,141; ansic: 1,894; sh: 1,193; makefile: 96; xml: 66
file content (939 lines) | stat: -rw-r--r-- 32,730 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
#!/bin/sh
#\
exec tclsh "$0" "$@"

# RCS: @(#) $Id: XmlParse.tcl,v 1.8 2005/01/11 12:28:30 mantam Exp $

# Copyright (C) 1998-2000, DGA - part of the Transcriber program
# distributed under the GNU General Public License (see COPYING file)
# WWW:          http://trans.sourceforge.net/
# Author:       Claude Barras

################################################################

# Lexical parser for XML

namespace eval ::xml::parser {

   ####### Variables for parsing (regexps, rules)

   # Values shared with other parts of parser

   # Detect Unicode support (Tcl/Tk 8.1 or higher)
   variable i18n [expr ![catch {encoding system}]]

   # Initialize rules and conditions for lexers
   set rules {}
   set conds {} 

   #=======================================================================
   # Regular expressions for XML parsing with tcLex
   # Reference: http://www.w3.org/TR/1998/REC-xml-19980210
   
   #-----------------------------------------------------------------------
   # B - Character Classes
   # Restricted to ISO-Latin-1 for Tcl8.0;
   # Support of Unicode (UTF-8) for Tcl8.1 or higher

   if {$i18n} {
      variable BaseChar "\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\\u09B6-\u09B9\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0-\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60-\u0D61\u0E01-\u0E2E\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102-\u1103\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154-\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D-\u116E\u1172-\u1173\u1175\u119E\u11A8\u11AB\u11AE-\u11AF\u11B7-\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u212E\u2180-\u2182\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\uAC00-\uD7A3"; # [85]

      variable Ideographic "\u4E00-\u9FA5\u3007\u3021-\u3029"; # [86]

      variable CombiningChar "\u0300-\u0345\u0360-\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u064B-\u0652\u0670\u06D6-\u06DC\u06DD-\u06DF\u06E0-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0901-\u0903\u093C\u093E-\u094C\u094D\u0951-\u0954\u0962-\u0963\u0981-\u0983\u09BC\u09BE\u09BF\u09C0-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09E2-\u09E3\u0A02\u0A3C\u0A3E\u0A3F\u0A40-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B82-\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C82-\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A"; # [87]

      variable Digit "\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29"; # [88]

      variable Extender "\u00B7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035\u309D-\u309E\u30FC-\u30FE"; # [89]

      variable Letter $BaseChar$Ideographic; # [84]
   } else {
      variable CombiningChar ""
      variable Digit "0-9"; # [88]
      variable Extender ""
      variable Letter "A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\xff"; # [84]
   }

   #-----------------------------------------------------------------------
   # 2.2 - Characters - restricted to ISO-Latin-1 for Tcl/Tk8.0
   # The whole document must match this character set

   if {$i18n} {
      variable Char "\n\t\r\x20-\ud7ff\ue000-\ufffd"; # [2]
   } else {
      variable Char "\n\t\r -\xff"; # [2]
   }

   # as a side-effect, initialize "document" local var to the whole text
   lappend rules {initial} "\[$Char]*(\[^$Char])?" {document char} {
      if {$char != ""} {
	 parse_error "Forbidden char \\x[scan $char %c val; format %x $val]" "" [expr [string length $document]-1]
      }
      [lexer current] reject
      [lexer current] begin $initial
   }

   #-----------------------------------------------------------------------
   # 2.3 - Common Syntactic Constructs

   variable rgS " \n\t\r"
   variable S "\[$rgS]+"; # [3]
   variable S? "\[$rgS]*"; # replacement for "($S)?"
   variable NameChar "\[-._:$Letter$Digit$CombiningChar$Extender]"; # [4]
   variable Name "\[_:$Letter]$NameChar*"; # [5]

   variable Names "$Name\($S$Name)*"; # [6], 1(
   variable Nmtoken "$NameChar+"; # [7]
   variable Nmtokens "$Nmtoken\($S$Nmtoken)*"; # [8], 1(

   # 4.1 - Character and Entity References

   variable CharRef "&\#(\[0-9]+);|&\#x(\[0-9a-fA-F]+);"; # [66], 2(
   variable EntityRef "&($Name);"; # [68], 1(
   variable Reference "$EntityRef|$CharRef"; # [67], 3(
   variable PEReference "%($Name);"; # [69], 1(

   # 2.3 (cont.)

   variable EntityValue "\"(\[^%&\"]|$PEReference|$Reference)*\"|'(\[^%&']|$PEReference|$Reference)*'"; # [9], 10(
   variable AttValue "\"(\[^<&\"]|$Reference)*\"|'(\[^<&']|$Reference)*'"; # [10], 8(
   variable SystemLiteral "(\"\[^\"]*\"|'\[^']*')"; # [11], 1(
   set rgPubid "- \n\ra-zA-Z0-9()+,./:=?;!*\#@\$_%"
   variable PubidChar "\[$rgPubid']"; # [13]
   variable PubidLiteral "(\"$PubidChar*\"|'\[$rgPubid]*')"; # [12], 1(

   #-----------------------------------------------------------------------
   # 2.8 - Prolog

   variable Eq "${S?}=${S?}"; # [25]
   variable VersionNum "\[-a-zA-Z0-9_.:]+"; # [26]
   variable VersionInfo "${S}version${Eq}(\"$VersionNum\"|'$VersionNum\')"; # [24], 1(
   variable SDDecl "${S}standalone${Eq}('yes'|\"yes\"|'no'|\"no\")"; # [32], 1(
   variable EncName "\[A-Za-z]\[-A-Za-z0-9._]*"; # [81]
   variable EncodingDecl "${S}encoding${Eq}(\"$EncName\"|'$EncName\')"; # [80], 1(
   variable XMLDecl "<\\?xml${VersionInfo}($EncodingDecl)?($SDDecl)?${S?}\\?>"; # [23], 5(
   lappend conds prolog-xml

   # XML declaration
   lappend rules {prolog-xml} $XMLDecl {all num has_enco enco has_sd sd} {
      # Only version 1.0 supported
      if {[unquote $num] != "1.0"} {
	 parse_error "xml version $num not supported"
      }
      # Standalone declaration
      if {$has_sd != ""} {
	 set standalone [unquote $sd]
      } else {
	 set standalone ""
      }
      # Encoding should have been handled before
      if {$conf(-debug)} {
	 puts "Xml decl: <?xml version=$num encoding=$enco standalone=$sd?>"
      }
      # Switch to dtd prolog
      [lexer current] end
      [lexer current] begin prolog-dtd
   }

   # detect some wrong XML declarations (spaces, capitalization, missing chars...)
   lappend rules {prolog-xml} "${S?}(<${S?})?(\\?${S?})?(x|X)(m|M)(l|L)\[^\n]*" {all} {
      parse_error "Wrong format for xml declaration $all"
   }

   # Switch to dtd prolog without xml decl
   lappend rules {prolog-xml} . {} {
      [lexer current] reject
      [lexer current] end
      [lexer current] begin prolog-dtd
   }

   #-----------------------------------------------------------------------
   # 2.8 (cont.) - Document Type Declaration

   variable ExternalID "(SYSTEM|PUBLIC$S$PubidLiteral)$S$SystemLiteral"; # [75], 3(
   lappend conds prolog-dtd dtd-decl dtd-int dtd-ext

   # start of DTD declaration
   lappend rules {prolog-dtd} "<!DOCTYPE${S}($Name)($S$ExternalID)?${S?}" {all root has_ext bid publ syst} {
      if {$conf(-debug)} {
	 puts "DTD root $root public $publ system $syst"
      }
      set rootType $root
      set publ [normPubId [unquote $publ]]
      set syst [unquote $syst]
      if {$publ != ""} {
      } elseif {$syst != ""} {
	  # if the dtd used in the trs file is under trans-14.dtd change it for compatibility with transcriber 1.5.0
	  variable olddtd $syst
	  if {[regexp {trans\-([0-9]+).*} $syst all dtdnum] } {
		  if { $dtdnum < 14 } {
                  regsub {[0-9]+} $syst {14} syst
	          variable modifdtd 1
	          }
          }
	  set syst [file join [file dirname $conf(-filename)] $syst]
      }
      # If asked to keep current DTD, verify external DTD filename matches
      # the current one, else the given subset will be read
      if {$conf(-keepdtd)} {
	 set dtdname [::xml::dtd::name]
	 if {$conf(-keepdtd) != 3 && [file tail $dtdname] != [file tail $syst]} {
	    if {$conf(-keepdtd) == 2} {
	      parse_error "External DTD '$syst' doesn't match requested '$dtdname'"
	    } else {
	      if {$conf(-debug)} {
		puts "switching DTD to $syst"
	      }
	      set dtdname $syst
	    }
	 } else {
	    set dtdname ""
	 }
      } else {
	 set dtdname $syst
      }

      [lexer current] end
      [lexer current] begin dtd-decl
   }

   # start of internal subset
   lappend rules {dtd-decl} "\\\[" {} {
      if {$conf(-keepdtd)} {
	 parse_error "Sorry, the current application forbids internal subset"
      }
      [lexer current] begin dtd-int
   }

   # meet parameter reference in DTD
   lappend rules {dtd-int dtd-ext dtd-inc} "$PEReference" {all} {
      if {$conf(-debug)} {
	 puts "DTD ref $ref"
      }
      # We have to expand parameter reference in a new lexer
   }

   # skip spaces
   lappend rules {dtd-int dtd-ext dtd-inc} "$S" {} {
   }

   # end of internal subset
   lappend rules {dtd-int} "]${S?}" {} {
      [lexer current] end
   }

   # end of DTD declaration
   lappend rules {dtd-decl} ">" {} {
      [lexer current] end
      [lexer current] begin prolog-end

      # Read and parse external DTD if needed
      if {$dtdname != ""} {
	if {$conf(-keepdtd)} {
	  ::xml::dtd::init
	  ::xml::dtd::active $conf(-valid)
	}
	if {[catch {
	  eval read_file [list $dtdname] [array get conf] -type dtd
	} msg]} {
	  parse_error $msg $::errorInfo
	}
      }
   }

   #-----------------------------------------------------------------------
   # External Subset
   variable TextDecl "<\\?xml($VersionInfo)?$EncodingDecl${S?}\\?>"; # [77], 3(

   lappend conds prolog-subset

   lappend rules {prolog-subset} $TextDecl {all has_num num enco} {
      [lexer current] end
      [lexer current] begin dtd-ext
   }

   lappend rules {prolog-subset} . {} {
      [lexer current] reject
      [lexer current] end
      [lexer current] begin dtd-ext
   }
   
   #-----------------------------------------------------------------------
   # 3.2 - Element Type Declarations

   variable children "\\(($Name|\\(${S?}|${S?}\\)|${S?},${S?}|${S?}\\|${S?}|\[?*+])*\\)\[?*+]?"; # [47], 1( Simplified regexp - has to be parsed more precisely
   variable Mixed "(\\(${S?}\#PCDATA(${S?}\\|${S?}$Name)*\\)\\*|\\(${S?}\#PCDATA${S?}\\))"; # [51], 1(
   variable contentspec "(EMPTY|ANY|$Mixed|$children)"; # [46], 3(
   variable elementdecl "<!ELEMENT${S}($Name)$S$contentspec${S?}>"; # [45], 4(

   lappend rules {dtd-int dtd-ext dtd-inc} $elementdecl {all type content} {
      if {$conf(-debug)} {
	 puts "DTD Element: <!ELEMENT $type $content>"
      }
      ::xml::dtd::element::declare $type $content
   }

   #-----------------------------------------------------------------------
   # 3.3 - Attribute-List Declarations

   variable StringType "CDATA"; # [55]
   variable TokenizedType "ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS"; # [56]
   variable NotationType "NOTATION$S\\(${S?}$Name\(${S?}\\|${S?}$Name)*${S?}\\)"; # [58], 1(
   variable Enumeration "\\(${S?}$Nmtoken\(${S?}\\|${S?}$Nmtoken)*${S?}\\)"; # [59], 1(
   variable EnumeratedType "$NotationType|$Enumeration"; # [57], 2(
   variable AttType "($StringType|$TokenizedType|$EnumeratedType)"; # [54], 3(
   variable DefaultDecl "(\#REQUIRED|\#IMPLIED|((\#FIXED$S)?($AttValue)))"; # [60], 12(
   variable AttDef "$S\($Name)$S$AttType$S$DefaultDecl"; # [53], 16(
   variable AttlistDecl "<!ATTLIST${S}($Name)(($AttDef)*)${S?}>"; # [52], 19(

   lappend rules {dtd-int dtd-ext dtd-inc} $AttlistDecl {all type content} {
      if {$conf(-debug)} {
	 puts "DTD Attribute: <!ATTLIST $type $content>"
      }
      ::xml::dtd::attribute::declare $type $content
   }

   #-----------------------------------------------------------------------
   # 3.4 - Conditional Sections (DTD external subset only)

   lappend conds dtd-ignore dtd-inc

   lappend rules {dtd-ext dtd-inc} "<!\\\[${S?}INCLUDE${S?}\\\[" {} {
      [lexer current] begin dtd-inc
   }

   lappend rules {dtd-inc} "]]>" {} {
      [lexer current] end
   }

   lappend rules {dtd-ext dtd-inc} "<!\\\[${S?}IGNORE${S?}\\\[" {} {
      [lexer current] begin dtd-ignore
   }

   lappend rules {dtd-ignore} "<!\\\[" {} {
      [lexer current] begin dtd-ignore
   }

   lappend rules {dtd-ignore} "]]>" {} {
      [lexer current] end
   }
   
   #-----------------------------------------------------------------------
   # 4.2 - Entity Declarations

   variable NDataDecl "${S}NDATA$S\($Name)"; # [76], 1(
   variable EntityDef "(($EntityValue)|$ExternalID\($NDataDecl)?)"; # [73], 17(
   variable GEDecl "<!ENTITY$S\($Name)$S$EntityDef${S?}>"; # [71], 18(
   variable PEDef  "(($EntityValue)|$ExternalID)"; # [74], 15(
   variable PEDecl "<!ENTITY$S\%$S\($Name)$S$PEDef${S?}>"; # [72], 16(
   variable EntityDecl "$GEDecl|$PEDecl"; # [70], >> 20 ( -- bad for tcl8.0 defs.

   lappend rules \
       {dtd-int dtd-ext dtd-inc} $GEDecl {all name v0 val v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 has_sys pub sys has_dat dat} {dtd_entity $name "&" $val $pub $sys $dat} \
       {dtd-int dtd-ext dtd-inc} $PEDecl {all name v0 val v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 has_sys pub sys has_dat dat} {dtd_entity $name "%" $val $pub $sys $dat}

   proc dtd_entity {name type value pub sys ndata} {
      upvar conf conf

      if {$conf(-debug)} {
	 puts "DTD Entity $name = $value PUBLIC $pub SYSTEM $sys NDATA $ndata"
      }
      if {$value != ""} {
	 # Declare internal entity
	 ::xml::dtd::entity::declare $type $name [unquote $value]
      } else {
	 # Normalize public id
	 if {$pub != ""} {
	    set pub [string trim [unquote $pub]]
	    regsub -all $S $pub " " pub
	 }
	 # Should normalize system id by escaping non-ascii chars
	 # Declare external entity
	 ::xml::dtd::entity::external $type $name $pub $sys $ndata
      }
   }

   #-----------------------------------------------------------------------
   # 4.7 - Notation Declaration

   variable PublicID "PUBLIC$S\$PubidLiteral"; # [83], 1(
   variable NotationDecl "<!NOTATION$S\($Name)${S}($ExternalID|$PublicID)${S?}>"; # [82], 5(

   lappend rules {dtd-int dtd-ext dtd-inc} $NotationDecl {all name val has_pub1 pub1 sys pub2} {
      set pub "$pub1$pub2"
      if {$conf(-debug)} {
	 puts "DTD Notation $name PUBLIC $pub SYSTEM $sys"
      }
      ::xml::dtd::notation::declare $name $pub $sys
   }

   #-----------------------------------------------------------------------
   # 2.5 - Comments "<!-- ... -->"

   variable Comment "<!--((\[^-]|(-\[^-]))*)-->"; # [15], 3(

   lappend rules {prolog-dtd dtd-int dtd-ext dtd-inc prolog-end content doc-end} $Comment {all comment} {
      if {$conf(-debug)} {
	 puts "Comment: <!--$comment-->"
      }
      # Can be given to the application (but this is not mandatory)
      if {$conf(-comment) && $currentItem != ""} {
	 ::xml::comment $comment -in $currentItem
      }
   }

   #-----------------------------------------------------------------------
   # 2.6 - Processing Instructions "<?PITarget ... ?>"

   lappend conds in-pi1 in-pi2

   lappend rules {prolog-dtd dtd-int dtd-ext dtd-inc prolog-end content doc-end} "<\\?($Name)" {all target} {
      set pi ""
      [lexer current] begin in-pi1
   }

   lappend rules {in-pi1} $S {} {
      [lexer current] end
      [lexer current] begin in-pi2
   }

   lappend rules {in-pi1 in-pi2} "\\?>" {} {
      [lexer current] end
      if {[string tolower $target] == "xml"} {
	 parse_error "processing instruction $target reserved"
      }
      if {$conf(-debug)} {
	 puts "PI $target $pi"
      }
      # Keep only PIs matching given target pattern 
      if {$conf(-pitarget) != "" && $currentItem 
	  && [string match $conf(-pitarget) $target]} {
	 ::xml::pi $target $pi -in $currentItem
      }
   }

   lappend rules {in-pi2} . {char} {
      append pi $char
   }

   #-----------------------------------------------------------------------
   # Skip spaces, switch to content if nothing matches in the prolog

   lappend conds prolog-end doc-end

   lappend rules {prolog-dtd prolog-end doc-end} $S {} {}

   lappend rules {prolog-dtd prolog-end} . {} {
      [lexer current] reject
      [lexer current] end
      [lexer current] begin root
   }

   #-----------------------------------------------------------------------
   # 3.1 - Start-Tags, End-Tags and Empty-Element Tags

   variable Attribute "($Name)${Eq}($AttValue)"; # [40], 10(
   variable ETag "</($Name)${S?}>"; # [42], 1(

   lappend conds root content in-tag

   # begin of Start-Tag
   lappend rules {root content} "<($Name)" {all type} {
      set attribs ""
      [lexer current] begin in-tag
   }

   # attribute inside Start-Tag
   lappend rules {in-tag} "$S$Attribute" {all name val} {
      lappend attribs $name [::xml::dtd::entity::replace [unquote $val]]
   }

   # end of Start-Tag
   lappend rules {in-tag} "${S?}(/)?>" {all empty} {
      # Leave in-tag condition, then switch to doc-end after root element
      [lexer current] end
      set is_root 0
      if {[[lexer current] conditions -current] == "root"} {
	 set is_root 1
	 [lexer current] end
	 [lexer current] begin doc-end
      }
      if {$empty != "/"} {
	 [lexer current] begin content
	 set types($level) $type
	 incr level
      }
      # Registration of start-tag
      if {$conf(-debug)} {
	 puts "Tag: <$type $attribs $empty>"
      }
      # Validity: Root Element Type
      if {$conf(-valid) && $is_root} {
	 if {$rootType == ""} {
	    parse_error "Validation impossible - no DTD found in document"
	 }
	 if {$type != $rootType} {
	    parse_error "Root element type should be $rootType, not $type"
	 }
      }
      # Creation of tag with validation of type/attributes
      if [catch {
	 set tag [::xml::element $type $attribs -in $currentItem]
	 # Validity: required attributes must have been defined
	 if {[::xml::dtd::active]} {
	    $tag valid-attr
	    #${xml}::dtd::attribute::required $tag
	 }
      } msg] {
	 parse_error $msg $::errorInfo
      }
      # Keep root element
      variable rootItem
      if {$rootItem == ""} {
	 set rootItem $tag
      }
      # New element becomes a father
      if {$empty != "/"} {
	 set currentItem $tag
      }
   }

   # End-Tag
   lappend rules {content} $ETag {all type} {
      # For lexical analysis purpose
      [lexer current] end
      incr level -1
      if {$type != $types($level)} {
	 parse_error "Wrong end-tag </$type> - should be </$types($level)>"
      }
      # Registration of end-tag
      if {$conf(-debug)} {
	 puts "End-tag: </$type>"
      }
      # Validate element content (order only)
      if {$conf(-valid)} {
	 if [catch {
	    #::xml::dtd::element::rightOrder $currentItem
	    $currentItem valid-elem
	 } msg] {
	    parse_error $msg $::errorInfo
	 }
      }
      # Go back to father
      set currentItem [$currentItem getFather]
   }

   # References inside content are handled as data (for the moment)
#    lappend rules {content} $Reference {ref} {
#       if {$conf(-debug)} {
# 	 puts "Reference to $ref"
#       }
#       # We should parse it with a sub-lexer !!!
#    }

   #-----------------------------------------------------------------------
   # 2.4 - Character Data

   lappend rules {content} "(\[^<&]|$Reference)+" {data} {
      if {[regexp "]]>" $data]} {
	 parse_error "CDATA-section-close delimiter ']]>' found in character data"
      }
      # Skip white space at user option (should look at xml:space instead)
      if {$conf(-skipspace)} {
	 #set data [string trim $data]
	 #if {[string length $data] <= 0} break
	 regsub "^\[ \t]*\n" $data "" data
	 regsub "\n\[ \t]*$" $data "" data
	 regsub -all "\[ \t]+" $data " " data
	 #set data [string trim $data "\n"]
	 #regsub -all "\[ \n\t]+" $data " " data
	 if {$data == " " || $data == ""} { break ; error "stop!" }
      }

      # Expand entity references (they should be handled otherwise, because
      # entities can contain other markup)
      if {[catch {
	 set data [::xml::dtd::entity::replace $data]
      } msg]} {
	 parse_error $msg $::errorInfo
      }

      if {$conf(-debug)} {
	 puts "Data: \"$data\""
      }
      append_data $data
   }

   # Create a new data item or append to the last one if it exists
   proc append_data {data} {
      upvar conf conf currentItem currentItem

      if {$conf(-valid)} {
	 if {[catch {
	    ::xml::dtd::element::authorized [$currentItem getType] "\#PCDATA"
	 } msg]} {
	    # Only white spaces can be discarded without error
	    # but it should perhaps be registred somewhere else ?
	    variable S
	    if {[regexp "^$S\$" $data]} return
	    uplevel [list parse_error $msg $::errorInfo]
	 }
      }
      set last [lindex [$currentItem getChilds] end]
      if {$last != "" && [$last class] == "data"} {
	 set oldata [$last getData]
	 append oldata $data
	 $last setData $oldata
      } else {
	 ::xml::data $data -in $currentItem
      }
   }
   
   #-----------------------------------------------------------------------
   # 2.7 - CDATA Sections "<![CDATA[ ...]]>"

   variable CDStart "<!\\\[CDATA\\\["; # [19]
   variable CDEnd "]]>"; # [21]

   lappend conds in-cdata

   # start of CDATA section
   lappend rules {content} $CDStart {} {
      set cdata ""
      [lexer current] begin in-cdata
   }

   # end of CDATA section
   lappend rules {in-cdata} $CDEnd {} {
      if {$conf(-debug)} {
	 puts "CData: <!\[CDATA\[$cdata]]>"
      }
      # CDATA are just data
      append_data $cdata
      [lexer current] end
   }

   # inside CDATA section
   lappend rules {in-cdata} . {char} {
      append cdata $char
   }

   #-----------------------------------------------------------------------
   # Final default rule : raise an error on syntax error

   lappend conds error

   lappend rules {*} "<!--\[^\n]*" {line} {
      parse_error "Syntax error in comment \"$line\""
   }

   lappend rules {content} "&\[^\n;]*;?" {line} {
      parse_error "Syntax error in entity reference \"$line\""
   }

   lappend rules {root content} "<\[^\n>]*>?" {line} {
      parse_error "Syntax error in tag \"$line\""
   }

   lappend rules {doc-end} "<\[^\n>]*>?" {line} {
      parse_error "Forbidden tag after root element: \"$line\""
   }

   lappend rules {*} "." {} {
      [lexer current] reject
      [lexer current] begin error
   }

   lappend rules {error} "${S?}(\[^\n]*)" {all line} {
      [lexer current] end
      switch [[lexer current] conditions -current] {
	 "content" {
	    parse_error "Waiting for element content, got \"$line\" "
	 }
	 "in-tag" { 
	    parse_error "Waiting for attribute specification, got \"$line\""
	 }
	 "doc-end" {
	    parse_error "Waiting for comment or PI instructions, got \"$line\""
	 }
	 default {
	    parse_error "...$line "
	 }
      }
   }

   #=======================================================================
   # Raise an error with a message giving file name and error line
   # (to be called exclusively from a rule within the parser)

   proc parse_error {explain {info ""} {pos ""}} {
      upvar conf conf document document

      # Get error character index inside document
      if {$pos == ""} {
	 set pos [[lexer current] index]
      }

      # Find in which line the error occured by counting newlines before
      set before [string range $document 0 $pos]
      set line [expr [regsub -all \n $before {} ignore]+1]

      set msg "XML parse error"
      if {$conf(-filename) != ""} {
	 append msg " on file '$conf(-filename)'"
      }
      error "$msg line $line:\n$explain" $info
   }

   #=======================================================================
   # Create the XML lexer

   package require tcLex

   # Lexer internal variables :
   #   conf(-*) : array of user configurable values
   #   document : string containing the whole document beeing parsed
   #   level :    nb of currently embedded elements
   #   types() :  list of type for each level
   #   dtdname :  name of dtd external subset to read
   #   rootType:  type of root element found in DTD (or empty if no DTD)
   #   currentItem: ID of current element (to be father of element content) 
   # Variables shared by all lexers (i.e. variables in current namespace)
   #   rootItem : element to be returned by the parser

   lexer create xmlex -args {options} -ec $conds -prescript {

      # Default options for parsing (no debug, current file unknown, 
      # don't keep comments or processing instructions, 
      # CDATA sections become data)
      array set conf {
	 -type      "document"
	 -debug     0
	 -filename  ""
	 -valid     1
	 -keepdtd   0
	 -alldoc    0
	 -comment   0
	 -pitarget  ""
	 -cdata     0
	 -skipspace 1
      }
      # Parse args and set authorized options
      foreach {name value} $options {
	 if {[lsearch -exact [array names conf] $name] >= 0} {
	    set conf($name) $value
	 } else {
	    error "XML parser: unknown option '$name'"
	 }
      }
      # Choose the lexer : root document, dtd external subset, ...
      variable rootItem ""
      switch -exact -- $conf(-type) {	 
	 "document" {       
	    set initial "prolog-xml"

	    # Init dtd
	    if {!$conf(-keepdtd)} {
	       ::xml::dtd::init
	    }
	    set rootType ""
	    
	    # Dynamic validating mode for XML elements
	    ::xml::dtd::active $conf(-valid)
	    
	    # Start on an empty node
	    if {$conf(-alldoc)} {
	       set rootItem [::xml::node]
	    }
	    set currentItem $rootItem
	 }
	 "dtd" {
	    set initial "prolog-subset"

	    ::xml::dtd::name $conf(-filename)
	 }
	 default {
	    error "XML parser: unknown document type '$conf(-type)'"
	 }
      }
      set level 0
   } -postscript {
      set cur [[lexer current] conditions -current]
      if {$cur != "doc-end" && $cur != "dtd-ext"} {
	 parse_error "Ended in $cur state"
      }
      # Validate id refs for document
      if {$conf(-valid) && $conf(-type) == "document"} {
	 ::xml::dtd::id::validate
      }
      return $rootItem
   } $rules
      
   #=======================================================================
   # General parsing procedures

   proc parse_doc {txt args} {
      variable rootItem
      
      # All the stuff
      if {[catch {
	 xmlex eval $txt $args
      } err]} {
	 set inf $::errorInfo
	 # Free memory before leaving when an error occurs
	 catch {$rootItem deltree}
	 return -code error -errorinfo $inf $err
      }

      # Return root item of tree
      return $rootItem
   }

   proc read_file {name args} {
      set f [open $name]

      # Automatic detection of encoding for Tcl/Tk 8.1.1
      if {![catch {encoding system}]} {
	 global v
	 set txt [detect_encoding $f [EncodingFromName $v(encoding)]]
      }

      # By default, end-of-lines are handled conforming to 2.11
      append txt [read $f]
      close $f
      return [eval parse_doc [list $txt] $args -filename [list $name]]
   }


   
   proc write_file {name root} {
      if {[$root class] != "element"} {
	 set root [lindex [$root getChilds "element"] 0]
	 if {$root == ""} {error "Empty XML document - not written"}
      }
      set f [open $name w]

      # Header with optional encoding information
      set encString ""
      if {![catch {encoding system}]} {
	 global v
	 set enc [EncodingFromName $v(encoding)]
	 if {$enc != ""} {
	    set encString " encoding=\"$v(encoding)\""
	    fconfigure $f -encoding $enc
	 }
      }
      puts $f "<?xml version=\"1.0\"$encString?>"

      set dtd [::xml::dtd::name]
      if {$dtd != ""} {
	 puts $f "<!DOCTYPE [$root getType] SYSTEM \"[file tail $dtd]\">"
      }
      puts $f [$root dump]
      close $f
   }

   # F - Detect encoding of an XML file coming from a channel and configure it.
   # As a result, returns the characters already read at the beginning of the file
   # (which avoids seeking back to the start of file or channel)
   proc detect_encoding {channel {default "uft-8"}} {

      # Read 2 or 4 bytes first for UCS-4 / UTF-16 / UTF-8 autodectection
      fconfigure $channel -encoding binary
      set txt [read $channel 2]
      if {$txt != "\xfe\xff" && $txt != "\xff\xfe"} {
	 append txt [read $channel 2]
      }

      # First configure to default encoding (should be UTF-8, but it will
      # probably not be the correct guess with transcriptions produced with
      # Transcriber under Tcl/Tk8.0)
      switch -exact -- $txt {
	 "\x4c\x6f\xa7\x94" {
	    error "XML parser: EBCDIC format not supported"
	 }
	 "\x00\x00\x00\x3c" -
	 "\x00\x00\x00\x3c" -
	 "\x00\x00\x00\x3c" -
	 "\x00\x00\x00\x3c" {
	    error "XML parser: UCS-4 format not supported"
	 }
	 "\xfe\xff" -
	 "\xff\xfe" -
	 "\x00\x3c\x00\x3f" -
	 "\x3c\x00\x3f\x00" {
	    set c [encoding convertfrom ascii [string index $txt 0]]
	    if {$c == "\xfe" || $c == "\x00"} {
	       set order "bigEndian"
	    } else {
	       set order "littleEndian"
	    }
	    if {$::tcl_platform(byteOrder) != $order} {
	       error "XML parser: Can't handle swapped UTF-16 Unicode file"
	    }
	    if {$c == "\xfe" || $c == "\xff"} {
	       set txt ""
	    } else {
	       set txt "<?"
	    }
	    fconfigure $channel -encoding "unicode"
	 }
	 "\x3c\x3f\x78\x6d" {
	    # read up to the end of the tag - limited to 1024 chars anyway
	    # (can't do a 'gets' since newline is a legal space within xml decl.)
	    for {set i 0} {$i < 1024} {incr i} {
	       set c [read $channel 1]
	       append txt $c
	       if {$c == ">"} break
	    }
	    # detect UTF-8 or any 7-bit or 8-bit encoding
	    variable VersionInfo
	    variable EncodingDecl
	    if {[regexp "<\\?xml($VersionInfo)?$EncodingDecl" $txt all v1 v2 v3]} {
	       # Look for a Tcl encoding matching the given name
	       set enc [EncodingFromName [string trim $v3 '\"]]
	       if {$enc != ""} {
		  fconfigure $channel -encoding $enc
	       } else {
		  error "XML parser: encoding $v3 not supported"
	       }
	    } else {
	       fconfigure $channel -encoding $default
	    }
	 }
      }
      #puts "Encoding = [fconfigure $channel -encoding]"
      return $txt
   }

   #=======================================================================
   # General procedures

   # Check for well-formedness: value must match regexp of given name
   proc check {reg_name value} {
      variable $reg_name
      if {![regexp "^[set $reg_name]\$" $value]} {
	 parse_error "Value '$value' is not a well-formed $reg_name"
      }
   }

   # Strip quotes around value
   proc unquote {val} {
      if {$val == ""} return
      if {![regexp "^(\[\"'])(.*)(\[\"'])$" $val all sep1 val sep2]
       || $sep1 != $sep2} {
         parse_error "Wrongly quoted string $val"
      }
      return $val
   }

   # Normalize public identifier
   proc normPubId {public} {
      variable S
      regsub -all $S $public " " public
      return [string trim $public]
   }

}

################################################################