File: expat.n

package info (click to toggle)
tdom 0.9.3-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,260 kB
  • sloc: ansic: 56,762; xml: 20,797; tcl: 3,618; sh: 658; makefile: 83; cpp: 30
file content (927 lines) | stat: -rw-r--r-- 26,544 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
'\"
'\" Generated from expat.xml
'\"
'\" BEGIN man.macros
.\"	In troff mode only, plant a page trap 1.3i above the bottom of the
.\"	page that invokes the ^B macro.
.if t .wh -1.3i ^B
.\"	Save the current line length (\n(.l) in register ^l.
.nr ^l \n(.l
.\"	Adjust text at both margins.
.ad b
.\"	AP arg1 ?arg2? ?arg3? ?indent?
.\"	Starts a tagged paragraph (.TP) and prints a tab-separated heading:
.\"	arg1, then arg2 in italics, then arg3 in parentheses. If arg3 is
.\"	empty the parenthesised part is omitted; if arg2 is empty as well,
.\"	arg1 alone is printed in italics. The .TP indent is arg 4 when
.\"	given, otherwise register )C when arg2 is non-empty, otherwise 15.
.\"	Tab stops are taken from registers )A and )B (set by the AS macro).
.\"	NOTE(review): presumably arg1/arg2/arg3 are an argument's type,
.\"	name and in/out direction -- confirm against callers.
.de AP
.ie !"\\$4"" .TP \\$4
.el \{\
.   ie !"\\$2"" .TP \\n()Cu
.   el          .TP 15
.\}
.ta \\n()Au \\n()Bu
.ie !"\\$3"" \{\
\&\\$1	\\fI\\$2\\fP	(\\$3)
.\".b
.\}
.el \{\
.br
.ie !"\\$2"" \{\
\&\\$1	\\fI\\$2\\fP
.\}
.el \{\
\&\\fI\\$1\\fP
.\}
.\}
..
.\"	AS ?sample1? ?sample2?
.\"	Computes the column registers used by AP:
.\"	  )A = 10n, or the width of sample1 plus 3n when sample1 is given;
.\"	  )B = )A + 15n, or the width of sample2 plus )A plus 3n when
.\"	       sample2 is given;
.\"	  )C = )B + the width of the string "(in/out)" + 2n.
.de AS
.nr )A 10n
.if !"\\$1"" .nr )A \\w'\\$1'u+3n
.nr )B \\n()Au+15n
.\"
.if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n
.nr )C \\n()Bu+\\w'(in/out)'u+2n
..
.\"	Establish initial column widths from two sample strings.
.AS Tcl_Interp Tcl_CreateInterp in/out
.\"	BS
.\"	Begins a boxed section: breaks the line, records the current
.\"	vertical position in register ^y and sets ^b to 1. In nroff mode
.\"	it additionally draws a horizontal rule of the current line
.\"	length (in no-fill mode, with a temporary indent of 0) and then
.\"	returns to fill mode.
.de BS
.br
.mk ^y
.nr ^b 1u
.if n .nf
.if n .ti 0
.if n \l'\\n(.lu\(ul'
.if n .fi
..
.\"	BE
.\"	Ends a boxed section started with BS. Switches to no-fill mode with
.\"	a temporary indent of 0 and records the current vertical position
.\"	in register ^t. In nroff mode it draws a horizontal rule whose
.\"	length is register ^l; in troff mode it draws the box between the
.\"	positions saved in ^y and ^t. Afterwards it restores fill mode,
.\"	breaks the line and resets ^b to 0.
.\"	Fix: the inner ".el" branch previously began with "\}" instead of
.\"	"\{", which left the conditional block's braces unbalanced.
.de BE
.nf
.ti 0
.mk ^t
.ie n \l'\\n(^lu\(ul'
.el \{\
.\"	Draw four-sided box normally, but don't draw top of
.\"	box if the box started on an earlier page.
.ie !\\n(^b-1 \{\
\h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul'
.\}
.el \{\
\h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul'
.\}
.\}
.fi
.br
.nr ^b 0
..
.\"	VS ?arg1? ?arg2?
.\"	Begins a sidebar-marked region. Breaks the line first if a second
.\"	argument is given, then records the current vertical position in
.\"	register ^Y. In nroff mode it turns on a margin character (a \(br
.\"	glyph at point size 12); otherwise it sets register ^v to 1.
.\"	NOTE(review): arg1 is not referenced by the macro body.
.de VS
.if !"\\$2"" .br
.mk ^Y
.ie n 'mc \s12\(br\s0
.el .nr ^v 1u
..
.\"	VE
.\"	Ends a sidebar-marked region started with VS. In nroff mode it
.\"	turns the margin character off. Otherwise it switches to
.\"	environment 2, records the current vertical position in ^t and
.\"	draws a vertical line, positioned horizontally at ^l + 3n,
.\"	spanning the positions saved in ^Y and ^t, then restores the
.\"	previous environment. Finally it resets register ^v to 0.
.de VE
.ie n 'mc
.el \{\
.ev 2
.nf
.ti 0
.mk ^t
\h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n'
.sp -1
.fi
.ev
.\}
.nr ^v 0
..
.\"	^B
.\"	Page-bottom handler; this file installs it with ".wh -1.3i ^B" in
.\"	troff mode. Working in environment 2, it records the current
.\"	vertical position in ^t, finishes the part of an open box (^b
.\"	non-zero) and/or sidebar (^v non-zero) that lies on this page, and
.\"	then starts a new page with .bp. After the page break it re-marks
.\"	the start position of a still-open box (^y, setting ^b to 2) and of
.\"	a still-open sidebar (^Y) on the new page.
.de ^B
.ev 2
'ti 0
'nf
.mk ^t
.if \\n(^b \{\
.\"	Draw three-sided box if this is the box's first page,
.\"	draw two sides but no top otherwise.
.ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c
.el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c
.\}
.if \\n(^v \{\
.nr ^x \\n(^tu+1v-\\n(^Yu
\kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c
.\}
.bp
'fi
.ev
.if \\n(^b \{\
.mk ^y
.nr ^b 2
.\}
.if \\n(^v \{\
.mk ^Y
.\}
..
.\"	DS
.\"	Begins an indented display: starts a relative indent (.RS),
.\"	switches to no-fill mode and emits one line of vertical space.
.de DS
.RS
.nf
.sp
..
.\"	DE
.\"	Ends an indented display: returns to fill mode, ends the relative
.\"	indent (.RE) and emits one line of vertical space.
.de DE
.fi
.RE
.sp
..
.\"	SO
.\"	Starts the "STANDARD OPTIONS" section: emits the section heading,
.\"	begins a paragraph, switches to no-fill mode, sets tab stops at
.\"	5.5c and 11c and selects the bold font for the entries that follow.
.de SO
.SH "STANDARD OPTIONS"
.LP
.nf
.ta 5.5c 11c
.ft B
..
.\"	SE
.\"	Ends the list begun by SO: returns to fill mode and the roman font,
.\"	begins a paragraph and prints a pointer to the "options" manual
.\"	entry.
.de SE
.fi
.ft R
.LP
See the \\fBoptions\\fR manual entry for details on the standard options.
..
.\"	OP cmdName dbName dbClass
.\"	Prints a three-line option header in no-fill mode with a tab stop
.\"	at 4c: the labels "Command-Line Name:", "Database Name:" and
.\"	"Database Class:", each followed by the matching argument in bold.
.\"	It then returns to fill mode and opens an indented paragraph (.IP)
.\"	for the description that follows.
.de OP
.LP
.nf
.ta 4c
Command-Line Name:	\\fB\\$1\\fR
Database Name:	\\fB\\$2\\fR
Database Class:	\\fB\\$3\\fR
.fi
.IP
..
.\"	CS
.\"	Begins a code sample: starts a relative indent (.RS), switches to
.\"	no-fill mode, sets tab stops every quarter inch up to 1i and, in
.\"	troff mode, selects font C.
.de CS
.RS
.nf
.ta .25i .5i .75i 1i
.if t .ft C
..
.\"	CE
.\"	Ends a code sample started with CS: returns to fill mode, restores
.\"	the roman font in troff mode and ends the relative indent (.RE).
.de CE
.fi
.if t .ft R
.RE
..
.\"	UL arg1 arg2
.\"	Prints arg1, draws an underrule line from the current position back
.\"	to horizontal position 0, then prints arg2.
.\"	NOTE(review): the intended effect is presumably "arg1 underlined,
.\"	arg2 normal" -- confirm the rendering.
.de UL
\\$1\l'|0\(ul'\\$2
..
'\" END man.macros
.TH expat n "" Tcl ""
.BS
.SH NAME
expat \- Creates an instance of an expat parser object
.SH SYNOPSIS
.nf
package require tdom

\&\fBexpat\fP ?\fIparsername\fR? ?\fI-namespace\fR? ?\fIarg arg ..\fR?

\&\fBxml::parser\fP ?\fIparsername\fR? ?\fI-namespace\fR? ?\fIarg arg ..\fR?
.fi
.BE
.SH DESCRIPTION
.PP
The parsers created with \fIexpat\fR or \fIxml::parser\fR
(which is just another name for the same command in its own namespace) are able
to parse any kind of well-formed XML. The parsers are stream-oriented XML
parsers. This means that you register handler scripts with the parser prior to
starting the parse. These handler scripts are called when the parser discovers
the associated structures in the document being parsed.  A start tag is an
example of the kind of structures for which you may register a handler
script.
.PP
The parsers always check for XML well-formedness of the input (and
report an error if the input isn't well-formed). They parse the internal
DTD and, on request, the external DTD and external entities, if you
resolve the identifier of the external entities with the
-externalentitycommand script (see there). If you use the -validateCmd
option (see there), the input is additionally validated.
.PP
Additionally, the Tcl extension code that implements this command provides an
API for adding C level coded handlers. Up to now, there exists the parser
extension command "tdom". The handler set installed by this extension builds an
in-memory "tDOM" DOM tree while the parser is parsing the input.
.PP
It is possible to register an arbitrary number of different handler scripts
and C level handlers for most of the events. If the event occurs, they are
called in turn.
.SH "COMMAND OPTIONS"
.IP "\fB-namespace\fR"
.RS
.PP
Enables namespace parsing. You must use this option while
creating the parser with the \fBexpat\fR or \fBxml::parser\fR
command. You can't enable (nor disable) namespace parsing with
\&\fB<parserobj> configure ...\fR.
.RE
.IP "\fB-namespaceseparator  \fIchar\fP\fR"
.RS
.PP
This option only has an effect if used together with
the option \fI-namespace\fR. If given, this option
determines the character inserted between the namespace URI and
the local name when reporting an XML element name to a
handler script. The default is the character ':'. The
value must be a one-character string less than or equal to
\&\eu00FF, preferably a 7-bit ASCII character, or the empty
string. If the value is the empty string (or if the
value is \ex00), the namespace URI and the local name will be
concatenated without any separator.
.RE
.IP "\fB-final  \fIboolean\fP\fR"
.RS
.PP
This option indicates whether the document data next
presented to the parse method is the final part of the document. A value of "0"
indicates that more data is expected. A value of "1" indicates that no more is
expected.  The default value is "1".
.PP
If this option is set to "0" then the parser will not report certain errors
if the XML data is not well-formed upon end of input, such as unclosed or
unbalanced start or end tags. Instead some data may be saved by the parser
until the next call to the parse method, thus delaying the reporting of some of
the data.
.PP
If this option is set to "1" then documents which are not well-formed upon
end of input will generate an error.
.RE
.IP "\fB-validateCmd  \fI<tdom schema cmd>\fP\fR"
.RS
.PP
This option expects the name of a tDOM schema
command. If this option is given, then the input is also
validated. If the schema command hasn't set a reportcmd then
the first validation error will stop further parsing (as a
well-formedness error).
.RE
.IP "\fB-baseurl  \fIurl\fP\fR"
.RS
.PP
Reports the base url of the document to the
parser.
.RE
.IP "\fB-elementstartcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with the start tag of
an element. The actual command consists of this option followed by at least two
arguments: the element type name and the attribute list.
.PP
The attribute list is a Tcl list consisting of name/value pairs, suitable
for passing to the array set Tcl command.
.PP
Example:


.CS
proc HandleStart {name attlist} {
    puts stderr "Element start ==> $name has attributes $attlist"
}

$parser configure -elementstartcommand HandleStart

$parser parse {<test id="123"></test>}

.CE
.PP
This would result in the following command being invoked:


.CS
HandleStart test {id 123}
.CE
.RE
.IP "\fB-elementendcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with the end tag of an
element. The actual command consists of this option followed by at least one
argument: the element type name. In addition, if the -reportempty option is set
then the command may be invoked with the -empty configuration option to
indicate whether it is an empty element. See the description of the
-reportempty option for an example.
.PP
Example:


.CS
proc HandleEnd {name} {
    puts stderr "Element end ==> $name"
}

$parser configure -elementendcommand HandleEnd

$parser parse {<test id="123"></test>}

.CE
.PP
This would result in the following command being invoked:


.CS

HandleEnd test

.CE
.RE
.IP "\fB-characterdatacommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with character data in
the document, i.e. text. The actual command consists of this option followed by
one argument: the text.
.PP
It is not guaranteed that character data will be passed to the application
in a single call to this command. That is, the application should be prepared
to receive multiple invocations of this callback with no intervening callbacks
from other features.
.PP
Example:



.CS
proc HandleText {data} {
    puts stderr "Character data ==> $data"
}

$parser configure -characterdatacommand HandleText

$parser parse {<test>this is a test document</test>}

.CE
.PP
This would result in the following command being invoked:



.CS
HandleText {this is a test document}
.CE
.RE
.IP "\fB-processinginstructioncommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with processing
instructions in the document. The actual command consists of this option
followed by two arguments: the PI target and the PI data.
.PP
Example:



.CS
proc HandlePI {target data} {
    puts stderr "Processing instruction ==> $target $data"
}

$parser configure -processinginstructioncommand HandlePI

$parser parse {<test><?special this is a processing instruction?></test>}

.CE
.PP
This would result in the following command being invoked:



.CS

HandlePI special {this is a processing instruction}

.CE
.RE
.IP "\fB -notationdeclcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with notation
declaration in the document. The actual command consists of this option
followed by four arguments: the notation name, the base uri of the document
(this means, whatever was set by the -baseurl option), the system identifier
and the public identifier. The notation name is never empty, the other
arguments may be.
.RE
.IP "\fB -externalentitycommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with references to
external entities in the document. The actual command consists of this option
followed by three arguments: the base uri, the system identifier of the entity
and the public identifier of the entity. The base uri and the public identifier
may be the empty list.
.PP
This handler script has to return a Tcl list consisting of three
elements. The first element of this list signals how the external entity is
returned to the processor. At the moment, the three allowed types are
"string", "channel" and "filename". The second
element of the list has to be the (absolute) base URI of the external entity to
be parsed.  The third element of the list is the data: either the already read
content of the external entity as a string in the case of type
"string", or the name of a Tcl channel in the case of type
"channel", or the path to the external entity to be read in the case of
type "filename". Behind the scenes, the external entity referenced by
the returned Tcl channel, string or file name will be parsed with an expat
external entity parser with the same handler sets as the main parser. If
parsing of the external entity fails, the whole parsing is stopped with an
error message. If a Tcl command registered as externalentitycommand isn't able
to resolve an external entity, it is allowed to return TCL_CONTINUE. In this
case, the wrapper gives the next registered externalentitycommand a try. If no
externalentitycommand is able to handle the external entity, parsing stops with
an error.
.PP
Example:



.CS
proc externalEntityRefHandler {base systemId publicId} {
    if {![regexp {^[a-zA-Z]+:/} $systemId]}  {
        regsub {^[a-zA-Z]+:} $base {} base
        set basedir [file dirname $base]
        set systemId "[set basedir]/[set systemId]"
    } else {
        regsub {^[a-zA-Z]+:} $systemId {} systemId
    }
    if {[catch {set fd [open $systemId]}]} {
        return -code error \e
                -errorinfo "Failed to open external entity $systemId"
    }
    return [list channel $systemId $fd]
}

set parser [expat -externalentitycommand externalEntityRefHandler \e
                  -baseurl "file:///local/doc/doc.xml" \e
                  -paramentityparsing notstandalone]
$parser parse {<?xml version='1.0'?>
<!DOCTYPE test SYSTEM "test.dtd">
<test/>}

.CE
.PP
This would result in the following command being invoked:



.CS

externalEntityRefHandler file:///local/doc/doc.xml test.dtd {}

.CE
.PP
External entities are only resolved via this handler script if
necessary. This means that external parameter entities trigger this handler only
if -paramentityparsing is used with argument "always", or if
-paramentityparsing is used with argument "notstandalone" and the
document isn't marked as standalone.
.RE
.IP "\fB -unknownencodingcommand  \fIscript\fP\fR"
.RS
.PP
Not implemented at Tcl level.
.RE
.IP "\fB-startnamespacedeclcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with start scope of
namespace declarations in the document. The actual command consists of this
option followed by two arguments: the namespace prefix and the namespace
URI. For an xmlns attribute, prefix will be the empty list.  For an
xmlns="" attribute, uri will be the empty list. The call to the start
and end element handlers occur between the calls to the start and end namespace
declaration handlers.
.RE
.IP "\fB -endnamespacedeclcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with end scope of
namespace declarations in the document. The actual command consists of this
option followed by the namespace prefix as argument. In case of an xmlns
attribute, prefix will be the empty list. The call to the start and end element
handlers occur between the calls to the start and end namespace declaration
handlers.
.RE
.IP "\fB -commentcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with comments in the
document. The actual command consists of this option followed by one argument:
the comment data.
.PP
Example:



.CS

proc HandleComment {data} {
    puts stderr "Comment ==> $data"
}

$parser configure -commentcommand HandleComment

$parser parse {<test><!-- this is <obviously> a comment --></test>}

.CE
.PP
This would result in the following command being invoked:



.CS

HandleComment { this is <obviously> a comment }

.CE
.RE
.IP "\fB -notstandalonecommand  \fIscript\fP\fR"
.RS
.PP
This Tcl command is called, if the document is not
standalone (it has an external subset or a reference to a parameter entity, but
does not have standalone="yes"). It is called with no additional
arguments.
.RE
.IP "\fB -startcdatasectioncommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with the start of a
CDATA section.  It is called with no additional arguments.
.RE
.IP "\fB -endcdatasectioncommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with the end of a CDATA
section.  It is called with no additional arguments.
.RE
.IP "\fB -elementdeclcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with element
declarations. The actual command consists of this option followed by two
arguments: the name of the element and the content model. The content model arg
is a tcl list of four elements. The first list element specifies the type of
the XML element; the six different possible types are reported as
"MIXED", "NAME", "EMPTY", "CHOICE",
"SEQ" or "ANY". The second list element reports the
quantifier to the content model in XML Syntax ("?", "*" or
"+") or is the empty list. If the type is "MIXED", then the
quantifier will be "{}", indicating a PCDATA-only element, or
"*", with the allowed elements to intermix with PCDATA as a Tcl list as
the fourth argument. If the type is "NAME", the name is the third
arg; otherwise the third argument is the empty list. If the type is
"CHOICE" or "SEQ" the fourth argument will contain a list
of content models built like this one. The "EMPTY", "ANY",
and "MIXED" types will only occur at top level.
.PP
Examples:



.CS

proc elDeclHandler {name content} {
     puts "$name $content"
}

set parser [expat -elementdeclcommand elDeclHandler]
$parser parse {<?xml version='1.0'?>
<!DOCTYPE test [
<!ELEMENT test (#PCDATA)> 
]>
<test>foo</test>}

.CE
.PP
This would result in the following command being invoked:



.CS

elDeclHandler test {MIXED {} {} {}}

$parser reset
$parser parse {<?xml version='1.0'?>
<!DOCTYPE test [
<!ELEMENT test (a|b)>
]>
<test><a/></test>}

.CE
.PP
This would result in the following command being invoked:



.CS

elDeclHandler test {CHOICE {} {} {{NAME {} a {}} {NAME {} b {}}}}

.CE
.RE
.IP "\fB -attlistdeclcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with attlist
declarations. The actual command consists of this option followed by five
arguments.  The Attlist declaration handler is called for *each* attribute. So
a single Attlist declaration with multiple attributes declared will generate
multiple calls to this handler. The arguments are the element name this
attribute belongs to, the name of the attribute, the type of the attribute, the
default value (may be the empty list) and a required flag. If this flag is true
and the default value is not the empty list, then this is a "#FIXED"
default.
.PP
Example:



.CS

proc attlistHandler {elname name type default isRequired} {
    puts "$elname $name $type $default $isRequired"
}

set parser [expat -attlistdeclcommand attlistHandler]
$parser parse {<?xml version='1.0'?>
<!DOCTYPE test [
<!ELEMENT test EMPTY>
<!ATTLIST test
          id      ID      #REQUIRED
          name    CDATA   #IMPLIED>
]>
<test/>}

.CE
.PP
This would result in the following commands being invoked:



.CS

attlistHandler test id ID {} 1
attlistHandler test name CDATA {} 0

.CE
.RE
.IP "\fB -startdoctypedeclcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with the start of the
DOCTYPE declaration. This command is called before any DTD or internal subset
is parsed.  The actual command consists of this option followed by four
arguments: the doctype name, the system identifier, the public identifier and a
boolean, that shows if the DOCTYPE has an internal subset.
.RE
.IP "\fB -enddoctypedeclcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with the end of the
DOCTYPE declaration. This command is called after processing any external
subset.  It is called with no additional arguments.
.RE
.IP "\fB -paramentityparsing  \fInever|notstandalone|always\fP\fR"
.RS
.PP
"never" disables expansion of parameter entities,
"always" always expands them, and "notstandalone" expands them only if the
document isn't marked "standalone='yes'". The default is "never".
.RE
.IP "\fB -entitydeclcommand  \fIscript\fP\fR"
.RS
.PP
Specifies a Tcl command to associate with any entity
declaration. The actual command consists of this option followed by seven
arguments: the entity name, a boolean identifying parameter entities, the value
of the entity, the base uri, the system identifier, the public identifier and
the notation name. According to the type of entity declaration some of these
arguments may be the empty list.
.RE
.IP "\fB -ignorewhitecdata  \fIboolean\fP\fR"
.RS
.PP
If this flag is set, element content which contains only
whitespace isn't reported with the
\&\fB-characterdatacommand\fR.
.RE
.IP "\fB -ignorewhitespace  \fIboolean\fP\fR"
Another name for \fI-ignorewhitecdata\fR; see there.
.IP "\fB -handlerset  \fIname\fP\fR"
.RS
.PP
This option sets the Tcl handler set scope for the
configure options. Any option value pair following this option in the
same call to the parser are modifying the named Tcl handler set. If
you don't use this option, you are modifying the default Tcl handler
set, named "default".
.RE
.IP "\fB -noexpand  \fIboolean\fP\fR"
.RS
.PP
Normally, the parser will try to expand references to
entities defined in the internal subset. If this option is set to a true value
these entities are not expanded, but reported literally via the default
handler. \fBWarning:\fR If you set this option to true and don't install a
default handler (with the -defaultcommand option) for every handler set of the
parser, all internal entities are silently lost for the handler sets without a
default handler.
.RE
.IP "\fB-useForeignDTD  \fI<boolean>\fP\fR"
If <boolean> is true and the document does not have an
external subset, the parser will call the -externalentitycommand script with
empty values for the systemId and publicID arguments. This option must be set
before the first piece of data is parsed. Setting this option after the
parsing has started has no effect. The default is not to use a foreign DTD. The
default is restored after resetting the parser. Please note that a
-paramentityparsing value of "never" (which is the default) suppresses any call
to the -externalentitycommand script. Please also note that, if the document
doesn't have an internal subset either, the -startdoctypedeclcommand and
-enddoctypedeclcommand scripts, if set, are not called.
.IP "\fB-billionLaughsAttackProtectionMaximumAmplification  \fI<float>\fP\fR"
This option together with
\&\fI-billionLaughsAttackProtectionActivationThreshold\fR
gives control over the parser limits that protect
against billion laughs attacks (see
.UR "https://en.wikipedia.org/wiki/Billion_laughs_attack"
.UE ).
This option expects a float >= 1.0 as argument. You
should never need to use this option, because the
default value (100.0) should work for any real data.
If you ever need to increase this value for non-attack
payload, please report it.
.IP "\fB-billionLaughsAttackProtectionActivationThreshold  \fI<long>\fP\fR"
This option together with
\&\fI-billionLaughsAttackProtectionMaximumAmplification\fR
gives control over the parser limits that protect
against billion laughs attacks (see
.UR "https://en.wikipedia.org/wiki/Billion_laughs_attack"
.UE ).
This option expects a positive integer as argument. You
should never need to use this option, because the
default value (8388608) should work for any real data.
If you ever need to increase this value for non-attack
payload, please report it.
.SH "COMMAND METHODS"
.TP
\&\fB\fBparser\fP \fBconfigure\fP \fIoption value ?option value?\fB
\&\fR
.RS
.PP
Sets configuration options for the parser. Every command
option, except \fI-namespace\fR can be set or modified with this method.
.RE
.TP
\&\fB\fBparser\fP \fBcget\fP \fI?-handlerset name? option\fB
\&\fR
.RS
.PP
Returns the current value of the given configuration option for the
parser.
.PP
If the -handlerset option is used, the configuration for the
named handler set is returned.
.RE
.TP
\&\fB\fBparser\fP \fBcurrentmarkup\fP
\&\fR
.RS
.PP
Returns the current markup as found in the XML, if
called from within one of the parser's markup event handler scripts
(-elementstartcommand, -elementendcommand, -commentcommand
and -processinginstructioncommand). Otherwise it returns the
empty string.
.RE
.TP
\&\fB\fBparser\fP \fBdelete\fP
\&\fR
.RS
.PP
Deletes the parser and the parser command. A parser cannot
be deleted from within one of its handler callbacks (neither directly nor
indirectly) and will raise a tcl error in this case.
.RE
.TP
\&\fB\fBparser\fP \fBfree\fP
\&\fR
.RS
.PP
Another name to call the method \fIdelete\fR, see
there.
.RE
.TP
\&\fB\fBparser\fP \fBget\fP \fI-specifiedattributecount|-idattributeindex|-currentbytecount|-currentlinenumber|-currentcolumnnumber|-currentbyteindex\fB
\&\fR
.RS
.IP "\fB-specifiedattributecount\fR"
.RS
.PP
Returns the number of attribute/value pairs
passed in the last call to the elementstartcommand that were specified in the
start-tag rather than defaulted. Each attribute/value pair counts as 2; thus
this corresponds to an index into the attribute list passed to the
elementstartcommand.
.RE
.IP "\fB-idattributeindex\fR"
.RS
.PP
Returns the index of the ID attribute passed in the
last call to XML_StartElementHandler, or -1 if there is no ID attribute.  Each
attribute/value pair counts as 2; thus this corresponds to an index into the
attributes list passed to the elementstartcommand.
.RE
.IP "\fB-currentbytecount\fR"
.RS
.PP
Returns the number of bytes in the current event.
Returns 0 if the event is in an internal entity.
.RE
.IP "\fB-currentlinenumber\fR"
.RS
.PP
Returns the line number of the current parse
location.
.RE
.IP "\fB-currentcolumnnumber\fR"
.RS
.PP
Returns the column number of the current parse
location.
.RE
.IP "\fB-currentbyteindex\fR"
.RS
.PP
Returns the byte index of the current parse
location.
.RE
.PP
Only one value may be requested at a time.
.RE
.TP
\&\fB\fBparser\fP \fBparse\fP \fIdata\fB
\&\fR
.RS
.PP
Parses the XML string \fIdata\fR. The event callback
scripts will be called as their triggering events happen. This method cannot
be used from within a callback (neither directly nor indirectly) of
the parser to be used and will raise an error in this case.
.RE
.TP
\&\fB\fBparser\fP \fBparsechannel\fP \fIchannelID\fB
\&\fR
.RS
.PP
Reads the XML data out of the tcl channel \fIchannelID\fR
(starting at the current access position, without any seek) up to the end of
file condition and parses that data. The channel encoding is respected. Use the
helper proc tDOM::xmlOpenFile out of the tDOM script library to open a file, if
you want to use this method. This method cannot
be used from within a callback (neither directly nor indirectly) of
the parser to be used and will raise an error in this case.
.RE
.TP
\&\fB\fBparser\fP \fBparsefile\fP \fIfilename\fB
\&\fR
.RS
.PP
Reads the XML data directly out of the file with the
filename \fIfilename\fR and parses that data. This is done with low level file
operations. The XML data must be in US-ASCII, ISO-8859-1, UTF-8 or UTF-16
encoding. If applicable, this is the fastest way, to parse XML data. This
method cannot be used from within a callback (neither directly nor indirectly)
of the parser to be used and will raise an error in this case.
.RE
.TP
\&\fB\fBparser\fP \fBreset\fP
\&\fR
.RS
.PP
Resets the parser in preparation for parsing another
document. A parser cannot be reset from within one of its handler callbacks
(neither directly nor indirectly) and will raise a Tcl error in this
case.
.RE
.SH "Callback Command Return Codes"
.PP
A script invoked for any of the parser callback commands, such as
-elementstartcommand, -elementendcommand, etc., may return a return code other
than "ok" or "error". All callbacks may in addition return
"break", "continue" or "return".
.PP
If a callback script returns an "error" error code then
processing of the document is terminated and the error is propagated in the
usual fashion.
.PP
If a callback script returns a "break" error code then all
further processing of every handler script out of this Tcl handler set is
suppressed for the further parsing. This does not influence any other handler
set.
.PP
If a callback script returns a "continue" error code then
processing of the current element, and its children, ceases for every handler
script out of this Tcl handler set and processing continues with the next
(sibling) element. This does not influence any other handler set.
.PP
If a callback script returns a "return" error
code then parsing is canceled altogether, but no error is raised.
.SH "SEE ALSO"
expatapi, tdom
.SH KEYWORDS
SAX, push, pushparser