File: html4-parser.sml

package info (click to toggle)
smlnj 110.79-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 82,552 kB
  • sloc: ansic: 32,532; asm: 6,314; sh: 2,303; makefile: 1,821; perl: 1,170; pascal: 295; yacc: 190; cs: 78; python: 77; lisp: 19
file content (707 lines) | stat: -rw-r--r-- 30,300 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
(* html4-parser.sml
 *
 * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org)
 * All rights reserved.
 *
 * Defines the HTML4Parser structure, which defunctorizes the
 * automatically generated parser, defines an additional set of
 * utilities for working with the parser.
 *)

structure HTML4Parser = struct

structure H4 = HTML4
structure H4U = HTML4Utils
structure H4T = HTML4Tokens
structure H4TU = HTML4TokenUtils
structure AtomMap = H4TU.AtomMap

local
    structure TheParser = HTML4ParseFn(HTML4Lexer)
in
open TheParser
end

fun parseStream inStream =
    let
        val sourceMap = AntlrStreamPos.mkSourcemap ()
        val lex = HTML4Lexer.lex sourceMap
        val stream = HTML4Lexer.streamifyInstream inStream
        val (result, _, _) = parse lex stream
    in
        result
    end

exception IllFormedHTMLParseStream of
          H4T.token H4U.parsevisitation H4U.stream * string option

val tokVisitationToString = H4U.visitationToString H4T.toString

val strVisitationToString = H4U.visitationToString (fn x : string => x)

val visitationSimilar = let
    fun tokSimilarToString (tok1, tokStr2) = (H4T.toString tok1) = tokStr2
in H4U.visitationSame tokSimilarToString end

fun expect expectedVisit pstrm =
    let val pstrmHd = H4U.stream_hd pstrm
            handle _ => H4U.VisitT H4T.EOF
        fun expectationError () =
            let val msg = String.concat
                              ["Expected ",
                               strVisitationToString expectedVisit, ", got ",
                               tokVisitationToString pstrmHd, " instead."]
            in IllFormedHTMLParseStream(pstrm, SOME msg) end
    in
        if visitationSimilar(pstrmHd, expectedVisit) then H4U.stream_tl pstrm
        else raise (expectationError())
    end

fun expectEnterNT nt = expect (H4U.EnterNT (Atom.atom nt))

fun expectExitNT nt = expect (H4U.ExitNT (Atom.atom nt))

fun expectVisitT tokStr = expect (H4U.VisitT tokStr)

fun expectEnterNTInDomain ntMap pstrm = let
      val pstrmHd = H4U.stream_hd pstrm
            handle _ => H4U.VisitT H4T.EOF
      fun expectationError () = let
	    val expectedNTs = String.concatWith ", " (map Atom.toString (AtomMap.listKeys ntMap))
	    val msg = String.concat [
                    "Expected entry of one of ", expectedNTs, "; got ",
		    tokVisitationToString pstrmHd, " instead."
		  ]
            in
	      IllFormedHTMLParseStream(pstrm, SOME msg)
	    end
      in
	case pstrmHd
	 of H4U.EnterNT ntAtom =>
	      if AtomMap.inDomain (ntMap, ntAtom)
		then AtomMap.lookup (ntMap, ntAtom)
		else raise (expectationError ())
	  | _ => raise (expectationError ())
        (* end case *)
      end

fun optional optVisit (strm as H4U.StreamCons(strmHd, _)) =
    if visitationSimilar(strmHd, optVisit)
    then (H4U.stream_tl strm, SOME strmHd)
    else (strm, NONE)
  | optional _ _ = (H4U.StreamNil, NONE)

fun optVisitTok tokName strm =
    case optional (H4U.VisitT tokName) strm
     of (strm', SOME (H4U.VisitT tok)) => (strm', SOME tok)
      | _ => (strm, NONE)

fun isEnterNT nt pstrm = (expectEnterNT nt pstrm; true)
    handle IllFormedHTMLParseStream _ => false

fun isExitNT nt pstrm = (expectExitNT nt pstrm; true)
    handle IllFormedHTMLParseStream _ => false

fun isVisitT tokStr pstrm = (expectVisitT tokStr pstrm; true)
    handle IllFormedHTMLParseStream _ => false

fun isEither (is1, is2) pstrm = (is1 pstrm) orelse (is2 pstrm)

fun streamSkipUntil _ H4U.StreamNil = H4U.StreamNil
  | streamSkipUntil pred (strm as H4U.StreamCons (strmHd, _)) =
    if pred strmHd then strm else streamSkipUntil pred (H4U.stream_tl strm)

fun streamSkipWhile pred = streamSkipUntil (fn strmHd => not (pred strmHd))

fun streamConsumeUntil consumer until strm =
    let fun streamConsumeUntil' strm' acc =
            if until strm' then (strm', rev acc)
            else let val (strm'', consumerVal) = consumer strm'
                 in streamConsumeUntil' strm'' (consumerVal :: acc) end
    in streamConsumeUntil' strm [] end

fun tokIsSpace (H4T.PCDATA pcstr) = CharVector.all Char.isSpace pcstr
  | tokIsSpace _ = false

fun tokIsComment (H4T.COMMENT _) = true
  | tokIsComment _ = false

fun visitationIsSpace (H4U.VisitT tok) = tokIsSpace tok
  | visitationIsSpace _ = false

(* XXX I don't like the solution of skipping both whitespace and
comments, but I don't know how to munge CDATA and comments into block
elements, given the current HTML 4 data structure (I could add these,
but it would break the "purity" of the existing data type). *)

fun visitationIsSpaceOrComment (H4U.VisitT tok) = (tokIsSpace tok) orelse
                                                  (tokIsComment tok)
  | visitationIsSpaceOrComment _ = false

val skipWhitespace = streamSkipWhile visitationIsSpace

val skipWhitespaceOrComment = streamSkipWhile visitationIsSpaceOrComment

fun tokIsCdata (H4T.PCDATA _) = true
  | tokIsCdata (H4T.ENTITY_REF _) = true
  | tokIsCdata (H4T.CHAR_REF _) = true
  | tokIsCdata (H4T.COMMENT _) = true
  | tokIsCdata _ = false

fun isNotCdata (H4U.StreamCons(H4U.VisitT tok, _)) = not (tokIsCdata tok)
  | isNotCdata _ = true

exception InvalidToken of H4T.token

fun tokToCdata (H4T.PCDATA str) = H4.PCDATA str
  | tokToCdata (H4T.ENTITY_REF ent) = H4.ENTITY ent
  | tokToCdata (H4T.CHAR_REF chr) = H4.CHAR chr
  | tokToCdata (H4T.COMMENT cmt) = H4.COMMENT cmt
  | tokToCdata tok = raise (InvalidToken tok)

(*+DEBUG*)
fun tokToString (H4T.DOCTYPE doctypeStr) = doctypeStr
  | tokToString (H4T.PCDATA dataStr) = ("PCDATA \"" ^ (String.toString dataStr)
                                        ^ "\"")
  | tokToString (H4T.COMMENT commentStr) = commentStr
  | tokToString tok = H4TU.tokToString tok

fun printVisitationStream strm =
    print ((H4U.visitationToString tokToString (H4U.stream_hd strm)) ^ "\n")

fun printIllFormedErr (IllFormedHTMLParseStream (strm, msgOpt)) = (
      print "Error in parse stream at: ";
      printVisitationStream strm;
      (case msgOpt of SOME msg => print(concat["Message: ", msg, "\n"]) | _ => ()))
  | printIllFormedErr exn = raise exn
(*-DEBUG*)

fun getAttrsFromStream (H4U.StreamCons (H4U.VisitT tok, _)) =
    (case H4TU.tokGetAttrs tok
      of SOME attrs => attrs
       | NONE => [])
  | getAttrsFromStream _ = []

fun html0aryFromParseStream tag ctor pstrm =
    let val pstrm1 = expectEnterNT tag pstrm
        val pstrm2 = expectVisitT ("START" ^ tag) pstrm1
        val attrs = getAttrsFromStream pstrm1
        val pstrm3 = expectExitNT tag (skipWhitespaceOrComment pstrm2)
    in
        (pstrm3, SOME (ctor attrs))
    end

fun listOfOptsToList lst = map Option.valOf lst
(*DEBUG*) handle ex => raise ex

fun htmlNaryFromParseStream tag ctor childFromParseStream pstrm0 =
    let val endTag = "END" ^ tag
        val pstrm1 = expectEnterNT tag pstrm0
        val pstrm2 = expectVisitT ("START" ^ tag) pstrm1
        val attrs = getAttrsFromStream pstrm1
        val (pstrm3, children) =
            streamConsumeUntil childFromParseStream
                               (isEither (isVisitT endTag, isExitNT tag))
                               (skipWhitespaceOrComment pstrm2)
        val (pstrm4, _) = optVisitTok endTag pstrm3
        val pstrm5 = expectExitNT tag (skipWhitespaceOrComment pstrm4)
    in (pstrm5, SOME (ctor (attrs, listOfOptsToList children))) end

type parseVisitStream = H4T.token H4U.parsevisitation H4U.stream

(* FIXME: might as well use AtomTable.hash_table for these, since we are initializing them later *)
val headContentNTMap : (parseVisitStream -> parseVisitStream * H4.head_content option) AtomMap.map ref =
      ref AtomMap.empty

val blockNTMap : (parseVisitStream -> parseVisitStream * H4.block option) AtomMap.map ref =
      ref AtomMap.empty

val inlineNTMap : (parseVisitStream -> parseVisitStream * H4.inline option) AtomMap.map ref =
      ref AtomMap.empty

val tableDataNTMap : (parseVisitStream -> parseVisitStream * H4.table_data option) AtomMap.map ref =
      ref AtomMap.empty

fun cvtBlock ctor (SOME block) = SOME (ctor block)
  | cvtBlock _ NONE = NONE

fun cvtInline ctor (SOME inline) = SOME (ctor inline)
  | cvtInline _ NONE = NONE

fun cvtFlow ctor (SOME flow) = SOME (ctor flow)
  | cvtFlow _ _ = NONE

fun cvtOption ctor (SOME htmlopt) = SOME (ctor htmlopt)
  | cvtOption _ _ = NONE

fun cvtParam ctor (SOME param) = SOME (ctor param)
  | cvtParam _ _ = NONE

fun cvtFrameset ctor (SOME frameset) = SOME (ctor frameset)
  | cvtFrameset _ NONE = NONE

fun cvtScript ctor (SOME script) = SOME (ctor script)
  | cvtScript _ _ = NONE

fun cdataFromParseStream pstrm =
    if isNotCdata pstrm
    then raise (IllFormedHTMLParseStream(pstrm,
                                         SOME "Expected character data."))
    else
        let val pstrmHd = H4U.stream_hd pstrm
            val pstrmTl = H4U.stream_tl pstrm
        in case pstrmHd
            of H4U.VisitT tok => (pstrmTl, SOME (tokToCdata tok))
             | _ => (pstrmTl, NONE)
        end

fun htmlFromParseStream pstrm0 =
    let val pstrm1 =
            (skipWhitespaceOrComment o (expectEnterNT "DOCUMENT")) pstrm0
        val (pstrm2, doctypeTokOpt) = optVisitTok "DOCTYPE" pstrm1
        val theVersion = (case doctypeTokOpt
                           of SOME (H4T.DOCTYPE doctype) => SOME doctype
                            | _ => NONE)
        val (pstrm3, starthtmlTokOpt) =
            optVisitTok "STARTHTML" (skipWhitespaceOrComment pstrm2)
        val (pstrm4, headDataListOpt) = headFromParseStream
                                            (skipWhitespaceOrComment pstrm3)
    in if not (isSome headDataListOpt)
      then (pstrm4, NONE)
      else (case bodyOrFramesetFromParseStream pstrm4
	of (pstrm5, SOME content) => let
	     val (pstrm6, _) = optVisitTok "ENDHTML" pstrm5
	     val pstrm7 = (skipWhitespaceOrComment o
			  (expectExitNT "DOCUMENT") o
			  skipWhitespaceOrComment) pstrm6
	     in (
	       pstrm7,
	       SOME (H4.HTML{
		   version = theVersion,
		   head = [],
		   content = content
		 })
	     ) end
	 | (pstrm5, NONE) => (pstrm5, NONE)
       (* end case *))
    end
and headFromParseStream pstrm0 =
    let val pstrm1 = (skipWhitespaceOrComment o (expectEnterNT "HEAD")) pstrm0
        val (pstrm2, startheadTokOpt) = optVisitTok "STARTHEAD" pstrm1
        val (pstrm3, children) =
            streamConsumeUntil headContentFromParseStream
                               (isEither(isExitNT "HEAD", isVisitT "ENDHEAD"))
                               (skipWhitespaceOrComment pstrm2)
        val (pstrm4, _) = optVisitTok "ENDHEAD" pstrm3
        val pstrm5 = expectExitNT "HEAD" (skipWhitespaceOrComment pstrm4)
    in (pstrm5, SOME (listOfOptsToList children)) end
and headContentFromParseStream pstrm =
    let val ntFunc = expectEnterNTInDomain (!headContentNTMap) pstrm
        val (pstrm', resultOpt) = ntFunc pstrm
    in (skipWhitespaceOrComment pstrm', resultOpt) end
and bodyOrFramesetFromParseStream pstrm =
    let fun cvtBody (SOME body) = SOME (H4.BodyOrFrameset_BODY body)
          | cvtBody _ = NONE
    in
        if isEnterNT "BODY" pstrm
        then let val (pstrm', bodyOpt) = bodyFromParseStream pstrm
             in (pstrm', cvtBody bodyOpt) end
        else let val (pstrm', framesetOpt) = framesetFromParseStream pstrm
             in (pstrm',
                 cvtFrameset H4.BodyOrFrameset_FRAMESET framesetOpt) end
    end
and bodyFromParseStream pstrm0 =
    let val pstrm1 = expectEnterNT "BODY" pstrm0
        val (pstrm2, startbodyTokOpt) = optVisitTok "STARTBODY" pstrm1
        val attrs = (case startbodyTokOpt
	       of SOME startbody => (case H4TU.tokGetAttrs startbody
		     of SOME attrs => attrs
		      | NONE => []
		    (* end case *))
		| NONE => []
	      (* end case *))
        val (pstrm3, children) =
            streamConsumeUntil blockOrScriptFromParseStream
                               (isEither(isExitNT "BODY", isVisitT "ENDBODY"))
                               (skipWhitespaceOrComment pstrm2)
        val (pstrm4, _) = optVisitTok "ENDBODY" pstrm3
        val pstrm5 = expectExitNT "BODY" (skipWhitespaceOrComment pstrm4)
    in (pstrm5, SOME (H4.BODY (attrs, listOfOptsToList children))) end 
and framesetFromParseStream pstrm0 =
    let val pstrm1 = expectEnterNT "FRAMESET" pstrm0
        val pstrm2 = expectVisitT "STARTFRAMESET" pstrm1
        val attrs = getAttrsFromStream pstrm1
        val (pstrm3, children) =
            streamConsumeUntil framesetOrFrameFromParseStream
                               (isEither(isVisitT "ENDFRAMESET",
                                         isEnterNT "NOFRAMES"))
                               (skipWhitespaceOrComment pstrm2)
        val (pstrm4, noframesOpt) =
            if isEnterNT "NOFRAMES" pstrm3 then
                let val (pstrm4', noframesOpt') =
                        noFramesFromParseStream pstrm3
                in (skipWhitespaceOrComment pstrm4', noframesOpt') end
            else (pstrm3, NONE)
        val pstrm5 = expectVisitT "ENDFRAMESET" pstrm4
        val pstrm6 = expectExitNT "FRAMESET" (skipWhitespaceOrComment pstrm5)
    in
        (pstrm6, SOME (H4.FRAMESET (attrs, listOfOptsToList children,
                                    noframesOpt)))
    end
and framesetOrFrameFromParseStream pstrm0 =
    let val pstrm1 = skipWhitespaceOrComment pstrm0
        val (pstrm2, result) =
            if isEnterNT "FRAMESET" pstrm1
            then let val (pstrm', result') = framesetFromParseStream pstrm1
                 in (pstrm',
                     cvtFrameset H4.FramesetOrFrame_FRAMESET result') end
            else html0aryFromParseStream "FRAME" H4.FRAME pstrm1
    in (skipWhitespaceOrComment pstrm2, result) end
and noFramesFromParseStream pstrm0 =
    let val pstrm1 = expectEnterNT "NOFRAMES" pstrm0
        val pstrm2 = expectVisitT "STARTNOFRAMES" pstrm1
        val attrs = getAttrsFromStream pstrm1
        val (pstrm3, bodyOpt) = bodyFromParseStream pstrm2
        val pstrm4 = expectVisitT "ENDNOFRAMES" pstrm3
        val pstrm5 = expectExitNT "NOFRAMES" pstrm4
    in (pstrm5, SOME (H4.NOFRAMES (attrs, valOf bodyOpt)))
(* DEBUG *)handle ex => raise ex
end
and flowFromParseStream pstrm =
    let val pstrmHd = H4U.stream_hd pstrm
        fun procInline pstrm =
            let val (pstrm', result') = inlineFromParseStream pstrm
            in (pstrm', cvtInline H4.Flow_INLINE result') end
    in case pstrmHd
        of H4U.EnterNT ntAtom =>
           if AtomMap.inDomain (!blockNTMap, ntAtom)
           then let val (pstrm', result') = blockFromParseStream pstrm
                in (pstrm', cvtBlock H4.Flow_BLOCK result') end
           else procInline pstrm
         | _ => procInline pstrm
    end
and blockFromParseStream pstrm =
    (expectEnterNTInDomain (!blockNTMap) pstrm) pstrm
and inlineFromParseStream pstrm =
    let val pstrmHd = H4U.stream_hd pstrm
    in case pstrmHd
        of H4U.VisitT tok =>
           let val (pstrm', cdataOptList) =
                   streamConsumeUntil cdataFromParseStream isNotCdata pstrm
           in (pstrm', SOME (H4.CDATA (listOfOptsToList cdataOptList))) end
         | _ => (expectEnterNTInDomain (!inlineNTMap) pstrm) pstrm
    end
and listItemFromParseStream pstrm =
    htmlNaryFromParseStream "LI" H4.LI flowFromParseStream pstrm
and scriptFromParseStream pstrm =
    htmlNaryFromParseStream "SCRIPT" H4.SCRIPT cdataFromParseStream pstrm
and paramFromParseStream pstrm =
    html0aryFromParseStream "PARAM" H4.PARAM pstrm
and legendFromParseStream pstrm =
    htmlNaryFromParseStream "LEGEND" H4.LEGEND inlineFromParseStream pstrm
and defTermOrDescFromParseStream pstrm =
    if isEnterNT "DT" pstrm
    then htmlNaryFromParseStream "DT" H4.DT inlineFromParseStream pstrm
    else htmlNaryFromParseStream "DD" H4.DD flowFromParseStream pstrm
and tableDataFromParseStream pstrm =
    (expectEnterNTInDomain (!tableDataNTMap) pstrm) pstrm
and trFromParseStream pstrm =
    htmlNaryFromParseStream "TR" H4.TR thOrTdFromParseStream pstrm
and thOrTdFromParseStream pstrm =
    if isEnterNT "TH" pstrm
    then htmlNaryFromParseStream "TH" H4.TH flowFromParseStream pstrm
    else htmlNaryFromParseStream "TD" H4.TD flowFromParseStream pstrm
and optgroupOrOptionFromParseStream pstrm =
    if isEnterNT "OPTGROUP" pstrm
      then let
	fun parseOpt pstrm = (case htmlOptionFromParseStream pstrm
	       of (pstrm', SOME(H4.OPTION stuff)) => (pstrm', SOME stuff)
		| (pstrm', _) => (pstrm', NONE)
	      (* end case *))
	in htmlNaryFromParseStream "OPTGROUP" H4.OPTGROUP parseOpt pstrm end
      else htmlOptionFromParseStream pstrm
and htmlOptionFromParseStream pstrm =
    htmlNaryFromParseStream "OPTION" H4.OPTION cdataFromParseStream pstrm
and flowOrParamFromParseStream pstrm =
    if isEnterNT "PARAM" pstrm
    then let val (pstrm', paramOpt) = paramFromParseStream pstrm
         in (pstrm', cvtParam H4.FlowOrParam_PARAM paramOpt) end
    else let val (pstrm', flowOpt) = flowFromParseStream pstrm
         in (pstrm', cvtFlow H4.FlowOrParam_FLOW flowOpt) end
and blockOrScriptFromParseStream pstrm =
    if isEnterNT "SCRIPT" pstrm
    then let val (pstrm', scriptOpt) = scriptFromParseStream pstrm
         in (skipWhitespaceOrComment pstrm',
             cvtScript H4.BlockOrScript_SCRIPT scriptOpt) end
    else let val (pstrm', blockOpt) = blockFromParseStream pstrm
         in (skipWhitespaceOrComment pstrm',
             cvtBlock H4.BlockOrScript_BLOCK blockOpt) end
and blockOrAreaFromParseStream pstrm =
    if isEnterNT "AREA" pstrm
    then html0aryFromParseStream "AREA" H4.AREA pstrm
    else let val (pstrm', blockOpt) = blockFromParseStream pstrm
         in (pstrm', cvtBlock H4.BlockOrArea_BLOCK blockOpt) end
and headObjectFromParseStream pstrm =
    htmlNaryFromParseStream "OBJECT" H4.Head_OBJECT flowOrParamFromParseStream
                            pstrm
and headScriptFromParseStream pstrm =
    let val (pstrm', scriptOpt) = scriptFromParseStream pstrm
    in (pstrm', cvtScript H4.Head_SCRIPT scriptOpt) end

val titleFromParseStream =
    htmlNaryFromParseStream "TITLE" H4.Head_TITLE cdataFromParseStream
val baseFromParseStream = html0aryFromParseStream "BASE" H4.Head_BASE
val metaFromParseStream = html0aryFromParseStream "META" H4.Head_META
val linkFromParseStream = html0aryFromParseStream "LINK" H4.Head_LINK
val pFromParseStream = htmlNaryFromParseStream "P" H4.P inlineFromParseStream
val h1FromParseStream =
    htmlNaryFromParseStream "H1" H4.H1 inlineFromParseStream
val h2FromParseStream =
    htmlNaryFromParseStream "H2" H4.H2 inlineFromParseStream
val h3FromParseStream =
    htmlNaryFromParseStream "H3" H4.H3 inlineFromParseStream
val h4FromParseStream =
    htmlNaryFromParseStream "H4" H4.H4 inlineFromParseStream
val h5FromParseStream =
    htmlNaryFromParseStream "H5" H4.H5 inlineFromParseStream
val h6FromParseStream =
    htmlNaryFromParseStream "H6" H4.H6 inlineFromParseStream
val ulFromParseStream =
    htmlNaryFromParseStream "UL" H4.UL listItemFromParseStream
val olFromParseStream =
    htmlNaryFromParseStream "OL" H4.OL listItemFromParseStream
val dirFromParseStream =
    htmlNaryFromParseStream "DIR" H4.DIR listItemFromParseStream
val menuFromParseStream =
    htmlNaryFromParseStream "MENU" H4.MENU listItemFromParseStream
val preFromParseStream =
    (* XXX This will not properly track whitespace currently. *)
    htmlNaryFromParseStream "PRE" H4.PRE inlineFromParseStream
val dlFromParseStream =
    htmlNaryFromParseStream "DL" H4.DL defTermOrDescFromParseStream
val divFromParseStream =
    htmlNaryFromParseStream "DIV" H4.DIV flowFromParseStream
val noscriptFromParseStream =
    htmlNaryFromParseStream "NOSCRIPT" H4.NOSCRIPT blockFromParseStream
val blockquoteFromParseStream =
    htmlNaryFromParseStream "BLOCKQUOTE" H4.BLOCKQUOTE
                            blockOrScriptFromParseStream
val formFromParseStream =
    htmlNaryFromParseStream "FORM" H4.FORM blockOrScriptFromParseStream
val hrFromParseStream = html0aryFromParseStream "HR" H4.HR
val tableFromParseStream =
    htmlNaryFromParseStream "TABLE" H4.TABLE tableDataFromParseStream
fun fieldsetFromParseStream pstrm0 =
    let val pstrm1 = expectEnterNT "FIELDSET" pstrm0
        val pstrm2 = expectVisitT "STARTFIELDSET" pstrm1
        val attrs = getAttrsFromStream pstrm1
        val (pstrm3, legendOpt) =
            legendFromParseStream (skipWhitespaceOrComment pstrm2)
        val (pstrm4, flows) =
            streamConsumeUntil flowFromParseStream (isVisitT "ENDFIELDSET")
                               pstrm3
        val pstrm5 = expectVisitT "ENDFIELDSET" pstrm4
        val pstrm6 = expectExitNT "FIELDSET" pstrm5
    in (pstrm5, SOME (H4.FIELDSET (attrs, legendOpt,
                                   listOfOptsToList flows))) end
val addressFromParseStream =
    htmlNaryFromParseStream "ADDRESS" H4.ADDRESS inlineFromParseStream
val centerFromParseStream =
    htmlNaryFromParseStream "CENTER" H4.CENTER flowFromParseStream
val isindexFromParseStream = html0aryFromParseStream "ISINDEX" H4.ISINDEX
val ttFromParseStream =
    htmlNaryFromParseStream "TT" H4.TT inlineFromParseStream
val iFromParseStream =
    htmlNaryFromParseStream "I" H4.I inlineFromParseStream
val bFromParseStream =
    htmlNaryFromParseStream "B" H4.B inlineFromParseStream
val bigFromParseStream =
    htmlNaryFromParseStream "BIG" H4.BIG inlineFromParseStream
val smallFromParseStream =
    htmlNaryFromParseStream "SMALL" H4.SMALL inlineFromParseStream
val uFromParseStream =
    htmlNaryFromParseStream "U" H4.U inlineFromParseStream
val sFromParseStream =
    htmlNaryFromParseStream "S" H4.S inlineFromParseStream
val strikeFromParseStream =
    htmlNaryFromParseStream "STRIKE" H4.STRIKE inlineFromParseStream
val emFromParseStream =
    htmlNaryFromParseStream "EM" H4.EM inlineFromParseStream
val strongFromParseStream =
    htmlNaryFromParseStream "STRONG" H4.STRONG inlineFromParseStream
val dfnFromParseStream =
    htmlNaryFromParseStream "DFN" H4.DFN inlineFromParseStream
val codeFromParseStream =
    htmlNaryFromParseStream "CODE" H4.CODE inlineFromParseStream
val sampFromParseStream =
    htmlNaryFromParseStream "SAMP" H4.SAMP inlineFromParseStream
val kbdFromParseStream =
    htmlNaryFromParseStream "KBD" H4.KBD inlineFromParseStream
val varFromParseStream =
    htmlNaryFromParseStream "VAR" H4.VAR inlineFromParseStream
val citeFromParseStream =
    htmlNaryFromParseStream "CITE" H4.CITE inlineFromParseStream
val abbrFromParseStream =
    htmlNaryFromParseStream "ABBR" H4.ABBR inlineFromParseStream
val acronymFromParseStream =
    htmlNaryFromParseStream "ACRONYM" H4.ACRONYM inlineFromParseStream
val aFromParseStream =
    htmlNaryFromParseStream "A" H4.A inlineFromParseStream
val imgFromParseStream =
    html0aryFromParseStream "IMG" H4.IMG
val objectFromParseStream =
    htmlNaryFromParseStream "OBJECT" H4.OBJECT flowOrParamFromParseStream
val brFromParseStream =
    html0aryFromParseStream "BR" H4.BR
fun inlineScriptFromParseStream pstrm =
    let val (pstrm', scriptOpt) = scriptFromParseStream pstrm
    in (pstrm', cvtScript H4.Inline_SCRIPT scriptOpt) end
val mapFromParseStream =
    htmlNaryFromParseStream "MAP" H4.MAP blockOrAreaFromParseStream
val qFromParseStream =
    htmlNaryFromParseStream "Q" H4.Q inlineFromParseStream
val subFromParseStream =
    htmlNaryFromParseStream "SUB" H4.SUB inlineFromParseStream
val supFromParseStream =
    htmlNaryFromParseStream "SUP" H4.SUP inlineFromParseStream
val spanFromParseStream =
    htmlNaryFromParseStream "SPAN" H4.SPAN inlineFromParseStream
val bdoFromParseStream =
    htmlNaryFromParseStream "BDO" H4.BDO inlineFromParseStream
val appletFromParseStream =
    htmlNaryFromParseStream "APPLET" H4.APPLET flowOrParamFromParseStream
val basefontFromParseStream =
    html0aryFromParseStream "BASEFONT" H4.BASEFONT
val fontFromParseStream =
    htmlNaryFromParseStream "FONT" H4.FONT inlineFromParseStream
val iframeFromParseStream =
    htmlNaryFromParseStream "IFRAME" H4.IFRAME flowFromParseStream
val inputFromParseStream =
    html0aryFromParseStream "INPUT" H4.INPUT
val selectFromParseStream =
    htmlNaryFromParseStream "SELECT" H4.SELECT optgroupOrOptionFromParseStream
val textareaFromParseStream =
    htmlNaryFromParseStream "TEXTAREA" H4.TEXTAREA cdataFromParseStream
val labelFromParseStream =
    htmlNaryFromParseStream "LABEL" H4.LABEL inlineFromParseStream
val buttonFromParseStream =
    htmlNaryFromParseStream "BUTTON" H4.BUTTON flowFromParseStream
val captionFromParseStream =
    htmlNaryFromParseStream "CAPTION" H4.CAPTION inlineFromParseStream
val colFromParseStream =
    html0aryFromParseStream "COL" H4.COL
val colgroupFromParseStream =
    let fun consumeCol pstrm =
            let val (pstrm', colOptVal) = colFromParseStream pstrm
                fun cvtCol (SOME (H4.COL attrs)) = SOME attrs
                  | cvtCol _ = NONE
            in (skipWhitespaceOrComment pstrm', cvtCol colOptVal) end
    in htmlNaryFromParseStream "COLGROUP" H4.COLGROUP consumeCol end
val theadFromParseStream =
    htmlNaryFromParseStream "THEAD" H4.THEAD trFromParseStream
val tfootFromParseStream =
    htmlNaryFromParseStream "TFOOT" H4.TFOOT trFromParseStream
val tbodyFromParseStream =
    htmlNaryFromParseStream "TBODY" H4.TBODY trFromParseStream

val _ =
    (headContentNTMap
     := (foldl AtomMap.insert' AtomMap.empty
               [ (Atom.atom "TITLE", titleFromParseStream),
                 (Atom.atom "BASE", baseFromParseStream),
                 (Atom.atom "SCRIPT", headScriptFromParseStream),
                 (Atom.atom "META", metaFromParseStream),
                 (Atom.atom "LINK", linkFromParseStream),
                 (Atom.atom "OBJECT", headObjectFromParseStream)]),
     blockNTMap
     := (foldl AtomMap.insert' AtomMap.empty
               [ (Atom.atom "P", pFromParseStream),
                 (Atom.atom "H1", h1FromParseStream),
                 (Atom.atom "H2", h2FromParseStream),
                 (Atom.atom "H3", h3FromParseStream),
                 (Atom.atom "H4", h4FromParseStream),
                 (Atom.atom "H5", h5FromParseStream),
                 (Atom.atom "H6", h6FromParseStream),
                 (Atom.atom "UL", ulFromParseStream),
                 (Atom.atom "OL", olFromParseStream),
                 (Atom.atom "DIR", dirFromParseStream),
                 (Atom.atom "MENU", menuFromParseStream),
                 (Atom.atom "PRE", preFromParseStream),
                 (Atom.atom "DL", dlFromParseStream),
                 (Atom.atom "DIV", divFromParseStream),
                 (Atom.atom "NOSCRIPT", noscriptFromParseStream),
                 (Atom.atom "BLOCKQUOTE", blockquoteFromParseStream),
                 (Atom.atom "FORM", formFromParseStream),
                 (Atom.atom "HR", hrFromParseStream),
                 (Atom.atom "TABLE", tableFromParseStream),
                 (Atom.atom "FIELDSET", fieldsetFromParseStream),
                 (Atom.atom "ADDRESS", addressFromParseStream),
                 (Atom.atom "ISINDEX", isindexFromParseStream),
                 (Atom.atom "CENTER", centerFromParseStream)]),
     inlineNTMap
     := (foldl AtomMap.insert' AtomMap.empty
               [ (Atom.atom "TT", ttFromParseStream),
                 (Atom.atom "I", iFromParseStream),
                 (Atom.atom "B", bFromParseStream),
                 (Atom.atom "BIG", bigFromParseStream),
                 (Atom.atom "SMALL", smallFromParseStream),
                 (Atom.atom "U", uFromParseStream),
                 (Atom.atom "S", sFromParseStream),
                 (Atom.atom "STRIKE", strikeFromParseStream),
                 (Atom.atom "EM", emFromParseStream),
                 (Atom.atom "STRONG", strongFromParseStream),
                 (Atom.atom "DFN", dfnFromParseStream),
                 (Atom.atom "CODE", codeFromParseStream),
                 (Atom.atom "SAMP", sampFromParseStream),
                 (Atom.atom "KBD", kbdFromParseStream),
                 (Atom.atom "VAR", varFromParseStream),
                 (Atom.atom "CITE", citeFromParseStream),
                 (Atom.atom "ABBR", abbrFromParseStream),
                 (Atom.atom "ACRONYM", acronymFromParseStream),
                 (Atom.atom "A", aFromParseStream),
                 (Atom.atom "IMG", imgFromParseStream),
                 (Atom.atom "OBJECT", objectFromParseStream),
                 (Atom.atom "BR", brFromParseStream),
                 (Atom.atom "SCRIPT", inlineScriptFromParseStream),
                 (Atom.atom "MAP", mapFromParseStream),
                 (Atom.atom "Q", qFromParseStream),
                 (Atom.atom "SUB", subFromParseStream),
                 (Atom.atom "SUP", supFromParseStream),
                 (Atom.atom "SPAN", spanFromParseStream),
                 (Atom.atom "BDO", bdoFromParseStream),
                 (Atom.atom "APPLET", appletFromParseStream),
                 (Atom.atom "BASEFONT", basefontFromParseStream),
                 (Atom.atom "FONT", fontFromParseStream),
                 (Atom.atom "IFRAME", iframeFromParseStream),
                 (Atom.atom "INPUT", inputFromParseStream),
                 (Atom.atom "SELECT", selectFromParseStream),
                 (Atom.atom "TEXTAREA", textareaFromParseStream),
                 (Atom.atom "LABEL", labelFromParseStream),
                 (Atom.atom "BUTTON", buttonFromParseStream)]),
     tableDataNTMap
     := (foldl AtomMap.insert' AtomMap.empty
               [ (Atom.atom "CAPTION", captionFromParseStream),
                 (Atom.atom "COL", colFromParseStream),
                 (Atom.atom "COLGROUP", colgroupFromParseStream),
                 (Atom.atom "THEAD", theadFromParseStream),
                 (Atom.atom "TFOOT", tfootFromParseStream),
                 (Atom.atom "TBODY", tbodyFromParseStream)])
    )

fun fromParseTree pt =
    let val (_, result) =
            htmlFromParseStream (H4U.parsetreeToVisitationStream pt)
    in result end

fun fromString str = let
    val pt_opt = parseStream (TextIO.openString str)
in case pt_opt
    of NONE => NONE 
     | SOME pt => fromParseTree pt
end

end (* HTML4ParserUtils *)

(* ______________________________________________________________________
   End of html4-parser.sml
   ______________________________________________________________________ *)