File: html4-printer.sml

package info (click to toggle)
smlnj 110.79-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 82,552 kB
  • sloc: ansic: 32,532; asm: 6,314; sh: 2,303; makefile: 1,821; perl: 1,170; pascal: 295; yacc: 190; cs: 78; python: 77; lisp: 19
file content (404 lines) | stat: -rw-r--r-- 15,751 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
(* html4-printer.sml
 *
 * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org)
 * All rights reserved.
 *)

structure HTML4Printer : sig
    val toString : HTML4.html -> string
end = struct

structure H4 = HTML4

structure PP = PrettyPrint

exception NotImplemented

structure HTML4TagStrings = struct
    (* Strict *)
    val A = "A"
    val ABBR = "ABBR"
    val ACRONYM = "ACRONYM"
    val ADDRESS = "ADDRESS"
    val AREA = "AREA"
    val B = "B"
    val BASE = "BASE"
    val BDO = "BDO"
    val BIG = "BIG"
    val BLOCKQUOTE = "BLOCKQUOTE"
    val BODY = "BODY"
    val BR = "BR"
    val BUTTON = "BUTTON"
    val CAPTION = "CAPTION"
    val CITE = "CITE"
    val CODE = "CODE"
    val COL = "COL"
    val COLGROUP = "COLGROUP"
    val DD = "DD"
    val DEL = "DEL"
    val DFN = "DFN"
    val DIV = "DIV"
    val DL = "DL"
    val DT = "DT"
    val EM = "EM"
    val FIELDSET = "FIELDSET"
    val FORM = "FORM"
    val H1 = "H1"
    val H2 = "H2"
    val H3 = "H3"
    val H4 = "H4"
    val H5 = "H5"
    val H6 = "H6"
    val HEAD = "HEAD"
    val HR = "HR"
    val HTML = "HTML"
    val I = "I"
    val IMG = "IMG"
    val INPUT = "INPUT"
    val INS = "INS"
    val KBD = "KBD"
    val LABEL = "LABEL"
    val LEGEND = "LEGEND"
    val LI = "LI"
    val LINK = "LINK"
    val MAP = "MAP"
    val META = "META"
    val NOSCRIPT = "NOSCRIPT"
    val OBJECT = "OBJECT"
    val OL = "OL"
    val OPTGROUP = "OPTGROUP"
    val OPTION = "OPTION"
    val P = "P"
    val PARAM = "PARAM"
    val PRE = "PRE"
    val Q = "Q"
    val SAMP = "SAMP"
    val SCRIPT = "SCRIPT"
    val SELECT = "SELECT"
    val SMALL = "SMALL"
    val SPAN = "SPAN"
    val STRONG = "STRONG"
    val STYLE = "STYLE"
    val SUB = "SUB"
    val SUP = "SUP"
    val TABLE = "TABLE"
    val TBODY = "TBODY"
    val TD = "TD"
    val TEXTAREA = "TEXTAREA"
    val TFOOT = "TFOOT"
    val TH = "TH"
    val THEAD = "THEAD"
    val TITLE = "TITLE"
    val TR = "TR"
    val TT = "TT"
    val UL = "UL"
    val VAR = "VAR"
    (* Frameset *)
    val FRAME = "FRAME"
    val FRAMESET = "FRAMESET"
    val NOFRAMES = "NOFRAMES"
    (* Loose *)
    val APPLET = "APPLET"
    val BASEFONT = "BASEFONT"
    val CENTER = "CENTER"
    val DIR = "DIR"
    val FONT = "FONT"
    val IFRAME = "IFRAME"
    val ISINDEX = "ISINDEX"
    val MENU = "MENU"
    val S = "S"
    val STRIKE = "STRIKE"
    val U = "U"
end

structure S = HTML4TagStrings

val strictStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"

val looseStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">"

val framesetStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\" \"http://www.w3.org/TR/html4/frameset.dtd\">"

val xhtmlStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"

(* ____________________________________________________________ *)

fun isStrict _ = true

fun getVersionStr (doc as H4.HTML {head, content, ...}) =
    case content of
        H4.BodyOrFrameset_BODY (H4.BODY (attrs, children)) =>
        if isStrict children then strictStr else looseStr
      | H4.BodyOrFrameset_FRAMESET _ => framesetStr

(* ____________________________________________________________ *)

fun ppOpenTag ppstrm (tag, attributes) =
    PP.string ppstrm (String.concat
                      ["<", tag,
                       case attributes of
                           attrs as _::_ => " " ^ (HTML4Utils.attrsToStr attrs)
                         | [] => "",
                       ">"])

fun ppCloseTag ppstrm tag =
    PP.string ppstrm (String.concat ["</", tag, ">"])

fun ppChildren _ _ [] = ()
  | ppChildren ppstrm ppChild children = (
    PP.openHVBox ppstrm (PP.Abs 2);
    PP.newline ppstrm;
    app (ppChild ppstrm) children;
    PP.closeBox ppstrm;
    PP.newline ppstrm
    )

fun ppTagAndChildren ppstrm ppChild tag (attrs, children) = (
    ppOpenTag ppstrm (tag, attrs);
    ppChildren ppstrm ppChild children;
    ppCloseTag ppstrm tag
    )
                                                              
(* ____________________________________________________________ *)

local
    open H4
in
    fun ppCol ppstrm attrs = ppOpenTag ppstrm (S.COL, attrs)

    fun ppCdata ppstrm (CHAR chNum) =
        PP.string ppstrm ("&#" ^ (IntInf.toString chNum) ^ ";")
      | ppCdata ppstrm (COMMENT comment_string) =
        PP.string ppstrm comment_string
      | ppCdata ppstrm (ENTITY ent) =
        PP.string ppstrm ("&" ^ (Atom.toString ent) ^ ";")
      | ppCdata ppstrm (PCDATA string_data) = PP.string ppstrm string_data
    and ppHtml ppstrm (doc as HTML {version, head, content}) = (
        PP.string ppstrm (case version of
                              SOME doctype_str => doctype_str
                            | NONE => getVersionStr doc);
        PP.newline ppstrm;
        ppOpenTag ppstrm (S.HTML, []);
        PP.openHVBox ppstrm (PP.Abs 2);
        PP.newline ppstrm;
        ppOpenTag ppstrm (S.HEAD, []);
        ppChildren ppstrm ppHead_content head;
        ppCloseTag ppstrm S.HEAD;
        PP.newline ppstrm;
        ppBody_or_frameset ppstrm content;
        PP.closeBox ppstrm;
        PP.newline ppstrm;
        ppCloseTag ppstrm S.HTML;
        PP.newline ppstrm
    )
    and ppHead_content ppstrm (Head_BASE attrs) =
        ppOpenTag ppstrm (S.BASE, attrs)
      | ppHead_content ppstrm (Head_LINK attrs) =
        ppOpenTag ppstrm (S.LINK, attrs)
      | ppHead_content ppstrm (Head_META attrs) =
        ppOpenTag ppstrm (S.META, attrs)
      | ppHead_content ppstrm (Head_OBJECT contents) =
        ppTagAndChildren ppstrm ppFlow_or_param S.OBJECT contents
      | ppHead_content ppstrm (Head_SCRIPT child) = ppScript ppstrm child
      | ppHead_content ppstrm (Head_STYLE contents) =
        ppTagAndChildren ppstrm ppCdata S.STYLE contents
      | ppHead_content ppstrm (Head_TITLE contents) =
        ppTagAndChildren ppstrm ppCdata S.TITLE contents
    and ppBody_or_frameset ppstrm (BodyOrFrameset_BODY body) =
        ppBody ppstrm body
      | ppBody_or_frameset ppstrm (BodyOrFrameset_FRAMESET frameset) =
        ppFrameset ppstrm frameset
    and ppBody ppstrm (BODY content) =
        ppTagAndChildren ppstrm ppBlock_or_script S.BODY content
    and ppFrameset ppstrm (FRAMESET (attrs, children, noframesOpt)) = (
        ppOpenTag ppstrm (S.FRAMESET, attrs);
        ppChildren ppstrm ppFrameset_or_frame children;
        case noframesOpt of
            SOME noframes => (PP.newline ppstrm;
                              ppNoframes ppstrm noframes)
          | _ => ();
        ppCloseTag ppstrm S.FRAMESET
    )
    and ppFrameset_or_frame ppstrm (FRAME attrs) =
        ppOpenTag ppstrm (S.FRAME, attrs)
      | ppFrameset_or_frame ppstrm (FramesetOrFrame_FRAMESET frameset) =
        ppFrameset ppstrm frameset
    and ppNoframes ppstrm (NOFRAMES (attrs, body)) =
        (ppOpenTag ppstrm (S.NOFRAMES, attrs);
         ppBody ppstrm body;
         ppCloseTag ppstrm S.NOFRAMES)
    and ppFlow ppstrm (Flow_BLOCK block) = ppBlock ppstrm block
      | ppFlow ppstrm (Flow_INLINE inline) = ppInline ppstrm inline
    and ppBlock ppstrm (ADDRESS content) =
        ppTagAndChildren ppstrm ppInline S.ADDRESS content
      | ppBlock ppstrm (BLOCKQUOTE content) =
        ppTagAndChildren ppstrm ppBlock_or_script S.BLOCKQUOTE content
      | ppBlock ppstrm (CENTER content) =
        ppTagAndChildren ppstrm ppFlow S.CENTER content
      | ppBlock ppstrm (DIR content) =
        ppTagAndChildren ppstrm ppList_item S.DIR content
      | ppBlock ppstrm (DIV content) =
        ppTagAndChildren ppstrm ppFlow S.DIV content
      | ppBlock ppstrm (DL content) =
        ppTagAndChildren ppstrm ppDef_term_or_desc S.DL content
      | ppBlock ppstrm (FIELDSET (attrs, legend_opt, children)) = (
        ppOpenTag ppstrm (S.FIELDSET, attrs);
        case legend_opt of SOME legend => ppLegend ppstrm legend | NONE => ();
        ppChildren ppstrm ppFlow children;
        ppCloseTag ppstrm S.FIELDSET
        )
      | ppBlock ppstrm (FORM content) =
        ppTagAndChildren ppstrm ppBlock_or_script S.FORM content
      | ppBlock ppstrm (H1 content) =
        ppTagAndChildren ppstrm ppInline S.H1 content
      | ppBlock ppstrm (H2 content) =
        ppTagAndChildren ppstrm ppInline S.H2 content
      | ppBlock ppstrm (H3 content) =
        ppTagAndChildren ppstrm ppInline S.H3 content
      | ppBlock ppstrm (H4 content) =
        ppTagAndChildren ppstrm ppInline S.H4 content
      | ppBlock ppstrm (H5 content) =
        ppTagAndChildren ppstrm ppInline S.H5 content
      | ppBlock ppstrm (H6 content) =
        ppTagAndChildren ppstrm ppInline S.H6 content
      | ppBlock ppstrm (HR attrs) = ppOpenTag ppstrm (S.HR, attrs)
      | ppBlock ppstrm (ISINDEX attrs) = ppOpenTag ppstrm (S.ISINDEX, attrs)
      | ppBlock ppstrm (MENU content) =
        ppTagAndChildren ppstrm ppList_item S.MENU content
      | ppBlock ppstrm (NOSCRIPT content) =
        ppTagAndChildren ppstrm ppBlock S.NOSCRIPT content
      | ppBlock ppstrm (OL content) =
        ppTagAndChildren ppstrm ppList_item S.OL content
      | ppBlock ppstrm (P content) =
        ppTagAndChildren ppstrm ppInline S.P content
      | ppBlock ppstrm (PRE content) =
        ppTagAndChildren ppstrm ppInline S.PRE content
      | ppBlock ppstrm (TABLE content) =
        ppTagAndChildren ppstrm ppTable_data S.TABLE content
      | ppBlock ppstrm (UL content) =
        ppTagAndChildren ppstrm ppList_item S.UL content
    and ppInline ppstrm (A content) =
        ppTagAndChildren ppstrm ppInline S.A content
      | ppInline ppstrm (ABBR content) =
        ppTagAndChildren ppstrm ppInline S.ABBR content
      | ppInline ppstrm (ACRONYM content) =
        ppTagAndChildren ppstrm ppInline S.ACRONYM content
      | ppInline ppstrm (APPLET content) =
        ppTagAndChildren ppstrm ppFlow_or_param S.APPLET content
      | ppInline ppstrm (B content) =
        ppTagAndChildren ppstrm ppInline S.B content
      | ppInline ppstrm (BASEFONT attrs) = ppOpenTag ppstrm (S.BASEFONT, attrs)
      | ppInline ppstrm (BDO content) =
        ppTagAndChildren ppstrm ppInline S.BDO content
      | ppInline ppstrm (BIG content) =
        ppTagAndChildren ppstrm ppInline S.BIG content
      | ppInline ppstrm (BR attrs) = ppOpenTag ppstrm (S.BR, attrs)
      | ppInline ppstrm (BUTTON content) =
        ppTagAndChildren ppstrm ppFlow S.BUTTON content
      | ppInline ppstrm (CDATA children) = app (ppCdata ppstrm) children
      | ppInline ppstrm (CITE content) =
        ppTagAndChildren ppstrm ppInline S.CITE content
      | ppInline ppstrm (CODE content) =
        ppTagAndChildren ppstrm ppInline S.CODE content
      | ppInline ppstrm (DFN content) =
        ppTagAndChildren ppstrm ppInline S.DFN content
      | ppInline ppstrm (EM content) =
        ppTagAndChildren ppstrm ppInline S.EM content
      | ppInline ppstrm (FONT content) =
        ppTagAndChildren ppstrm ppInline S.FONT content
      | ppInline ppstrm (I content) =
        ppTagAndChildren ppstrm ppInline S.I content
      | ppInline ppstrm (IFRAME content) =
        ppTagAndChildren ppstrm ppFlow S.IFRAME content
      | ppInline ppstrm (IMG attrs) = ppOpenTag ppstrm (S.IMG, attrs)
      | ppInline ppstrm (INPUT attrs) = ppOpenTag ppstrm (S.INPUT, attrs)
      | ppInline ppstrm (Inline_SCRIPT script) = ppScript ppstrm script
      | ppInline ppstrm (KBD content) =
        ppTagAndChildren ppstrm ppInline S.KBD content
      | ppInline ppstrm (LABEL content) =
        ppTagAndChildren ppstrm ppInline S.LABEL content
      | ppInline ppstrm (MAP content) =
        ppTagAndChildren ppstrm ppBlock_or_area S.MAP content
      | ppInline ppstrm (OBJECT content) =
        ppTagAndChildren ppstrm ppFlow_or_param S.OBJECT content
      | ppInline ppstrm (Q content) =
        ppTagAndChildren ppstrm ppInline S.Q content
      | ppInline ppstrm (S content) =
        ppTagAndChildren ppstrm ppInline S.S content
      | ppInline ppstrm (SAMP content) =
        ppTagAndChildren ppstrm ppInline S.SAMP content
      | ppInline ppstrm (SELECT content) =
        ppTagAndChildren ppstrm ppOptgroup_or_option S.SELECT content
      | ppInline ppstrm (SMALL content) =
        ppTagAndChildren ppstrm ppInline S.SMALL content
      | ppInline ppstrm (SPAN content) =
        ppTagAndChildren ppstrm ppInline S.SPAN content
      | ppInline ppstrm (STRIKE content) =
        ppTagAndChildren ppstrm ppInline S.STRIKE content
      | ppInline ppstrm (STRONG content) =
        ppTagAndChildren ppstrm ppInline S.STRONG content
      | ppInline ppstrm (SUB content) =
        ppTagAndChildren ppstrm ppInline S.SUB content
      | ppInline ppstrm (SUP content) =
        ppTagAndChildren ppstrm ppInline S.SUP content
      | ppInline ppstrm (TEXTAREA content) =
        ppTagAndChildren ppstrm ppCdata S.TEXTAREA content
      | ppInline ppstrm (TT content) =
        ppTagAndChildren ppstrm ppInline S.TT content
      | ppInline ppstrm (U content) =
        ppTagAndChildren ppstrm ppInline S.U content
      | ppInline ppstrm (VAR content) =
        ppTagAndChildren ppstrm ppInline S.VAR content
    and ppList_item ppstrm (LI content) =
        ppTagAndChildren ppstrm ppFlow S.LI content
    and ppScript ppstrm (SCRIPT content) =
        ppTagAndChildren ppstrm ppCdata S.SCRIPT content
    and ppParam ppstrm (PARAM attrs) = ppOpenTag ppstrm (S.PARAM, attrs)
    and ppLegend ppstrm (LEGEND content) =
        ppTagAndChildren ppstrm ppInline S.LEGEND content
    and ppDef_term_or_desc ppstrm (DD content) =
        ppTagAndChildren ppstrm ppFlow S.DD content
      | ppDef_term_or_desc ppstrm (DT content) =
        ppTagAndChildren ppstrm ppInline S.DT content
    and ppTable_data ppstrm (CAPTION content) =
        ppTagAndChildren ppstrm ppInline S.CAPTION content
      | ppTable_data ppstrm (COL col) = ppCol ppstrm col
      | ppTable_data ppstrm (COLGROUP content) =
        ppTagAndChildren ppstrm ppCol S.COLGROUP content
      | ppTable_data ppstrm (TBODY content) =
        ppTagAndChildren ppstrm ppTr S.TBODY content
      | ppTable_data ppstrm (TFOOT content) =
        ppTagAndChildren ppstrm ppTr S.TFOOT content
      | ppTable_data ppstrm (THEAD content) =
        ppTagAndChildren ppstrm ppTr S.THEAD content
    and ppTr ppstrm (TR content) =
        ppTagAndChildren ppstrm ppTh_or_td S.TR content
    and ppTh_or_td ppstrm (TD content) =
        ppTagAndChildren ppstrm ppFlow S.TD content
      | ppTh_or_td ppstrm (TH content) =
        ppTagAndChildren ppstrm ppFlow S.TH content
    and ppOptgroup_or_option ppstrm (OPTGROUP content) =
        ppTagAndChildren ppstrm
	   (fn pstrm => fn opt => ppTagAndChildren ppstrm ppCdata S.OPTION opt)
	   S.OPTGROUP content
      | ppOptgroup_or_option ppstrm (OPTION content) =
	ppTagAndChildren ppstrm ppCdata S.OPTION content
    and ppFlow_or_param ppstrm (FlowOrParam_FLOW flow) = ppFlow ppstrm flow
      | ppFlow_or_param ppstrm (FlowOrParam_PARAM param) = ppParam ppstrm param
    and ppBlock_or_script ppstrm (BlockOrScript_BLOCK block) =
        ppBlock ppstrm block
      | ppBlock_or_script ppstrm (BlockOrScript_SCRIPT script) =
        ppScript ppstrm script
    and ppBlock_or_area ppstrm (AREA attrs) = ppOpenTag ppstrm (S.AREA, attrs)
      | ppBlock_or_area ppstrm (BlockOrArea_BLOCK block) = ppBlock ppstrm block
end

(* ____________________________________________________________ *)

val toString = PP.pp_to_string 80 ppHtml

end (* HTML4Printer *)

(* ______________________________________________________________________
   End of html4-printer.sml
   ______________________________________________________________________ *)