1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404
|
(* html4-printer.sml
*
* COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org)
* All rights reserved.
*)
structure HTML4Printer : sig
val toString : HTML4.html -> string
end = struct
structure H4 = HTML4
structure PP = PrettyPrint
exception NotImplemented
structure HTML4TagStrings = struct
(* Strict *)
val A = "A"
val ABBR = "ABBR"
val ACRONYM = "ACRONYM"
val ADDRESS = "ADDRESS"
val AREA = "AREA"
val B = "B"
val BASE = "BASE"
val BDO = "BDO"
val BIG = "BIG"
val BLOCKQUOTE = "BLOCKQUOTE"
val BODY = "BODY"
val BR = "BR"
val BUTTON = "BUTTON"
val CAPTION = "CAPTION"
val CITE = "CITE"
val CODE = "CODE"
val COL = "COL"
val COLGROUP = "COLGROUP"
val DD = "DD"
val DEL = "DEL"
val DFN = "DFN"
val DIV = "DIV"
val DL = "DL"
val DT = "DT"
val EM = "EM"
val FIELDSET = "FIELDSET"
val FORM = "FORM"
val H1 = "H1"
val H2 = "H2"
val H3 = "H3"
val H4 = "H4"
val H5 = "H5"
val H6 = "H6"
val HEAD = "HEAD"
val HR = "HR"
val HTML = "HTML"
val I = "I"
val IMG = "IMG"
val INPUT = "INPUT"
val INS = "INS"
val KBD = "KBD"
val LABEL = "LABEL"
val LEGEND = "LEGEND"
val LI = "LI"
val LINK = "LINK"
val MAP = "MAP"
val META = "META"
val NOSCRIPT = "NOSCRIPT"
val OBJECT = "OBJECT"
val OL = "OL"
val OPTGROUP = "OPTGROUP"
val OPTION = "OPTION"
val P = "P"
val PARAM = "PARAM"
val PRE = "PRE"
val Q = "Q"
val SAMP = "SAMP"
val SCRIPT = "SCRIPT"
val SELECT = "SELECT"
val SMALL = "SMALL"
val SPAN = "SPAN"
val STRONG = "STRONG"
val STYLE = "STYLE"
val SUB = "SUB"
val SUP = "SUP"
val TABLE = "TABLE"
val TBODY = "TBODY"
val TD = "TD"
val TEXTAREA = "TEXTAREA"
val TFOOT = "TFOOT"
val TH = "TH"
val THEAD = "THEAD"
val TITLE = "TITLE"
val TR = "TR"
val TT = "TT"
val UL = "UL"
val VAR = "VAR"
(* Frameset *)
val FRAME = "FRAME"
val FRAMESET = "FRAMESET"
val NOFRAMES = "NOFRAMES"
(* Loose *)
val APPLET = "APPLET"
val BASEFONT = "BASEFONT"
val CENTER = "CENTER"
val DIR = "DIR"
val FONT = "FONT"
val IFRAME = "IFRAME"
val ISINDEX = "ISINDEX"
val MENU = "MENU"
val S = "S"
val STRIKE = "STRIKE"
val U = "U"
end
structure S = HTML4TagStrings
val strictStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
val looseStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">"
val framesetStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\" \"http://www.w3.org/TR/html4/frameset.dtd\">"
val xhtmlStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
(* ____________________________________________________________ *)
fun isStrict _ = true
fun getVersionStr (doc as H4.HTML {head, content, ...}) =
case content of
H4.BodyOrFrameset_BODY (H4.BODY (attrs, children)) =>
if isStrict children then strictStr else looseStr
| H4.BodyOrFrameset_FRAMESET _ => framesetStr
(* ____________________________________________________________ *)
fun ppOpenTag ppstrm (tag, attributes) =
PP.string ppstrm (String.concat
["<", tag,
case attributes of
attrs as _::_ => " " ^ (HTML4Utils.attrsToStr attrs)
| [] => "",
">"])
fun ppCloseTag ppstrm tag =
PP.string ppstrm (String.concat ["</", tag, ">"])
fun ppChildren _ _ [] = ()
| ppChildren ppstrm ppChild children = (
PP.openHVBox ppstrm (PP.Abs 2);
PP.newline ppstrm;
app (ppChild ppstrm) children;
PP.closeBox ppstrm;
PP.newline ppstrm
)
fun ppTagAndChildren ppstrm ppChild tag (attrs, children) = (
ppOpenTag ppstrm (tag, attrs);
ppChildren ppstrm ppChild children;
ppCloseTag ppstrm tag
)
(* ____________________________________________________________ *)
local
open H4
in
fun ppCol ppstrm attrs = ppOpenTag ppstrm (S.COL, attrs)
fun ppCdata ppstrm (CHAR chNum) =
PP.string ppstrm ("&#" ^ (IntInf.toString chNum) ^ ";")
| ppCdata ppstrm (COMMENT comment_string) =
PP.string ppstrm comment_string
| ppCdata ppstrm (ENTITY ent) =
PP.string ppstrm ("&" ^ (Atom.toString ent) ^ ";")
| ppCdata ppstrm (PCDATA string_data) = PP.string ppstrm string_data
and ppHtml ppstrm (doc as HTML {version, head, content}) = (
PP.string ppstrm (case version of
SOME doctype_str => doctype_str
| NONE => getVersionStr doc);
PP.newline ppstrm;
ppOpenTag ppstrm (S.HTML, []);
PP.openHVBox ppstrm (PP.Abs 2);
PP.newline ppstrm;
ppOpenTag ppstrm (S.HEAD, []);
ppChildren ppstrm ppHead_content head;
ppCloseTag ppstrm S.HEAD;
PP.newline ppstrm;
ppBody_or_frameset ppstrm content;
PP.closeBox ppstrm;
PP.newline ppstrm;
ppCloseTag ppstrm S.HTML;
PP.newline ppstrm
)
and ppHead_content ppstrm (Head_BASE attrs) =
ppOpenTag ppstrm (S.BASE, attrs)
| ppHead_content ppstrm (Head_LINK attrs) =
ppOpenTag ppstrm (S.LINK, attrs)
| ppHead_content ppstrm (Head_META attrs) =
ppOpenTag ppstrm (S.META, attrs)
| ppHead_content ppstrm (Head_OBJECT contents) =
ppTagAndChildren ppstrm ppFlow_or_param S.OBJECT contents
| ppHead_content ppstrm (Head_SCRIPT child) = ppScript ppstrm child
| ppHead_content ppstrm (Head_STYLE contents) =
ppTagAndChildren ppstrm ppCdata S.STYLE contents
| ppHead_content ppstrm (Head_TITLE contents) =
ppTagAndChildren ppstrm ppCdata S.TITLE contents
and ppBody_or_frameset ppstrm (BodyOrFrameset_BODY body) =
ppBody ppstrm body
| ppBody_or_frameset ppstrm (BodyOrFrameset_FRAMESET frameset) =
ppFrameset ppstrm frameset
and ppBody ppstrm (BODY content) =
ppTagAndChildren ppstrm ppBlock_or_script S.BODY content
and ppFrameset ppstrm (FRAMESET (attrs, children, noframesOpt)) = (
ppOpenTag ppstrm (S.FRAMESET, attrs);
ppChildren ppstrm ppFrameset_or_frame children;
case noframesOpt of
SOME noframes => (PP.newline ppstrm;
ppNoframes ppstrm noframes)
| _ => ();
ppCloseTag ppstrm S.FRAMESET
)
and ppFrameset_or_frame ppstrm (FRAME attrs) =
ppOpenTag ppstrm (S.FRAME, attrs)
| ppFrameset_or_frame ppstrm (FramesetOrFrame_FRAMESET frameset) =
ppFrameset ppstrm frameset
and ppNoframes ppstrm (NOFRAMES (attrs, body)) =
(ppOpenTag ppstrm (S.NOFRAMES, attrs);
ppBody ppstrm body;
ppCloseTag ppstrm S.NOFRAMES)
and ppFlow ppstrm (Flow_BLOCK block) = ppBlock ppstrm block
| ppFlow ppstrm (Flow_INLINE inline) = ppInline ppstrm inline
and ppBlock ppstrm (ADDRESS content) =
ppTagAndChildren ppstrm ppInline S.ADDRESS content
| ppBlock ppstrm (BLOCKQUOTE content) =
ppTagAndChildren ppstrm ppBlock_or_script S.BLOCKQUOTE content
| ppBlock ppstrm (CENTER content) =
ppTagAndChildren ppstrm ppFlow S.CENTER content
| ppBlock ppstrm (DIR content) =
ppTagAndChildren ppstrm ppList_item S.DIR content
| ppBlock ppstrm (DIV content) =
ppTagAndChildren ppstrm ppFlow S.DIV content
| ppBlock ppstrm (DL content) =
ppTagAndChildren ppstrm ppDef_term_or_desc S.DL content
| ppBlock ppstrm (FIELDSET (attrs, legend_opt, children)) = (
ppOpenTag ppstrm (S.FIELDSET, attrs);
case legend_opt of SOME legend => ppLegend ppstrm legend | NONE => ();
ppChildren ppstrm ppFlow children;
ppCloseTag ppstrm S.FIELDSET
)
| ppBlock ppstrm (FORM content) =
ppTagAndChildren ppstrm ppBlock_or_script S.FORM content
| ppBlock ppstrm (H1 content) =
ppTagAndChildren ppstrm ppInline S.H1 content
| ppBlock ppstrm (H2 content) =
ppTagAndChildren ppstrm ppInline S.H2 content
| ppBlock ppstrm (H3 content) =
ppTagAndChildren ppstrm ppInline S.H3 content
| ppBlock ppstrm (H4 content) =
ppTagAndChildren ppstrm ppInline S.H4 content
| ppBlock ppstrm (H5 content) =
ppTagAndChildren ppstrm ppInline S.H5 content
| ppBlock ppstrm (H6 content) =
ppTagAndChildren ppstrm ppInline S.H6 content
| ppBlock ppstrm (HR attrs) = ppOpenTag ppstrm (S.HR, attrs)
| ppBlock ppstrm (ISINDEX attrs) = ppOpenTag ppstrm (S.ISINDEX, attrs)
| ppBlock ppstrm (MENU content) =
ppTagAndChildren ppstrm ppList_item S.MENU content
| ppBlock ppstrm (NOSCRIPT content) =
ppTagAndChildren ppstrm ppBlock S.NOSCRIPT content
| ppBlock ppstrm (OL content) =
ppTagAndChildren ppstrm ppList_item S.OL content
| ppBlock ppstrm (P content) =
ppTagAndChildren ppstrm ppInline S.P content
| ppBlock ppstrm (PRE content) =
ppTagAndChildren ppstrm ppInline S.PRE content
| ppBlock ppstrm (TABLE content) =
ppTagAndChildren ppstrm ppTable_data S.TABLE content
| ppBlock ppstrm (UL content) =
ppTagAndChildren ppstrm ppList_item S.UL content
and ppInline ppstrm (A content) =
ppTagAndChildren ppstrm ppInline S.A content
| ppInline ppstrm (ABBR content) =
ppTagAndChildren ppstrm ppInline S.ABBR content
| ppInline ppstrm (ACRONYM content) =
ppTagAndChildren ppstrm ppInline S.ACRONYM content
| ppInline ppstrm (APPLET content) =
ppTagAndChildren ppstrm ppFlow_or_param S.APPLET content
| ppInline ppstrm (B content) =
ppTagAndChildren ppstrm ppInline S.B content
| ppInline ppstrm (BASEFONT attrs) = ppOpenTag ppstrm (S.BASEFONT, attrs)
| ppInline ppstrm (BDO content) =
ppTagAndChildren ppstrm ppInline S.BDO content
| ppInline ppstrm (BIG content) =
ppTagAndChildren ppstrm ppInline S.BIG content
| ppInline ppstrm (BR attrs) = ppOpenTag ppstrm (S.BR, attrs)
| ppInline ppstrm (BUTTON content) =
ppTagAndChildren ppstrm ppFlow S.BUTTON content
| ppInline ppstrm (CDATA children) = app (ppCdata ppstrm) children
| ppInline ppstrm (CITE content) =
ppTagAndChildren ppstrm ppInline S.CITE content
| ppInline ppstrm (CODE content) =
ppTagAndChildren ppstrm ppInline S.CODE content
| ppInline ppstrm (DFN content) =
ppTagAndChildren ppstrm ppInline S.DFN content
| ppInline ppstrm (EM content) =
ppTagAndChildren ppstrm ppInline S.EM content
| ppInline ppstrm (FONT content) =
ppTagAndChildren ppstrm ppInline S.FONT content
| ppInline ppstrm (I content) =
ppTagAndChildren ppstrm ppInline S.I content
| ppInline ppstrm (IFRAME content) =
ppTagAndChildren ppstrm ppFlow S.IFRAME content
| ppInline ppstrm (IMG attrs) = ppOpenTag ppstrm (S.IMG, attrs)
| ppInline ppstrm (INPUT attrs) = ppOpenTag ppstrm (S.INPUT, attrs)
| ppInline ppstrm (Inline_SCRIPT script) = ppScript ppstrm script
| ppInline ppstrm (KBD content) =
ppTagAndChildren ppstrm ppInline S.KBD content
| ppInline ppstrm (LABEL content) =
ppTagAndChildren ppstrm ppInline S.LABEL content
| ppInline ppstrm (MAP content) =
ppTagAndChildren ppstrm ppBlock_or_area S.MAP content
| ppInline ppstrm (OBJECT content) =
ppTagAndChildren ppstrm ppFlow_or_param S.OBJECT content
| ppInline ppstrm (Q content) =
ppTagAndChildren ppstrm ppInline S.Q content
| ppInline ppstrm (S content) =
ppTagAndChildren ppstrm ppInline S.S content
| ppInline ppstrm (SAMP content) =
ppTagAndChildren ppstrm ppInline S.SAMP content
| ppInline ppstrm (SELECT content) =
ppTagAndChildren ppstrm ppOptgroup_or_option S.SELECT content
| ppInline ppstrm (SMALL content) =
ppTagAndChildren ppstrm ppInline S.SMALL content
| ppInline ppstrm (SPAN content) =
ppTagAndChildren ppstrm ppInline S.SPAN content
| ppInline ppstrm (STRIKE content) =
ppTagAndChildren ppstrm ppInline S.STRIKE content
| ppInline ppstrm (STRONG content) =
ppTagAndChildren ppstrm ppInline S.STRONG content
| ppInline ppstrm (SUB content) =
ppTagAndChildren ppstrm ppInline S.SUB content
| ppInline ppstrm (SUP content) =
ppTagAndChildren ppstrm ppInline S.SUP content
| ppInline ppstrm (TEXTAREA content) =
ppTagAndChildren ppstrm ppCdata S.TEXTAREA content
| ppInline ppstrm (TT content) =
ppTagAndChildren ppstrm ppInline S.TT content
| ppInline ppstrm (U content) =
ppTagAndChildren ppstrm ppInline S.U content
| ppInline ppstrm (VAR content) =
ppTagAndChildren ppstrm ppInline S.VAR content
and ppList_item ppstrm (LI content) =
ppTagAndChildren ppstrm ppFlow S.LI content
and ppScript ppstrm (SCRIPT content) =
ppTagAndChildren ppstrm ppCdata S.SCRIPT content
and ppParam ppstrm (PARAM attrs) = ppOpenTag ppstrm (S.PARAM, attrs)
and ppLegend ppstrm (LEGEND content) =
ppTagAndChildren ppstrm ppInline S.LEGEND content
and ppDef_term_or_desc ppstrm (DD content) =
ppTagAndChildren ppstrm ppFlow S.DD content
| ppDef_term_or_desc ppstrm (DT content) =
ppTagAndChildren ppstrm ppInline S.DT content
and ppTable_data ppstrm (CAPTION content) =
ppTagAndChildren ppstrm ppInline S.CAPTION content
| ppTable_data ppstrm (COL col) = ppCol ppstrm col
| ppTable_data ppstrm (COLGROUP content) =
ppTagAndChildren ppstrm ppCol S.COLGROUP content
| ppTable_data ppstrm (TBODY content) =
ppTagAndChildren ppstrm ppTr S.TBODY content
| ppTable_data ppstrm (TFOOT content) =
ppTagAndChildren ppstrm ppTr S.TFOOT content
| ppTable_data ppstrm (THEAD content) =
ppTagAndChildren ppstrm ppTr S.THEAD content
and ppTr ppstrm (TR content) =
ppTagAndChildren ppstrm ppTh_or_td S.TR content
and ppTh_or_td ppstrm (TD content) =
ppTagAndChildren ppstrm ppFlow S.TD content
| ppTh_or_td ppstrm (TH content) =
ppTagAndChildren ppstrm ppFlow S.TH content
and ppOptgroup_or_option ppstrm (OPTGROUP content) =
ppTagAndChildren ppstrm
(fn pstrm => fn opt => ppTagAndChildren ppstrm ppCdata S.OPTION opt)
S.OPTGROUP content
| ppOptgroup_or_option ppstrm (OPTION content) =
ppTagAndChildren ppstrm ppCdata S.OPTION content
and ppFlow_or_param ppstrm (FlowOrParam_FLOW flow) = ppFlow ppstrm flow
| ppFlow_or_param ppstrm (FlowOrParam_PARAM param) = ppParam ppstrm param
and ppBlock_or_script ppstrm (BlockOrScript_BLOCK block) =
ppBlock ppstrm block
| ppBlock_or_script ppstrm (BlockOrScript_SCRIPT script) =
ppScript ppstrm script
and ppBlock_or_area ppstrm (AREA attrs) = ppOpenTag ppstrm (S.AREA, attrs)
| ppBlock_or_area ppstrm (BlockOrArea_BLOCK block) = ppBlock ppstrm block
end
(* ____________________________________________________________ *)
val toString = PP.pp_to_string 80 ppHtml
end (* HTML4Printer *)
(* ______________________________________________________________________
End of html4-printer.sml
______________________________________________________________________ *)
|