1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
|
COMPILER HTML
(* Attempt to describe a fairly large subset of HTML (HyperText Markup Language)
This is incomplete, and parts are not properly described anyway!
P.D. Terry and P.D. Koeslag, Rhodes University, 1995 *)
(* Keywords and literal tags in the productions are matched without regard
   to letter case. *)
IGNORE CASE
(* Characters CHR(1) to CHR(13) are skipped between tokens; the range
   includes tab CHR(9), line feed CHR(10) and carriage return CHR(13). *)
IGNORE CHR(1) .. CHR(13)
CHARACTERS
  (* Line terminators.  Both carriage return and line feed are listed so
     that a String stops at the end of a line on LF-only files as well as
     on CR LF files. *)
  eol     = CHR(13) + CHR(10) .
  (* Markup characters; these may never be part of a Word. *)
  Special = '<>&"=' .
  (* Every character from CHR(33) to CHR(255) that is not Special.  The
     blank, CHR(32), is outside the range and therefore separates Words. *)
  Letter  = CHR(33) .. CHR(255) - Special .
  (* Characters allowed inside a quoted String: anything except the double
     quote itself and a line terminator. *)
  Noquote = ANY - '"' - eol .
TOKENS
(* Word: one or more Letter characters, i.e. a run of text containing no
   blank and none of the Special characters. *)
Word = Letter { Letter } .
(* String: text enclosed in double quotes; it may contain neither a double
   quote nor an eol character. *)
String = '"' { Noquote } '"' .
PRODUCTIONS
(* Goal symbol: a complete document.  The enclosing <HTML> ... </HTML> pair
   is optional; in both forms a head part is followed by a body part and
   then by end of file. *)
HTML = ( "<HTML>" HeadPart BodyPart "</HTML>"
| HeadPart BodyPart ) EOF .
(* Document head: a mandatory title field, optionally wrapped in
   <HEAD> ... </HEAD>. *)
HeadPart = "<HEAD>" TitleField "</HEAD>" | TitleField .
(* Document body: zero or more fields, optionally wrapped in
   <BODY> ... </BODY>.  The unwrapped alternative may be empty. *)
BodyPart = "<BODY>" { Field } "</BODY>" | { Field } .
(* Document title: zero or more text items or images between <TITLE> and
   </TITLE>. *)
TitleField = "<TITLE>" { TextOrImage } "</TITLE>" .
(* Or should this be simply
TitleField = "<TITLE>" { InlineText } "</TITLE>" .
*)
(* One item of body content.  The alternatives begin with distinct tokens,
   so the order in which they are listed does not affect parsing. *)
Field = TextOrImage
      | Anchor
      | Heading   | Paragraph | Address | PreFormatted
      | UnNumList | NumList   | DefList .
(* Section heading at any of the six HTML levels H1 to H6.  Each start tag
   may carry an ALIGN attribute, the content is zero or more text items or
   images, and the end tag must match the level of the start tag. *)
Heading = "<H1" Alignment ">" { TextOrImage } "</H1>"
        | "<H2" Alignment ">" { TextOrImage } "</H2>"
        | "<H3" Alignment ">" { TextOrImage } "</H3>"
        | "<H4" Alignment ">" { TextOrImage } "</H4>"
        | "<H5" Alignment ">" { TextOrImage } "</H5>"
        | "<H6" Alignment ">" { TextOrImage } "</H6>" .
(* Optional ALIGN attribute; the only values accepted are TOP, MIDDLE and
   CENTER.  Used by Heading, Paragraph and Image. *)
Alignment = [ "ALIGN" "=" ( "TOP" | "MIDDLE" | "CENTER" ) ] .
(* Anchor: <A HREF=path> or <A NAME=path>, enclosing at least one text item
   or image and closed by </A>.  Exactly one of the two attributes must be
   given. *)
Anchor = "<A" ( "HREF" | "NAME" ) "=" FilePath ">"
TextOrImage { TextOrImage } "</A>" .
(* Value of an HREF, NAME or SRC attribute: either a quoted String or a
   bare Word. *)
FilePath = String
         | Word .
(* A single item of running content: an <IMG ...> element or one piece of
   inline text. *)
TextOrImage = Image
            | InlineText .
(* Unnumbered list: zero or more list elements between <UL> and </UL>. *)
UnNumList = "<UL>" { ListElement } "</UL>" .
(* Numbered list: zero or more list elements between <OL> and </OL>. *)
NumList = "<OL>" { ListElement } "</OL>" .
(* List element: <LI> followed by zero or more fields.  No closing tag is
   part of this production; the element ends where the next <LI> or the
   end of the enclosing list begins. *)
ListElement = "<LI>" { Field } .
(* Address block: at least one inline text item between <ADDRESS> and
   </ADDRESS>.  Images are not permitted here. *)
Address = "<ADDRESS>" InlineText { InlineText } "</ADDRESS>" .
(* Phrase markup: DFN, EM, CITE, CODE or STRONG.  Each form encloses zero or
   more inline text items and requires its matching end tag.  Because
   InlineText includes Format, these elements may nest. *)
Format = "<DFN>" { InlineText } "</DFN>"
| "<EM>" { InlineText } "</EM>"
| "<CITE>" { InlineText } "</CITE>"
| "<CODE>" { InlineText } "</CODE>"
| "<STRONG>" { InlineText } "</STRONG>" .
(* Paragraph mark: a lone <P> tag with an optional ALIGN attribute.  It does
   not enclose any content in this grammar; a closing </P> is accepted
   separately as a Control item. *)
Paragraph = "<P" Alignment ">" .
(* Definition list: one or more term/definition elements between <DL> and
   </DL>. *)
DefList = "<DL>" DefListElement { DefListElement } "</DL>" .
(* Definition list element: <DT> and its fields, then <DD> and its fields.
   Both tags are required, in that order; either field sequence may be
   empty. *)
DefListElement = "<DT>" { Field } "<DD>" { Field } .
(* Preformatted block: zero or more inline text items between <PRE> and
   </PRE>.  Images are not permitted here. *)
PreFormatted = "<PRE>" { InlineText } "</PRE>" .
(* Image element: <IMG SRC=path>.  An optional ALIGN attribute is accepted
   before SRC, after it, or in both positions; an optional ALT attribute,
   whose value must be a quoted String, may follow. *)
Image = "<IMG" Alignment "SRC" "=" FilePath Alignment
[ "ALT" "=" String ] ">" .
(* One piece of running text: phrase markup, a control item, an escape
   sequence, a quoted String or a plain Word.  The alternatives begin with
   distinct tokens, so their order does not affect parsing. *)
InlineText = Format
           | Control
           | Escape
           | String
           | Word .
(* Character entity references for less-than, greater-than, ampersand and
   double quote, the Special characters that cannot appear literally inside
   a Word.  Each literal is spelled out in full: an ampersand, the entity
   name (lt, gt, amp, quot) and a terminating semicolon.  The earlier text of
   this production held the decoded characters themselves, which left an
   invalid three-quote string literal. *)
Escape = "&lt;" | "&gt;" | "&amp;" | "&quot;" .
(* Items accepted as ordinary inline text: the stand-alone tags </P>, <BR>
   and <HR>, the equals sign, and every word that the attribute syntax turns
   into a keyword.  Listing the keywords here lets those words still appear
   in running text.  SRC and ALT, which Image uses as keywords, are included
   for the same reason; because of IGNORE CASE the plain words "src" and
   "alt" would otherwise be rejected wherever text is expected. *)
Control = "</P>" | "<BR>" | "<HR>" | "=" |
          "ALIGN" | "HREF" | "NAME" | "SRC" | "ALT" |
          "TOP" | "MIDDLE" | "CENTER" .
END HTML.
|