1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
|
/* -*-indented-text-*- ----------------------------------------------
*
Copyright (c) 1999 Christian Lindig <lindig@ips.cs.tu-bs.de>. All
rights reserved. See COPYING for details.
* $Id: xmlparse.mly,v 1.1 2002/03/05 14:23:03 monate Exp $
*
* This file implements a parser for XML files.
* See: http://www.w3.org/
*
* A XML file mixes markup data, comments, processing instructions
* (pi) and character data. Since the different kinds of content
* are indistinguishable by the scanner the parser controls
* so-called scanner contexts.
*/
%{
(* Helpers *)
open Xml (* XML abstract syntax *)
open Error (* error() *)
open Xmlstate (* setContext() *)
let n = None (* just to save space *)
%}
/* tokens with value */
%token <string> WORD
%token <string> CHUNK
%token <string> NAME
%token <string> STRING
%token <string> PIOPEN
%token <string> OPEN
%token <string> OPENSLASH
/* token */
%token CLOSE
%token COMMENT
%token DOCTYPE
%token DTDCLOSE
%token DTDOPEN
%token ENCODING
%token EOF
%token EQ
%token ERROR
%token PICLOSE
%token PUBLIC
%token S
%token SLASHCLOSE
%token STANDALONE
%token SYSTEM
%token VERSION
%token XMLCLOSE
%token XMLDECL
%token XMLNAME
%token XMLOPEN
%start document
%type <Xml.document> document
%%
document : prolog topelement misc EOF{ XML($1,$2,$3) }
topelement : element { setContext DataContext;
$1
}
/* xm dt pi */
prolog : xmldecl misc { Prolog($1,None ,$2 ) }
| xmldecl misc doctype misc { Prolog($1,Some($3),$2@$4) }
| misc doctype misc { Prolog(n ,Some($2),$1@$3) }
| misc { Prolog(n ,None ,$1 ) }
misc : /**/ { [] }
| misc pi { $2::$1 }
| misc CHUNK { $1 }
| misc COMMENT { $1 }
dtdopen : DTDOPEN { setContext DeclContext}
dtdclose : DTDCLOSE { setContext DataContext }
doctype : dtdopen NAME ext markup
dtdclose { DTD($2,$3) }
ext : /**/ { None }
| SYSTEM STRING { Some (DTDsys($2)) }
| PUBLIC STRING STRING { Some (DTDpub($2,$3)) }
markup : /**/ { None }
| error { error "DTDs are unsupported" }
element : emptyElemTag { let (n,a) = $1 in
Eempty(n,a)
}
| sTag content eTag { let (sn,a) = $1 in
let en = $3 in
let c = List.rev $2 in
if sn = en then
Eelement(sn,a,c)
else error ("tag mismatch")
}
opn : OPEN { setContext ElementContext; $1 }
opnslash : OPENSLASH { setContext ElementContext; $1 }
cls : CLOSE { setContext DataContext }
slashcls : SLASHCLOSE { setContext DataContext }
sTag : opn attributes cls { ($1,$2) }
eTag : opnslash cls { $1 }
emptyElemTag : opn attributes slashcls { ($1,$2) }
attributes : /**/ { [] }
| attributes attribute { $2::$1 }
attribute : NAME EQ STRING { ($1,$3) }
content : /**/ { [] }
| content CHUNK { Echunk($2)::$1 }
| content element { $2::$1 }
| content pi { Epi($2)::$1 }
| content COMMENT { $1 }
xmlopen : XMLOPEN { setContext DeclContext}
xmlclose : XMLCLOSE { setContext DataContext }
xmlinfo : version encoding sddecl { ($1,$2,Some $3) }
| version { ($1,n ,None ) }
| version encoding { ($1,$2,None ) }
| version sddecl { ($1,n ,Some $2) }
xmldecl : xmlopen xmlinfo xmlclose { match $2 with
(vers,enc,sa) ->
Some (XMLDecl(
vers, (* version *)
sa, (* standalone *)
enc (* encoding *)
))
}
version : VERSION EQ STRING { $3 }
encoding : ENCODING EQ STRING { Some $3 }
sddecl : STANDALONE EQ STRING { match $3 with
| "yes" -> true
| "no" -> false
| _ -> error "yes/no expected"
}
piopen : PIOPEN { setContext PiContext; $1}
pi : piopen picontent PICLOSE { setContext DataContext;
($1,List.rev $2)
}
picontent : /**/ { [] }
| picontent WORD { $2 :: $1 }
|