1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
|
(* $Id$
* ----------------------------------------------------------------------
*
*)
open Pxp_document;;
open Pxp_tree_parser;;
open Pxp_types;;
let error_happened = ref false;;
let print_error e =
print_endline (string_of_exn e)
;;
class warner =
object
method warn w =
print_endline ("WARNING: " ^ w)
end
;;
let resolve_by_helper scheme program sends_mime_header =
let url_syntax =
{ Neturl.ip_url_syntax with
Neturl.url_accepts_8bits = true
}
in
let get_url id =
(* Only accept SYSTEM Ids with the right scheme: *)
match id with
System sysid ->
( try
let sysid_scheme =
try Neturl.extract_url_scheme sysid
with Neturl.Malformed_URL -> scheme
(* If no scheme found: assume our own scheme *)
in
if sysid_scheme = scheme then
Neturl.url_of_string url_syntax sysid (* or Malformed_URL *)
else
raise Pxp_reader.Not_competent
with
(* If the URL is syntactically wrong, do not accept it: *)
Neturl.Malformed_URL -> raise Pxp_reader.Not_competent
)
| _ ->
raise Pxp_reader.Not_competent
in
let read_mime_header ch =
let empty_re = Str.regexp "^[ \t\r\n]*$" in
let is_empty s = Str.string_match empty_re s 0 in
let buffer = Buffer.create 1024 in
let line = ref(input_line ch) in
if String.length !line >= 6 && String.sub !line 0 5 = "HTTP/" then
line := input_line ch;
while not (is_empty !line) do
Buffer.add_string buffer !line;
Buffer.add_string buffer "\n";
line := input_line ch
done;
Buffer.add_string buffer "\n";
Buffer.contents buffer
in
let open_channel id url =
let url_string = Neturl.string_of_url url in
let command = program ^ " " ^ Filename.quote url_string in
let ch = Unix.open_process_in command in
if sends_mime_header then
let header_string = read_mime_header ch in
let header_alist,_ = Mimestring.scan_header
header_string 0 (String.length header_string) in
let content_type =
try List.assoc "content-type" header_alist
with Not_found -> "application/octet-stream" in
let mime_type, mime_type_params =
Mimestring.scan_mime_type content_type [] in
let encoding =
try Some(Netconversion.encoding_of_string
(List.assoc "charset" mime_type_params))
with Not_found -> None in
ch, encoding
else
ch, None
in
let close_channel ch =
match Unix.close_process_in ch with
Unix.WEXITED 0 ->
()
| Unix.WEXITED n ->
failwith("Command terminated with exit code " ^ string_of_int n)
| Unix.WSIGNALED n ->
failwith("Command terminated by signal " ^ string_of_int n)
| _ -> assert false
in
new Pxp_reader.resolve_read_url_channel
~close:close_channel
~url_of_id: get_url
~channel_of_url: open_channel
()
;;
let parse debug wf namespaces iso88591 helpers filename =
try
(* Parse the document: *)
let parse_fn =
if wf then parse_wfdocument_entity ?transform_dtd:None
else
let index = new hash_index in
parse_document_entity
?transform_dtd:None
~id_index:(index :> 'ext index)
in
let mng =
if namespaces then
Some (new Pxp_dtd.namespace_manager)
else
None
in
let resolver =
let file_resolver =
new Pxp_reader.resolve_as_file() in
new Pxp_reader.combine (helpers @ [file_resolver])
in
let start_id =
System filename in
let spec =
if namespaces then default_namespace_spec else default_spec in
let doc =
parse_fn
{ default_config with
debugging_mode = debug;
encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
idref_pass = true;
enable_namespace_processing = mng;
warner = new warner
}
(ExtID(start_id, resolver))
spec
in
()
with
e ->
(* Print error; remember that there was an error *)
error_happened := true;
print_error e;
(* raise e *)
;;
let main() =
let debug = ref false in
let wf = ref false in
let namespaces = ref false in
let iso88591 = ref false in
let helpers = ref [] in
let files = ref [] in
let eq_split s =
let eq =
try String.index s '='
with Not_found -> raise(Arg.Bad "Syntax error")
in
let before_eq = String.sub s 0 eq in
let after_eq = String.sub s (eq+1) (String.length s - eq - 1) in
(before_eq, after_eq)
in
let add_helper sends_mime_header s =
let scheme,cmd = eq_split s in
let h = resolve_by_helper scheme cmd sends_mime_header in
helpers := !helpers @ [h]
in
let add_pubid s =
let pubid,filename = eq_split s in
let h = Pxp_reader.lookup_public_id_as_file [pubid,filename] in
helpers := !helpers @ [h]
in
let add_sysid s =
let sysid,filename = eq_split s in
let h = Pxp_reader.lookup_system_id_as_file [sysid,filename] in
helpers := !helpers @ [h]
in
Arg.parse
[ "-d", Arg.Set debug,
" turn debugging mode on";
"-wf", Arg.Set wf,
" check only for well-formedness";
"-namespaces", Arg.Set namespaces,
" enable namespace support";
"-iso-8859-1", Arg.Set iso88591,
" use ISO-8859-1 as internal encoding instead of UTF-8";
"-helper", Arg.String (add_helper false),
"scheme=cmd add this helper command";
"-helper-mh", Arg.String (add_helper true),
"scheme=cmd add this helper command (which sends mime headers)";
"-pubid", Arg.String add_pubid,
"id=file map this PUBLIC id to this file";
"-sysid", Arg.String add_sysid,
"id=file map this SYSTEM id to this file";
]
(fun x -> files := x :: !files)
"
usage: pxpvalidate [options] URL ...
- checks the validity of XML documents. See below for list of options.
<title>PXP - The XML parser for Objective Caml</title>
List of options:";
files := List.rev !files;
List.iter (parse !debug !wf !namespaces !iso88591 !helpers) !files;
;;
main();
if !error_happened then exit(1);;
|