File: validate.ml

package info (click to toggle)
pxp 1.2.9-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 7,844 kB
  • sloc: ml: 28,666; xml: 2,597; makefile: 822; sh: 691
file content (223 lines) | stat: -rw-r--r-- 6,104 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
(* $Id$
 * ----------------------------------------------------------------------
 *
 *)


open Pxp_document;;
open Pxp_tree_parser;;
open Pxp_types;;

let error_happened = ref false;;

let print_error e =
  print_endline (string_of_exn e)
;;

class warner =
  object 
    method warn w =
      print_endline ("WARNING: " ^ w)
  end
;;

let resolve_by_helper scheme program sends_mime_header =
  let url_syntax =
    { Neturl.ip_url_syntax with 
	Neturl.url_accepts_8bits = true
    }
  in
  let get_url id =
    (* Only accept SYSTEM Ids with the right scheme: *)
    match id with
	System sysid ->
	  ( try
	      let sysid_scheme = 
		try Neturl.extract_url_scheme sysid 
		with Neturl.Malformed_URL -> scheme
		     (* If no scheme found: assume our own scheme *)
	      in
	      if sysid_scheme = scheme then
		Neturl.url_of_string url_syntax sysid  (* or Malformed_URL *)
	      else
		raise Pxp_reader.Not_competent
	    with
		(* If the URL is syntactically wrong, do not accept it: *)
		Neturl.Malformed_URL -> raise Pxp_reader.Not_competent
	  )
      | _ ->
	  raise Pxp_reader.Not_competent
  in
  let read_mime_header ch =
    let empty_re = Str.regexp "^[ \t\r\n]*$" in
    let is_empty s = Str.string_match empty_re s 0 in
    let buffer = Buffer.create 1024 in
    let line = ref(input_line ch) in
    if String.length !line >= 6 && String.sub !line 0 5 = "HTTP/" then
      line := input_line ch;
    while not (is_empty !line) do
      Buffer.add_string buffer !line;
      Buffer.add_string buffer "\n";
      line := input_line ch
    done;
    Buffer.add_string buffer "\n";
    Buffer.contents buffer
  in
  let open_channel id url =
    let url_string = Neturl.string_of_url url in
    let command = program ^ " " ^ Filename.quote url_string in
    let ch = Unix.open_process_in command in
    if sends_mime_header then
      let header_string = read_mime_header ch in
      let header_alist,_ = Mimestring.scan_header 
			     header_string 0 (String.length header_string) in
      let content_type = 
	try List.assoc "content-type" header_alist 
	with Not_found -> "application/octet-stream" in
      let mime_type, mime_type_params =
	Mimestring.scan_mime_type content_type [] in
      let encoding =
	try Some(Netconversion.encoding_of_string
		   (List.assoc "charset" mime_type_params))
	with Not_found -> None in
      ch, encoding
    else
      ch, None
  in
  let close_channel ch =
    match Unix.close_process_in ch with
	Unix.WEXITED 0 ->
	  ()
      | Unix.WEXITED n ->
	  failwith("Command terminated with exit code " ^ string_of_int n)
      | Unix.WSIGNALED n ->
	  failwith("Command terminated by signal " ^ string_of_int n)
      | _ -> assert false
  in
  new Pxp_reader.resolve_read_url_channel
    ~close:close_channel
    ~url_of_id: get_url
    ~channel_of_url: open_channel
    ()
;;


let parse debug wf namespaces iso88591 helpers filename =
  try 
    (* Parse the document: *)
    let parse_fn =
      if wf then parse_wfdocument_entity ?transform_dtd:None
      else 
	let index = new hash_index in
	parse_document_entity 
	  ?transform_dtd:None 
	  ~id_index:(index :> 'ext index)
    in
    let mng =
      if namespaces then
	Some (new Pxp_dtd.namespace_manager)
      else
	None
    in
    let resolver =
      let file_resolver =
	new Pxp_reader.resolve_as_file() in
      new Pxp_reader.combine (helpers @ [file_resolver])
    in
    let start_id =
      System filename in
    let spec =
      if namespaces then default_namespace_spec else default_spec in
    let doc =
      parse_fn
	  { default_config with 
	      debugging_mode = debug;
	      encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
	      idref_pass = true;
	      enable_namespace_processing = mng;
	      warner = new warner
          }
	  (ExtID(start_id, resolver))
	  spec 
    in
    ()
  with
      e ->
	(* Print error; remember that there was an error *)
	error_happened := true;
	print_error e;
	(* raise e *)
;;


let main() =
  let debug = ref false in
  let wf = ref false in
  let namespaces = ref false in
  let iso88591 = ref false in
  let helpers = ref [] in
  let files = ref [] in

  let eq_split s =
    let eq = 
      try String.index s '='
      with Not_found -> raise(Arg.Bad "Syntax error")
    in
    let before_eq = String.sub s 0 eq in
    let after_eq = String.sub s (eq+1) (String.length s - eq - 1) in
    (before_eq, after_eq)
  in

  let add_helper sends_mime_header s =
    let scheme,cmd = eq_split s in
    let h = resolve_by_helper scheme cmd sends_mime_header in
    helpers := !helpers @ [h]
  in

  let add_pubid s =
    let pubid,filename = eq_split s in
    let h = Pxp_reader.lookup_public_id_as_file [pubid,filename] in
    helpers := !helpers @ [h]
  in

  let add_sysid s =
    let sysid,filename = eq_split s in
    let h = Pxp_reader.lookup_system_id_as_file [sysid,filename] in
    helpers := !helpers @ [h]
  in

  Arg.parse
      [ "-d",   Arg.Set debug, 
           "                     turn debugging mode on";
	"-wf",  Arg.Set wf,    
            "                    check only for well-formedness";
	"-namespaces", Arg.Set namespaces,
	            "            enable namespace support";
        "-iso-8859-1", Arg.Set iso88591, 
                    "            use ISO-8859-1 as internal encoding instead of UTF-8";
	"-helper", Arg.String (add_helper false),
	        "scheme=cmd      add this helper command";
	"-helper-mh", Arg.String (add_helper true),
	           "scheme=cmd   add this helper command (which sends mime headers)";
	"-pubid", Arg.String add_pubid,
	       "id=file          map this PUBLIC id to this file";
	"-sysid", Arg.String add_sysid,
	       "id=file          map this SYSTEM id to this file";
      ]
      (fun x -> files := x :: !files)
      "
usage: pxpvalidate [options] URL ...

- checks the validity of XML documents. See below for list of options.

<title>PXP - The XML parser for Objective Caml</title>

List of options:";
  files := List.rev !files;
  List.iter (parse !debug !wf !namespaces !iso88591 !helpers) !files;
;;


main();
if !error_happened then exit(1);;