File: preprocess.ml

package info (click to toggle)
pxp 1.2.9-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 7,844 kB
  • sloc: ml: 28,666; xml: 2,597; makefile: 822; sh: 691
file content (253 lines) | stat: -rw-r--r-- 6,494 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
(* $Id$ *)

open Pxp_document
open Pxp_tree_parser
open Pxp_types

exception User_error of string

let user_error (where,line,pos) subject =
  let message =
    (if line <> 0 then
       "In " ^ where ^ ", at line " ^ string_of_int line ^ " position " ^ 
       string_of_int pos ^ ":\n"
     else
       ""
    )
    ^ subject
  in
  raise(User_error message)
;;


let macro_use_uri = "http://www.ocaml-programming.de/macro/use" ;;

let collect_macro_define tree =
  let container = Hashtbl.create 10 in
  (* Collect all macro:define elements and put them into [container].
   * Remove these elements from the tree.
   *)
  iter_tree
    ~pre:(fun node ->
	    if node # node_type = T_element "macro:define" then begin
	      let name = node # required_string_attribute "name" in
	      if Hashtbl.mem container name then
		user_error 
		  (node # position)
		  "Macro already defined";
	      if List.length (node # sub_nodes) <> 1 then
		user_error
		  (node # position)
		  "Macro must contain exactly one subnode";
	      node # remove();
	      Hashtbl.add container name (node # nth_node 0);
	      raise Skip        (* Do not iterate over the children of [node] *)
	    end
	 )
    tree;
  container
;;


let parameter_re = Str.regexp "\\$[-a-zA-Z0-9_.]+";;

let replace_macro_use container tree =
  let replace_params position data atts =
    let splitted = Str.full_split parameter_re data in
    let splitted' =
      List.map
	(function
	     Str.Text s  -> s
	   | Str.Delim s ->
	       let param_name = String.sub s 1 (String.length s - 1) in
	       ( try
		   let param_value = 
		     List.assoc param_name atts in   (* or Not_found *)
		   match param_value with
		       Value v -> v
		     | _       -> assert false
	         with
		       Not_found ->
			 user_error
			   position
			   ("Parameter " ^ param_name ^ " not found")
	       )
	)
	splitted in
    String.concat "" splitted'
  in

  let replace_use position name atts = 
    try
      let subst = Hashtbl.find container name in   (* or Not_found *)
      (* Make a copy of [subst], and replace the parameters: *)
      map_tree
	~pre:(fun node -> node # orphaned_flat_clone)
	~post:(fun node ->
		 match node # node_type with
		     T_data ->
		       (* It is possible that the data node contains
			* parameters
			*)
		       let data = node # data in
		       let data' = replace_params
				     (node # position)
				     data
				     atts in
		       node # set_data data';
		       node
		   | _ ->
		       node
	      )
	subst
    with
	Not_found ->
	  user_error position ("Macro not found: " ^ name)
  in
  (* Make a copy of [tree], and replace the macro calls: *)
  map_tree
    ~pre:(fun node -> node # orphaned_flat_clone)
    ~post:(fun node ->
	     match node # node_type with
		 T_element "macro:use" ->
		   let name = node # required_string_attribute "name" in
		   let atts = node # attributes in
		   let atts' = List.remove_assoc "name" atts in
		   replace_use (node # position) name atts'
	       | T_element _ when node # namespace_uri = macro_use_uri ->
		   let name = node # localname in
		   replace_use (node # position) name (node # attributes)
	       | _ ->
		   node
	  )
    tree
;;


let read_macro_dtd config =
  Pxp_dtd_parser.parse_dtd_entity config (from_file "macro.dtd")
;;


let copy_general_entities dtd1 dtd2 =
  (* Copy the general entities from dtd1 to dtd2 *)
  let names = dtd1 # gen_entity_names in
  List.iter
    (fun name ->
       let ent, is_external = dtd1 # gen_entity name in
       dtd2 # add_gen_entity ent is_external
    )
    names
;;


let transform config dtd filename =
  (* Read the document: *)
  let found_dtd_id = ref None in
  let found_root = ref None in
  let doc = parse_document_entity
	      ~transform_dtd:(fun found_dtd -> 
				(* Save the DTD ID: *)
				found_dtd_id := found_dtd # id;
				found_root := found_dtd # root;
				(* Copy general entities to [dtd]: *)
				copy_general_entities found_dtd dtd;
				(* Replace the found DTD by this one: *)
				dtd)
	      config
	      (from_file filename)
	      default_namespace_spec in
  let root = doc # root in
  (* Collect the macro definitions: 
   * (As a side effect, remove the definitions from the tree [root].)
   *)
  let definitions = collect_macro_define root in
  (* Expand the macro calls: *)
  let root' = replace_macro_use definitions root in
  (* Output the result: *)
  let root_element = find (fun node ->
			     match node # node_type with
				 T_element _ -> true 
			       | _ -> false
			  ) 
		          root' in
  let default_prefix = root_element # normprefix in
  print_string "<?xml version='1.0'?>\n";
  (* Output the DOCTYPE line, if needed. This is a bit delicate. *)
  ( match !found_dtd_id with
	Some (External _)
      | Some (Derived _) ->
	  let id = (match !found_dtd_id with
			Some (External x) -> x
		      | Some (Derived x) -> x
		      | _ -> assert false
		   ) in
	  (* If the id is Derived, the internal subset of the DTD is
	   * silently dropped.
	   *)
	  print_string "<!DOCTYPE ";
	  ( match root_element # node_type with
		T_element r -> 
		  (* Remove the default prefix: *)
		  let p, l = Pxp_aux.namespace_split r in
		  print_string (if p = default_prefix then l else r);
	      | _           -> assert false
	  );
	  ( match id with
		System sysid ->
		  print_string " SYSTEM \"";
		  print_string sysid;
		  print_string "\""
	      | Public(pubid,sysid) ->
		  print_string " PUBLIC \"";
		  print_string pubid;
		  print_string "\" \"";
		  print_string sysid;
		  print_string "\"";
	      | _ ->
		  assert false
	  );
	  print_string ">\n";
      | Some Internal
      | None ->
	  (* Internal DTDs are silently dropped *)
	  ()
  );
  root' # write ~default:default_prefix (`Out_channel stdout) `Enc_utf8
;;


let main() =
  let filename = ref "" in
  Arg.parse
      []
      (fun s ->
	 if !filename <> "" then
	   raise(Arg.Bad "Please, only one file at once!");
	 filename := s
      )
      "usage: preprocess [ options ] filename";
  if !filename = "" then
    user_error ("",0,0) "No input file";
  let config =
    { default_namespace_config with
	encoding = `Enc_utf8;
	enable_pinstr_nodes = true;
	enable_super_root_node = true;
	enable_comment_nodes = true;
    }
  in
  let dtd = read_macro_dtd config in
  transform config dtd !filename
;;

try
  main() 
with
    User_error message ->
      prerr_endline message;
      exit 1
  | other ->
      prerr_endline (string_of_exn other);
      exit 1
;;