File: examples.ml

package info (click to toggle)
xmlm 1.0.2-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 424 kB
  • ctags: 670
  • sloc: ml: 1,770; sh: 90; makefile: 19
file content (142 lines) | stat: -rw-r--r-- 4,140 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
(* Examples from the documentation, this code is in public domain. *)

(* Sequential processing *)

let id ic oc = 
  let i = Xmlm.make_input (`Channel ic) in 
  let o = Xmlm.make_output (`Channel oc) in 
  let rec pull i o depth = 
    Xmlm.output o (Xmlm.peek i);
    match Xmlm.input i with 
    | `El_start _ -> pull i o (depth + 1)
    | `El_end -> if depth = 1 then () else pull i o (depth - 1)
    | `Data _ -> pull i o depth 
    | `Dtd _ -> assert false
  in
  Xmlm.output o (Xmlm.input i); (* `Dtd *)
  pull i o 0;
  if not (Xmlm.eoi i) then invalid_arg "document not well-formed"

let id_seq ic oc = 
  let i = Xmlm.make_input (`Channel ic) in 
  let o = Xmlm.make_output ~nl:true (`Channel oc) in 
  while not (Xmlm.eoi i) do Xmlm.output o (Xmlm.input i) done

let prune_docs prune_list ic oc = 
  let i = Xmlm.make_input (`Channel ic) in
  let o = Xmlm.make_output ~nl:true (`Channel oc) in
  let copy i o = Xmlm.output o (Xmlm.input i) in
  let prune (name, _) = List.mem name prune_list in
  let rec process i o d = 
    let rec skip i d = match Xmlm.input i with
    | `El_start _ -> skip i (d + 1)
    | `El_end -> if d = 1 then () else skip i (d - 1)
    | s -> skip i d
    in
    match Xmlm.peek i with 
    | `El_start tag when prune tag -> skip i 0; process i o d
    | `El_start _ -> copy i o; process i o (d + 1)
    | `El_end -> copy i o; if d = 0 then () else process i o (d - 1)
    | `Data _ -> copy i o; process i o d
    | `Dtd _ -> assert false
  in
  let rec docs i o = 
    copy i o; (* `Dtd *)
    copy i o; (* root start *)
    process i o 0;
    if Xmlm.eoi i then () else docs i o
  in
  docs i o

(* Tree processing *)

type tree = E of Xmlm.tag * tree list | D of string

let in_tree i = 
  let el tag childs = E (tag, childs)  in
  let data d = D d in
  Xmlm.input_doc_tree ~el ~data i

let out_tree o t = 
  let frag = function
  | E (tag, childs) -> `El (tag, childs) 
  | D d -> `Data d 
  in
  Xmlm.output_doc_tree frag o t

(* Tabular data processing. *)

type w3c_bureaucrat = { 
    name : string; 
    surname : string; 
    honest : bool; 
    obfuscation_level : float;
    trs : string list; }

let in_w3c_bureaucrats src = 
  let i = Xmlm.make_input ~strip:true src in
  let tag n = ("", n), [] in
  let error () = invalid_arg "parse error" in
  let accept s i = if Xmlm.input i = s then () else error () in
  let rec i_seq el acc i = match Xmlm.peek i with 
  | `El_start _ -> i_seq el ((el i) :: acc) i
  | `El_end -> List.rev acc
  | _ -> error ()
  in
  let i_el n i = 
    accept (`El_start (tag n)) i;
    let d = match Xmlm.peek i with
    | `Data d -> ignore (Xmlm.input i); d
    | `El_end -> ""
    | _ -> error ()
    in
    accept (`El_end) i;
    d
  in
  let i_bureaucrat i = 
    try
      accept (`El_start (tag "bureaucrat")) i;
      let name = i_el "name" i in
      let surname = i_el "surname" i in
      let honest = match Xmlm.peek i with
      | `El_start (("", "honest"), []) -> ignore (i_el "honest" i); true
      | _ -> false
      in
      let obf = float_of_string (i_el "obfuscation_level" i) in
      let trs = i_seq (i_el "tr") [] i in
      accept (`El_end) i;
      { name = name; surname = surname; honest = honest; 
        obfuscation_level = obf; trs = trs }
    with
    | Failure _ -> error () (* float_of_string *)
  in
  accept (`Dtd None) i;
  accept (`El_start (tag "list")) i;
  let bl = i_seq i_bureaucrat [] i in
  accept (`El_end) i;
  if not (Xmlm.eoi i) then invalid_arg "more than one document";
  bl

let out_w3c_bureaucrats dst bl = 
  let tag n = ("", n), [] in
  let o = Xmlm.make_output ~nl:true ~indent:(Some 2) dst in
  let out = Xmlm.output o in
  let o_el n d = 
    out (`El_start (tag n)); 
    if d <> "" then out (`Data d); 
    out `El_end 
  in
  let o_bureaucrat b = 
    out (`El_start (tag "bureaucrat"));
    o_el "name" b.name;
    o_el "surname" b.surname;
    if b.honest then o_el "honest" "";
    o_el "obfuscation_level" (string_of_float b.obfuscation_level);
    List.iter (o_el "tr") b.trs;
    out `El_end
  in
  out (`Dtd None);
  out (`El_start (tag "list"));
  List.iter o_bureaucrat bl;
  out (`El_end)