File: graphmlReader.ml

package info (click to toggle)
botch 0.24-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,084,624 kB
  • sloc: xml: 11,924,892; ml: 4,489; python: 3,890; sh: 1,268; makefile: 334
file content (205 lines) | stat: -rw-r--r-- 8,682 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
(**************************************************************************)
(*                                                                        *)
(*  Copyright (C) 2012 Johannes 'josch' Schauer <j.schauer@email.de>      *)
(*                                                                        *)
(*  This library is free software: you can redistribute it and/or modify  *)
(*  it under the terms of the GNU Lesser General Public License as        *)
(*  published by the Free Software Foundation, either version 3 of the    *)
(*  License, or (at your option) any later version.  A special linking    *)
(*  exception to the GNU Lesser General Public License applies to this    *)
(*  library, see the COPYING file for more information.                   *)
(**************************************************************************)

open! ExtLib

type value =
  | Int of int
  | Float of float
  | String of string
  | Bool of bool
  | List of value_list
and value_list = (string * value) list

type tree = E of string * (string * string) list * tree list | D of string

module Parse
  (B : Graph.Builder.S)
  (L : sig val node : value_list -> B.G.V.label
           val edge : value_list -> B.G.E.label end) = 
struct
  let create_graph ?(nodesort=None) ?(edgesort=None) l =
    let nodes = Hashtbl.create 1000 in
    let handle_node g l =
      let n = B.G.V.create (L.node l) in
      begin
        try
          let id = List.assoc "id" l in Hashtbl.add nodes id n
        with Not_found ->
          ()
      end;
      B.add_vertex g n
    in
    let handle_edge g l =
      try
        let source = List.assoc "source" l in
        let target = List.assoc "target" l in
        let nsource = Hashtbl.find nodes source in
        let ntarget = Hashtbl.find nodes target in
        let e = B.G.E.create nsource (L.edge l) ntarget in
        B.add_edge_e g e
      with Not_found ->
        g
    in
    let g = B.empty () in
    (* 1st pass: create the nodes *)
    let g = match nodesort with
      | None ->
        List.fold_left
          (fun g v -> match v with
             | "node", List l -> handle_node g l
             | _ -> g
          ) g l
      | Some sortfunc ->
        let nodes = List.sort ~cmp:sortfunc
            (List.filter_map (function | "node", List l -> Some l | _ -> None) l)
        in
        List.fold_left handle_node g nodes
    in
    (* 2nd pass: add the edges *)
    match edgesort with
      | None ->
        List.fold_left
          (fun g v -> match v with
             | "edge", List l -> handle_edge g l
             | _ -> g
          ) g l
      | Some sortfunc ->
        let edges = List.sort ~cmp:sortfunc
            (List.filter_map (function | "edge", List l -> Some l | _ -> None) l)
        in
        List.fold_left handle_edge g edges
  let parse ?(nodesort=None) ?(edgesort=None) ic =
    let i = Xmlm.make_input (`Channel ic) in
    let el ((_,tag),attrs) children = E (tag, List.map (fun ((_,n),v) -> n,v) attrs, children)  in
    let data d = D d in
    let _,doc = try Xmlm.input_doc_tree ~el ~data i
      with
      | Xmlm.Error ((line,col),error) ->
        invalid_arg (Printf.sprintf "Line %d, Column %d: %s"
                       line col (Xmlm.error_message error))
    in
    let assoc k l = try List.assoc k l with Not_found -> invalid_arg (Printf.sprintf "Cannot find attribute %s" k) in
    (* expect <graphml> *)
    let graphml = match doc with
      | E (el,_,tl) ->
         if el = "graphml" then tl
         else invalid_arg (Printf.sprintf "Expected <graphml>, got %s" el)
      | D _ -> invalid_arg "Expected <graphml>, got data"
    in
    (* find all <graph> and <key> *)
    let keys, graphs = List.fold_left (fun (k,g) t ->
      match t with
        | E (el,a,st) ->
          begin match el with
           | "key" -> (a,st)::k,g
           | "graph" -> k,(a,st)::g
           | "desc" | "data " -> k,g (* silently ignore <desc> and <data> *)
           | _ -> invalid_arg (Printf.sprintf "Unexpected child of <graphml>: %s" el)
          end
        | D _ -> k,g (* silently ignore data *)
    ) ([],[]) graphml in
    (* extract node and edge keys for attribute types *)
    let nkeys, ekeys = 
      List.fold_left (fun (nk,ek) (a,_) ->
        let f = assoc "for" a in
        let i = assoc "id" a in
        let t = assoc "attr.type" a in
        let n = assoc "attr.name" a in
        match f with
         | "node" -> (i,(t,n))::nk,ek
         | "edge" -> nk,(i,(t,n))::ek
         | _ -> invalid_arg (Printf.sprintf "Only support for \"node\" and \"edge\" keys, not %s" f)
      ) ([],[]) keys
    in
    (* error if more than one *)
    let graphattr,graphtree = match graphs with
     | [] -> invalid_arg "No <graph> elements"
     | [l] -> l
     | _ -> invalid_arg "No support for more than one <graph>"
    in
    (* check whether input graph has the same directed-ness as the graph builder *)
    let is_directed = assoc "edgedefault" graphattr = "directed" in
    if B.G.is_directed then
       if is_directed then () else invalid_arg "cannot read undirected graphml into directed graph builder"
    else
       if is_directed then invalid_arg "cannot read directed graphml into undirected graph builder" else ();
    (* extract nodes and edges *)
    let l = List.filter_map (function
      | E (el,a,t) -> begin
        match el with
         | "node" -> begin
           let attr = List.filter_map (function
            | E (el,a,t) ->
            begin match el with
             | "data" -> begin
               let k = assoc "key" a in
               let d = match t with [D d] -> d | _ -> invalid_arg "Expected data" in
               try begin match List.assoc k nkeys with
                | ("string",n) -> Some (n, String d)
                | ("int",n) -> Some(n, Int (int_of_string d))
                | ("long",n) -> Some(n, Int (int_of_string d))
                | ("float",n) -> Some(n, Float (float_of_string d))
                | ("bool",n) -> Some(n, Bool (bool_of_string d))
                | (t,n) -> invalid_arg (Printf.sprintf "Unsupported node type %s for attribute %s" t n)
               end with Not_found -> invalid_arg (Printf.sprintf "Cannot find node type %s" k)
             end
             | "desc" -> None (* silently ignore <desc> *)
             | "port" -> invalid_arg "No support for <port>"
             | "graph" -> invalid_arg "No support for nested graphs"
             | "locator" -> invalid_arg "No support for <locator>"
             | _ -> invalid_arg (Printf.sprintf "Unexpected child of <node>: %s" el)
            end
            | D _ -> None (* silently ignore data *)
           ) t in
           let i = assoc "id" a in
           let l = ("id",String i)::attr in
           Some ("node", List l)
         end
         | "edge" -> begin
           let attr = List.filter_map (function
            | E (el,a,t) ->
            begin match el with
             | "data" -> begin
               let k = assoc "key" a in
               let d = match t with [D d] -> d | _ -> invalid_arg "Expected data" in
               try begin match List.assoc k ekeys with
                | ("string",n) -> Some (n, String d)
                | ("int",n) -> Some(n, Int (int_of_string d))
                | ("long",n) -> Some(n, Int (int_of_string d))
                | ("float",n) -> Some(n, Float (float_of_string d))
                | ("bool",n) -> Some(n, Bool (bool_of_string d))
                | (t,n) -> invalid_arg (Printf.sprintf "Unsupported edge type %s for attribute %s" t n)
               end with Not_found -> invalid_arg (Printf.sprintf "Cannot find edge type %s" k)
             end
             | "desc" -> None (* silently ignore desc *)
             | "graph" -> invalid_arg "No support for nested graphs"
             | _ -> invalid_arg (Printf.sprintf "Unexpected child of <edge>: %s" el)
            end
            | D _ -> None (* silently ignore data *)
           ) t in
           let source = assoc "source" a in
           let target = assoc "target" a in
           let l = ("source",String source)::("target",String target)::attr in
           let l = if List.mem_assoc "id" a then ("id",String (assoc "id" a))::l else l in
           Some ("edge", List l)
         end
         | "desc" | "data" -> None (* silently ignore <desc> and <data> *)
         | "hyperedge" -> invalid_arg "No support for <hyperedge>"
         | "locator" -> invalid_arg "No support for <locator>"
         | _ -> invalid_arg (Printf.sprintf "Unexpected child of <graph>: %s" el)
      end
      | D _ -> None (* silently ignore data *)
    ) graphtree in
    let g = create_graph ~nodesort ~edgesort l in
    g
end