File: dag.ml

package info (click to toggle)
ocaml-obuild 0.2.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,456 kB
  • sloc: ml: 14,491; sh: 211; ansic: 34; makefile: 11
file content (302 lines) | stat: -rw-r--r-- 9,751 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
(* Optimized bi-directional DAG implementation using sets and int indexing *)
open Printf
open Compat

(* IntSet for efficient membership and removal operations *)
module IntSet = Set.Make(struct
  type t = int
  let compare = compare
end)

(* Internal representation: nodes are mapped to integer IDs,
   and parent/child relationships use IntSet for O(log n) operations *)
type 'a dagnode =
    { mutable parents  : IntSet.t
    ; mutable children : IntSet.t
    }

type 'a t =
    { nodes : (int, 'a dagnode) Hashtbl.t  (* ID -> node structure *)
    ; node_to_id : ('a, int) Hashtbl.t      (* node -> ID mapping *)
    ; id_to_node : (int, 'a) Hashtbl.t      (* ID -> node mapping *)
    ; mutable next_id : int                 (* counter for new IDs *)
    }

let init () =
    { nodes = Hashtbl.create 16
    ; node_to_id = Hashtbl.create 16
    ; id_to_node = Hashtbl.create 16
    ; next_id = 0
    }

(* Get or create ID for a node *)
let get_node_id dag node =
    match SafeHashtbl.find_opt dag.node_to_id node with
    | Some id -> id
    | None ->
        let id = dag.next_id in
        dag.next_id <- dag.next_id + 1;
        Hashtbl.add dag.node_to_id node id;
        Hashtbl.add dag.id_to_node id node;
        id

let length dag = Hashtbl.length dag.nodes

(* Add an directed edge from a to b.
 *
 * 'a' is the parent of 'b'
 * 'b' is the child of 'a'
 *)
let add_edge a b dag =
    let aid = get_node_id dag a in
    let bid = get_node_id dag b in
    let maNode = SafeHashtbl.find_opt dag.nodes aid in
    let mbNode = SafeHashtbl.find_opt dag.nodes bid in
    (match (maNode, mbNode) with
    | None, None       ->
        Hashtbl.add dag.nodes aid { parents = IntSet.empty; children = IntSet.singleton bid };
        Hashtbl.add dag.nodes bid { parents = IntSet.singleton aid; children = IntSet.empty }
    | Some aNode, None ->
        aNode.children <- IntSet.add bid aNode.children;
        Hashtbl.add dag.nodes bid { parents = IntSet.singleton aid; children = IntSet.empty }
    | None, Some bNode ->
        bNode.parents <- IntSet.add aid bNode.parents;
        Hashtbl.add dag.nodes aid { parents = IntSet.empty; children = IntSet.singleton bid }
    | Some aNode, Some bNode ->
        aNode.children <- IntSet.add bid aNode.children;
        bNode.parents <- IntSet.add aid bNode.parents
    );
    ()

exception DagNodeNotFound
exception DagNodeAlreadyExists

let add_node a dag =
    let aid = get_node_id dag a in
    if not (Hashtbl.mem dag.nodes aid) then
        Hashtbl.add dag.nodes aid { parents = IntSet.empty; children = IntSet.empty }

let add_node_exclusive a dag =
    let aid = get_node_id dag a in
    if Hashtbl.mem dag.nodes aid then
        raise DagNodeAlreadyExists
    else
        Hashtbl.add dag.nodes aid { parents = IntSet.empty; children = IntSet.empty }

(* has edge from a to b *)
let has_edge a b dag =
    match SafeHashtbl.find_opt dag.node_to_id a, SafeHashtbl.find_opt dag.node_to_id b with
    | Some aid, Some bid ->
        (match SafeHashtbl.find_opt dag.nodes aid, SafeHashtbl.find_opt dag.nodes bid with
        | Some aNode, Some bNode -> IntSet.mem bid aNode.children && IntSet.mem aid bNode.parents
        | _ -> false)
    | _ -> false

let del_edge a b dag =
    match SafeHashtbl.find_opt dag.node_to_id a, SafeHashtbl.find_opt dag.node_to_id b with
    | Some aid, Some bid ->
        (match SafeHashtbl.find_opt dag.nodes aid, SafeHashtbl.find_opt dag.nodes bid with
        | Some aNode, Some bNode ->
            aNode.children <- IntSet.remove bid aNode.children;
            bNode.parents  <- IntSet.remove aid bNode.parents
        | _ -> ())
    | _ -> ()

let add_edges l dag =
    List.iter (fun (n1, n2) -> add_edge n1 n2 dag) l

(*  add edges connected to each other in a list
 *  n1 -> n2 -> n3 -> ... -> nn
 *)
let add_edges_connected l dag =
    let rec loop parent nodes =
        match nodes with
        | []    -> ()
        | n::ns -> add_edge parent n dag; loop n ns
        in
    match l with
    | []    -> ()
    | x::[] -> add_node x dag
    | x::l  -> loop x l


(*  add children edges with p the parent
 *  p -> l[1], p -> l[2], ..., p -> l[n]
 *)
let add_children_edges p l dag =
    List.iter (fun x -> add_edge p x dag) l

let exists_node a dag = Hashtbl.mem dag.node_to_id a

let get_leaves dag =
    Hashtbl.fold (fun id v acc ->
        if IntSet.is_empty v.children then
            match SafeHashtbl.find_opt dag.id_to_node id with
            | Some node -> node :: acc
            | None -> acc  (* Should not happen - ID exists in nodes *)
        else acc
    ) dag.nodes []

let get_roots dag =
    Hashtbl.fold (fun id v acc ->
        if IntSet.is_empty v.parents then
            match SafeHashtbl.find_opt dag.id_to_node id with
            | Some node -> node :: acc
            | None -> acc  (* Should not happen - ID exists in nodes *)
        else acc
    ) dag.nodes []

let get_node dag a =
    match SafeHashtbl.find_opt dag.node_to_id a with
    | Some aid ->
        (match SafeHashtbl.find_opt dag.nodes aid with
        | Some node -> node
        | None -> raise DagNodeNotFound)
    | None -> raise DagNodeNotFound

let get_nodes dag =
    Hashtbl.fold (fun id _ acc ->
        match SafeHashtbl.find_opt dag.id_to_node id with
        | Some node -> node :: acc
        | None -> acc  (* Should not happen - ID exists in nodes *)
    ) dag.nodes []

let get_children dag a =
    let node = get_node dag a in
    IntSet.fold (fun id acc ->
        match SafeHashtbl.find_opt dag.id_to_node id with
        | Some n -> n :: acc
        | None -> acc  (* Should not happen - ID in children set *)
    ) node.children []

let get_parents dag a =
    let node = get_node dag a in
    IntSet.fold (fun id acc ->
        match SafeHashtbl.find_opt dag.id_to_node id with
        | Some n -> n :: acc
        | None -> acc  (* Should not happen - ID in parents set *)
    ) node.parents []

let get_children_full dag a =
    let visited = Hashtbl.create 16 in
    let result = ref [] in
    let queue = Queue.create () in
    List.iter (fun c -> Queue.push c queue) (get_children dag a);
    while not (Queue.is_empty queue) do
      let node = Queue.pop queue in
      if not (Hashtbl.mem visited node) then begin
        Hashtbl.replace visited node ();
        result := node :: !result;
        List.iter (fun c -> Queue.push c queue) (get_children dag node)
      end
    done;
    List.rev !result

let is_children dag a b = List.mem b (get_children dag a)

let rec is_children_full dag a b =
    let children = get_children dag a in
    (* either it's present here, or in one of the kiddy *)
    List.mem b children ||
    List.fold_left (fun acc child ->
        acc || is_children_full dag child b
    ) false children

let subset dag roots =
    let subdag = init () in
    let rec loop node =
        add_node node subdag;
        let children = get_children dag node in
        List.iter (fun child -> add_edge node child subdag; loop child) children
        in
    List.iter (fun root -> loop root) roots;
    subdag

let copy dag =
    let nodes = get_nodes dag in
    let dag2 = init () in
    let copy_node node =
        add_node node dag2;
        let children = get_children dag node in
        add_children_edges node children dag2
        in
    List.iter (fun node -> copy_node node) nodes;
    dag2

let merge dest src =
  let nodes = get_nodes src in
  let dups = ref [] in
  List.iter (fun node -> if exists_node node dest then dups := node :: !dups) nodes;
  let copy_node node =
    add_node node dest;
    let children = get_children src node in
    add_children_edges node children dest
  in
  List.iter (fun node -> copy_node node) nodes;
  !dups

(* O(v^3) use with care *)
let transitive_reduction dag =
    let reducedDag = copy dag in
    let nodes = get_nodes dag in
    List.iter (fun x ->
        List.iter (fun y ->
            List.iter (fun z ->
                if has_edge x y dag && has_edge y z dag
                    then del_edge x z reducedDag
                    else ()
            ) nodes
        ) nodes
    ) nodes;
    reducedDag

(* this is for debugging the DAG.
 * dump the dag links and node in a textual format *)
let dump a_to_string dag =
    let all = get_nodes dag in
    List.iter (fun n ->
        printf "%s:\n" (a_to_string n);
        printf "  | parents  = %s\n" (String.concat ", " (List.map a_to_string (get_parents dag n)));
        printf "  | children = %s\n" (String.concat ", " (List.map a_to_string (get_children dag n)))
    ) all

(* it's useful to be able to visualize the DAG with the excellent dot
 *)
let to_dot a_to_string name fromLeaf dag =
    let buf = Buffer.create 1024 in
    let nodes = get_nodes dag in
    let dotIndex = Hashtbl.create (List.length nodes) in
    let append = Buffer.add_string buf in
    let sanitizeName = bytes_of_string name in
    for i = 0 to String.length name - 1
    do
      if (bytes_get sanitizeName i) = '-'
      then bytes_set sanitizeName i '_'
    done;

    append ("digraph " ^ (bytes_to_string sanitizeName) ^ " {\n");

    let list_iteri f list =
        let rec loop i l =
            match l with
            | []    -> ()
            | x::xs -> f i x; loop (i+1) xs
            in
        loop 1 list
        in

    list_iteri (fun i n ->
        Hashtbl.add dotIndex n i;
        append (sprintf "  %d [label = \"%s\"];\n" i (a_to_string n));
    ) nodes;

    List.iter (fun n ->
        let i = Hashtbl.find dotIndex n in
        List.iter (fun child ->
            let ci = Hashtbl.find dotIndex child in
            append (sprintf "  %d -> %d;\n" i ci)
        ) ((if fromLeaf then get_parents else get_children) dag n)
    ) nodes;
    
    append "}\n";
    Buffer.contents buf