File: buildGraphExtras.ml

package info (click to toggle)
botch 0.24-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,084,624 kB
  • sloc: xml: 11,924,892; ml: 4,489; python: 3,890; sh: 1,268; makefile: 334
file content (464 lines) | stat: -rw-r--r-- 20,861 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
(**************************************************************************)
(*                                                                        *)
(*  Copyright (C) 2012 Johannes 'josch' Schauer <j.schauer@email.de>      *)
(*  Copyright (C) 2012 Pietro Abate <pietro.abate@pps.jussieu.fr>         *)
(*                                                                        *)
(*  This library is free software: you can redistribute it and/or modify  *)
(*  it under the terms of the GNU Lesser General Public License as        *)
(*  published by the Free Software Foundation, either version 3 of the    *)
(*  License, or (at your option) any later version.  A special linking    *)
(*  exception to the GNU Lesser General Public License applies to this    *)
(*  library, see the COPYING file for more information.                   *)
(**************************************************************************)

open ExtLib
open Dose_common

module IntSet = BootstrapCommon.IntSet
module StringSet = BootstrapCommon.StringSet

module Make (U : sig val univ : Cudf.universe end) = struct
  module G = BuildGraph.G
  module FindCyclesG = GraphUtils.FindCycles(G)
  module GraphUtilsG = GraphUtils.GraphUtils(G)
  module Dfs = Graph.Traverse.Dfs(G)
  module VertexSet = BuildGraph.VertexSet
  module EdgeSet = BuildGraph.EdgeSet

  let int_of_vertex v =
    let vertex = BuildGraph.Unique.value v in
    match vertex with
    | BuildGraph.SrcPkg id | BuildGraph.InstSet (id,_) -> id

  let pkg_of_vertex v =
    let vertex = BuildGraph.Unique.value v in
    match vertex with
    | BuildGraph.SrcPkg id | BuildGraph.InstSet (id,_) ->
      CudfAdd.inttopkg U.univ id

  let string_of_vertex ?(noversion=false) v =
    let pkg = pkg_of_vertex v in
    BootstrapCommon.string_of_package ~noversion pkg

  let variant_of_vertex v =
    let v = BuildGraph.Unique.value v in
    match v with
    | BuildGraph.SrcPkg id -> begin
        let pkg = CudfAdd.inttopkg U.univ id in
        (`SrcPkg (BootstrapCommon.debsrctuple_of_cudfpkg pkg))
      end
    | BuildGraph.InstSet (id,iss) -> begin
        let pkg = CudfAdd.inttopkg U.univ id in
        let sl =
          List.map (fun pid ->
              let pkg = CudfAdd.inttopkg U.univ pid in
              BootstrapCommon.debbintriplet_of_cudfpkg pkg
            ) (IntSet.elements iss)
        in
        (`InstSet (BootstrapCommon.debbintriplet_of_cudfpkg pkg, sl))
      end

  let intset_of_vset s =
    BuildGraph.VertexSet.fold (fun v acc -> IntSet.add (int_of_vertex v) acc) s IntSet.empty

  let intset_of_vlist l =
    List.fold_left (fun acc v -> IntSet.add (int_of_vertex v) acc) IntSet.empty l

  (* make the list unique because pkg_of_vertex omits the installation set and
   * might thus otherwise return duplicate entries *)
  let pkglist_of_vlist l =
    BootstrapCommon.CudfSet.elements
      (List.fold_right
         (fun v -> BootstrapCommon.CudfSet.add (pkg_of_vertex v)) l
         BootstrapCommon.CudfSet.empty)

  let variantlist_of_vlist l =
    List.fold_right (fun v acc -> (variant_of_vertex v)::acc) l []

  let srcpkglist_of_g g =
    G.fold_vertex (fun v acc ->
        let vertex = BuildGraph.Unique.value v in
        match vertex with
        | BuildGraph.SrcPkg id ->
          (CudfAdd.inttopkg U.univ id)::acc
        | _ -> acc
      ) g []

  (* remove all build dependencies that are marked as removable by
   * reduced_deps_ht from the source package nodes in g *)
  let remove_build_deps reduced_deps_ht g =
    G.iter_vertex (fun v1 ->
      let vertex1 = BuildGraph.Unique.value v1 in
      match vertex1 with
        | BuildGraph.SrcPkg id -> begin
            let p1 = CudfAdd.inttopkg U.univ id in
            let droppable = Hashtbl.find_default reduced_deps_ht p1.Cudf.package StringSet.empty in
            if StringSet.is_empty droppable then ()
            else begin
              G.iter_succ (fun v2 ->
                let vertex2 = BuildGraph.Unique.value v2 in
                match vertex2 with
                  | BuildGraph.InstSet (id,_) ->
                      let p2 = CudfAdd.inttopkg U.univ id in
                      if StringSet.mem p2.Cudf.package droppable then
                        G.remove_edge g v1 v2;
                  | _ -> failwith("impossible")
              ) g v1
            end;
          end
        | _ -> ();
    ) g

  (* remove all installation sets which are weak build dependencies as indicated
   * by weak_deps_set *)
  let remove_inst_sets weak_deps_set g =
    G.iter_vertex (fun v ->
      let vertex = BuildGraph.Unique.value v in
      match vertex with
        | BuildGraph.InstSet (id,_) ->
            let pkg = CudfAdd.inttopkg U.univ id in
            if StringSet.mem pkg.Cudf.package weak_deps_set then
              G.remove_vertex g v
        | _ -> ()
    ) g

  (* given a graph and a list of cycles in it, return a set of edges that remove
   * all those cycles by iteratively removing the edge that is shared by most
   * cycles. *)
  let calculate_partial_fas g cycles =
    let hist = Hashtbl.create (G.nb_edges g) in
    (* create a hashtable mapping edges to a set of integers where each integer
     * maps to the cycle that this edge is part of *)
    List.iteri (fun i cycle ->
      let edges = FindCyclesG.edge_cycle_from_vertex_cycle g cycle in
      List.iter (fun edge ->
        match edge with
          | (_,{ BuildGraph.depend = BuildGraph.BuildDep },_) ->
              Hashtbl.replace hist edge (IntSet.add i (Hashtbl.find_default hist edge IntSet.empty));
          | _ -> (); (* ignore builds-from edges *)
      ) edges;
    ) cycles;
    let rec remove_most_popular_edge acc =
      (* get the edge that is part of the most cycles *)
      match List.of_enum (Hashtbl.enum hist) with
        | [] -> acc (* it might be that no cycles of this length can be broken *)
        | hd::tl -> begin
            let max_edge,cids = List.fold_left (fun (k1,v1) (k2,v2) ->
                let diff = (IntSet.cardinal v1) - (IntSet.cardinal v2) in
                if diff < 0 then k2,v2
                else if diff > 0 then k1,v1
                else match k1,k2 with
                  | (sv1,{BuildGraph.depend = BuildGraph.BuildDep},iv1),(sv2,{BuildGraph.depend = BuildGraph.BuildDep},iv2) ->
                    let diff = (BuildGraph.Unique.uid sv1) - (BuildGraph.Unique.uid sv2) in
                    if diff < 0 then k2,v2
                    else if diff > 0 then k1,v1
                    else
                      let diff = (BuildGraph.Unique.uid iv1) - (BuildGraph.Unique.uid iv2) in
                      if diff < 0 then k2,v2
                      else k1,v1
                  | _ -> failwith "impossible"
            ) hd tl in
            (* end if the edge with the most cycles has zero cycles *)
            if (IntSet.cardinal cids) = 0 then
              acc
            else begin
              (* remove those cycle ids from all sets *)
              Hashtbl.iter (fun edge set ->
                Hashtbl.replace hist edge (IntSet.diff set cids)
              ) hist;
              (* add edge to feedback arc set *)
              remove_most_popular_edge (EdgeSet.add max_edge acc)
            end
          end
    in
    remove_most_popular_edge EdgeSet.empty

  (* turn a feedback arc set into a vertex ordering 
   * since there are many topological orderings for a given acyclic graph, take 
   * care to choose the order which keeps the feedback arc set small 
   * for this reason, instead of using Graph.Topological,
   * GraphUtils.get_partial_order is used. All vertices within each group
   * returned by this function are then ordered such that the cardinality of the
   * given feedback arc set is reduced. *)
  let getorder fas g =
    (* fasverts is a hashtable which maps vertices which are the source of edges
     * in the feedback arc set to a list of vertices which are destinations of
     * edges in the feedback arc set. *)
    let fasverts = Hashtbl.create (EdgeSet.cardinal fas) in
    EdgeSet.iter (fun e ->
      let src = G.E.src e in let dst = G.E.dst e in
      if src <> dst then (* ignore selfcycles as they don't influence the order *)
        Hashtbl.add fasverts src (dst,e)
    ) fas;
    (* go through all vertex lists returned by GraphUtilsG.get_partial_order and
     * sort all vertices in this list which make edges in the feedback arc set, 
     * such that those edges are removed *)
    List.fold_left (fun acc l ->
      (* get all the edges that are part of this list for lookup later *)
      let localverts = Hashtbl.create (List.length l) in
      List.iter (fun v -> Hashtbl.add localverts v ()) l;
      (* create a graph that only contains those feedback arcs whose source and 
       * destination are in the current vertex list. The resulting graph might
       * be cyclic if the feedback arc set was really bad but at this point we
       * don't care*)
      let g = G.create () in
      List.iter (fun v1 ->
        List.iter (fun (v2,e) ->
          if Hashtbl.mem localverts v2 then G.add_edge_e g e
        ) (Hashtbl.find_all fasverts v1)
      ) l;
      if Dfs.has_cycle g then
        failwith "fas has forward and backward edge (creating a cycle) we don't handle this yet";
      (* get the topological order of the vertices in the graph *)
      let vlist = BuildGraph.T.fold (fun v acc -> v::acc) g [] in
      (* concatenate all vertices that are not part of the graph above *)
      let acc = List.fold_left (fun acc v ->
        if not (List.mem v vlist) then v::acc else acc
      ) acc l in
      List.rev_append vlist acc
    ) [] (GraphUtilsG.get_partial_order g)
  ;;

  let ordertofas order g =
    (* check if the order can match the graph *)
    if (List.length order) <> (G.nb_vertex g) then
      failwith "invalid vertex order (length differs)";
    let seen = Hashtbl.create (List.length order) in
    List.fold_left (fun acc v ->
      Hashtbl.add seen v (); (* because of edges in self cycles *)
      G.fold_succ_e (fun edge acc ->
        if Hashtbl.mem seen (G.E.dst edge) then
          EdgeSet.add edge acc (* this vertex has already been processed, so it is a backarc*)
        else
          acc
      ) g v acc
    ) EdgeSet.empty order
  ;;

  (* get a feedback arc set by repeated application of calculate_partial_fas
   * the maxlength parameter controls the initial cycle length passed to
   * calculate_partial_fas and will be incremented by two at every iteration *)
  let calculate_fas ?(maxlength=4) g_orig =
    let g = GraphUtilsG.copy_graph g_orig in
    let remove_edgeset g es =
      EdgeSet.iter (fun edge ->
        G.remove_edge_e g edge;
      ) es;
    in
    (* first find and remove all cycles of length two by removing the according
     * build dependency
     * this step is not optional as this is the only way to break those cycles
     * if it turns out that build dependency can't be removed, then the only
     * alternative to break the cycle is cross compilation *)
    let fixed_fas = List.fold_left (fun acc cycle ->
      let edges = FindCyclesG.edge_cycle_from_vertex_cycle g cycle in
      match edges with
        | [(_,{ BuildGraph.depend = BuildGraph.BuildDep },_) as bd;(_,{ BuildGraph.depend = BuildGraph.BuildsFrom _ },_)]
        | [(_,{ BuildGraph.depend = BuildGraph.BuildsFrom _ },_);(_,{ BuildGraph.depend = BuildGraph.BuildDep },_) as bd] ->
            EdgeSet.add bd acc
        | _ -> failwith "wrong cycle type"
    ) EdgeSet.empty (FindCyclesG.johnson ~maxlength:2 g) in
    (* remove the found edges from the graph *)
    remove_edgeset g fixed_fas;
    (* apply calculate_partial_fas on the graph, remove the resulting edges and
     * increment the max cycle length each time until the graph is loop free *)
    let rec foo ml acc =
      if Dfs.has_cycle g then begin
        let cycles = FindCyclesG.johnson ~maxlength:ml g in
        match cycles with
          | [] ->
              foo (ml+2) acc
          | l ->
              let partial_fas = calculate_partial_fas g l in
              remove_edgeset g partial_fas;
              foo (ml+2) (EdgeSet.union partial_fas acc)
      end else
        acc
    in
    let fas = EdgeSet.union fixed_fas (foo maxlength EdgeSet.empty) in
    let order = getorder fas g in
    let fas = ordertofas order g_orig in
    fas

  (* profile builds a list of source vertices by modifying the graph accordingly *)
  let profile_build reduced_deps_ht g vs =
    VertexSet.iter (fun v1 ->
      let vertex1 = BuildGraph.Unique.value v1 in
      let src = match vertex1 with
        | BuildGraph.SrcPkg id -> CudfAdd.inttopkg U.univ id
        | _ -> failwith "impossible"
      in
      let droppable = Hashtbl.find reduced_deps_ht src.Cudf.package in
      G.iter_succ (fun v2 ->
        let vertex2 = BuildGraph.Unique.value v2 in
        match vertex2 with
          | BuildGraph.InstSet (id,_) ->
              let pkg = CudfAdd.inttopkg U.univ id in
              if StringSet.mem pkg.Cudf.package droppable then
                G.remove_edge g v1 v2
          | _ -> failwith "impossible"
      ) g v1;
    ) vs

  (* given a list of cycles, calculates a partial feedback vertex set where the
   * resulting list of vertices represent the source package which, if profile
   * built, break all those cycles *)
  let calculate_partial_fvs reduced_deps_ht g cycles =
    let hist = Hashtbl.create (G.nb_vertex g) in
    (* create a hashtable mapping source vertices to a set of integers where each
     * integer maps to the cycle that this vertex is part of and which can be
     * broken by its build profile *)
    List.iteri (fun i cycle ->
      let edges = FindCyclesG.edge_cycle_from_vertex_cycle g cycle in
      List.iter (fun edge ->
        match edge with
          | (sv,{ BuildGraph.depend = BuildGraph.BuildDep },iv) -> begin
              let s = BuildGraph.Unique.value sv in
              let src = match s with
                | BuildGraph.SrcPkg id -> CudfAdd.inttopkg U.univ id
                | _ -> failwith "impossible"
              in
              let p = BuildGraph.Unique.value iv in
              let pkg = match p with
                | BuildGraph.InstSet (id,_) -> CudfAdd.inttopkg U.univ id
                | _ -> failwith "impossible"
              in
              (* don't do anything if the source package doesnt have a build
               * profile *)
              try
                let droppable = Hashtbl.find reduced_deps_ht src.Cudf.package in
                (* if this build-depends edge is droppable by a profile, add the
                 * integer of the cycle to the source package *)
                if StringSet.mem pkg.Cudf.package droppable then
                  Hashtbl.replace hist sv (IntSet.add i (Hashtbl.find_default hist sv IntSet.empty));
              with Not_found -> ()
            end
          | _ -> (); (* ignore builds-from edges *)
      ) edges;
    ) cycles;
    let rec remove_most_popular_source acc =
      (* get the source that removes the most cycles if profile built *)
      let l = List.of_enum (Hashtbl.enum hist) in
      (* we sort the list to be sure that the order in which items have been
       * added does not matter *)
      (* sort in decreasing order so that we can just pick the first element
       * later *)
      let l = List.sort ~cmp:(fun (k1,v1) (k2,v2) ->
          (* the set of less cardinality goes first *)
          let c1 = IntSet.cardinal v1 in
          let c2 = IntSet.cardinal v2 in
          if c1 < c2 then 1
          else if c1 > c2 then -1
          else begin
            (* when there is a tie, the package referenced by the vertex is
             * taken *)
            let s1 = BuildGraph.Unique.value k1 in
            let s2 = BuildGraph.Unique.value k2 in
            match s1,s2 with
            | (BuildGraph.SrcPkg id1, BuildGraph.SrcPkg id2) ->
              if id1 < id2 then 1 else if id1 > id2 then -1 else 0
            | _ -> failwith "impossible"
          end
        ) l in
      match l with
        | [] -> acc (* it might be that no cycles of this length can be broken *)
        | (max_vert,cids)::_ -> begin
          (* end if the source with the most cycles has zero cycles *)
          if (IntSet.cardinal cids) = 0 then
            acc
          else begin
            (* remove those cycle ids from all sets *)
            Hashtbl.iter (fun vert set ->
              Hashtbl.replace hist vert (IntSet.diff set cids)
            ) hist;
            (* add source to feedback vertex set *)
            remove_most_popular_source (VertexSet.add max_vert acc)
          end
      end
    in
    remove_most_popular_source VertexSet.empty

  (* calculate a feedback vertex set where the returned source vertices will
   * break all cycles if they are profile built *)
  let calculate_fvs ?(maxlength=4) reduced_deps_ht g =
    let g = GraphUtilsG.copy_graph g in
    (* first find and remove all cycles of length two by removing the according 
     * build dependency
     * this step is not optional as this is the only way to break those cycles
     * if it turns out that build dependency can't be removed, then the only
     * alternative to break the cycle is cross compilation *)
    let fixed_fvs = List.map (fun cycle ->
      let edges = FindCyclesG.edge_cycle_from_vertex_cycle g cycle in
      match edges with
        | [(sv,{ BuildGraph.depend = BuildGraph.BuildDep },iv);(_,{ BuildGraph.depend = BuildGraph.BuildsFrom _ },_)]
        | [(_,{ BuildGraph.depend = BuildGraph.BuildsFrom _ },_);(sv,{ BuildGraph.depend = BuildGraph.BuildDep },iv)] ->
            let s = BuildGraph.Unique.value sv in
            let src = match s with
              | BuildGraph.SrcPkg id -> CudfAdd.inttopkg U.univ id
              | _ -> failwith "impossible"
            in
            let p = BuildGraph.Unique.value iv in
            let pkg = match p with
              | BuildGraph.InstSet (id,_) -> CudfAdd.inttopkg U.univ id
              | _ -> failwith "impossible"
            in
            let droppable = try
              Hashtbl.find reduced_deps_ht src.Cudf.package
            with Not_found ->
              failwith (Printf.sprintf "source package %s does not seem to have reduced deps" (CudfAdd.decode src.Cudf.package))
            in
            if StringSet.mem pkg.Cudf.package droppable then
              sv
            else
              failwith (Printf.sprintf "build dependency %s of source package %s cannot be dropped but has to because it's a 2-cycle. Consider cross compilation?" (CudfAdd.decode pkg.Cudf.package) (CudfAdd.decode src.Cudf.package))
        | _ -> failwith "wrong cycle type"
    ) (FindCyclesG.johnson ~maxlength:2 g) in
    let fixed_fvs = List.fold_right VertexSet.add fixed_fvs VertexSet.empty in
    (* remove the found edges from the graph *)
    profile_build reduced_deps_ht g fixed_fvs;
    (* apply calculate_partial_fas on the graph, remove the resulting edges and 
     * increment the max cycle length each time until the graph is loop free *) 
    let rec foo ml acc =
      if Dfs.has_cycle g then begin
        let cycles = FindCyclesG.johnson ~maxlength:ml g in
        match cycles with
          | [] ->
              foo (ml+2) acc
          | l ->
              let partial_fvs = calculate_partial_fvs reduced_deps_ht g l in
              profile_build reduced_deps_ht g partial_fvs;
              foo (ml+2) (VertexSet.union partial_fvs acc)
      end else
        acc
    in
    VertexSet.union fixed_fvs (foo maxlength VertexSet.empty)


  let annotate is_strong g =
    let ag = G.create () in
    G.iter_edges_e (fun (v1,label,v2) ->
      let vertex1 = BuildGraph.Unique.value v1 in
      let vertex2 = BuildGraph.Unique.value v2 in
      match (vertex1,label,vertex2) with
        | (BuildGraph.SrcPkg sid, {BuildGraph.depend = BuildGraph.BuildDep}, BuildGraph.InstSet (bid,_)) -> begin
            let srcpkg = CudfAdd.inttopkg U.univ sid in
            let binpkg = CudfAdd.inttopkg U.univ bid in
            if is_strong srcpkg binpkg then
              G.add_edge_e ag (v1,{BuildGraph.depend = BuildGraph.BuildDep; annotation = [`StrongDep]},v2)
            else
              G.add_edge_e ag (v1,label,v2)
          end
        | (BuildGraph.InstSet (bid,_), {BuildGraph.depend = BuildGraph.BuildsFrom {contents = s}}, _) -> begin
            let binpkg = CudfAdd.inttopkg U.univ bid in
            let binpkgs = IntSet.fold (fun id acc ->
              (CudfAdd.inttopkg U.univ id)::acc
            ) s [] in
            if List.exists (fun pkg -> is_strong binpkg pkg) binpkgs then
              G.add_edge_e ag (v1,{BuildGraph.depend = BuildGraph.BuildsFrom {contents = s}; annotation = [`StrongDep]},v2)
            else
              G.add_edge_e ag (v1,label,v2)
          end
        | _ -> failwith "impossible"
    ) g;
    ag
end