#!/usr/bin/env python3
from __future__ import print_function
import networkx as nx
import sys
import gzip
from collections import defaultdict
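
# Given two GraphML dependency graphs (one built from strong build
# dependencies and one from the full dependency closure), print for every
# source package how many source packages become compilable through it,
# optionally weighted by popcon installation counts (--online).
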
def calcportsmetric(g, verbose=False):
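    """Return a dict mapping every source package name in g to the set of
    source package names that become compilable once it can be compiled (the
    package itself included).

    Cycles are handled by working on the condensation of g; source packages
    that exist in multiple versions are merged by name.
    """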
    if verbose:
        print("calculating sccs", file=sys.stderr)
    scc = list(nx.strongly_connected_components(g))
    if verbose:
        print("condensing", file=sys.stderr)
    h = nx.condensation(g, scc)
    if verbose:
        print("get roots", file=sys.stderr)
    i = h.reverse()
    for node in nx.dfs_postorder_nodes(i):
        pred = list(h.predecessors(node))
        # do not include the version so that source packages which exist in
        # multiple versions are only counted once
        mynames = set([g.nodes[n]["name"] for n in h.nodes[node]["members"]])
        if not pred:
            h.nodes[node]["k"] = mynames
        else:
            h.nodes[node]["k"] = set.union(mynames, *[h.nodes[n]["k"] for n in pred])
    # # slow and stupid version of the above (to check the fast version for
    # # correctness)
    # for node in h.nodes():
    #     print(scc[node], [scc[n] for n in nx.dfs_tree(i, node).nodes()])
    #     descendents = nx.dfs_tree(i, node).nodes()
    #     if descendents:
    #         h.nodes[node]['k'] = set.union(*[set([g.nodes[v]['name']
    #                                               for v in scc[n]])
    #                                          for n in descendents])
    #     else:
    #         h.nodes[node]['k'] = set([g.nodes[n]['name'] for n in scc[node]])
    if verbose:
        print("getting order", file=sys.stderr)
    # we want the results per source package, so we need to merge the values
    # for those source packages which exist in different versions
    srcmetrics = defaultdict(set)
    for n in h.nodes():
        a = h.nodes[n]["k"]
        for node in h.nodes[n]["members"]:
            srcmetrics[g.nodes[node]["name"]].update(a)
    return srcmetrics


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=(
            "Calculate a metric for source package importance "
            + "based on how many source packages (minimum and "
            + "maximum) become compilable in a bootstrapping "
            + "scenario through the compilability of a given source "
            + "package"
        )
    )
    parser.add_argument(
        "strongsrcgraph",
        metavar="strongsrcgraph.xml",
        type=nx.read_graphml,
        help="A dependency graph in GraphML format connecting source "
        + "package vertices with the source packages that build their "
        + "strong dependencies.",
    )
    parser.add_argument(
        "closuresrcgraph",
        metavar="closuresrcgraph.xml",
        type=nx.read_graphml,
        help="A dependency graph in GraphML format connecting source "
        + "package vertices with the source packages that build their "
        + "dependency closure.",
    )
    parser.add_argument(
        "--online",
        action="store_true",
        help="Retrieve popcon results for source packages",
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="be verbose")
    args = parser.parse_args()

    strongres = calcportsmetric(args.strongsrcgraph, args.verbose)
    closureres = calcportsmetric(args.closuresrcgraph, args.verbose)
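    # strongres maps each source package to the set reachable via strong
    # build dependencies only, closureres to the set reachable via the full
    # dependency closure; these correspond to the minimum and maximum of the
    # metric described above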
    if args.online:
        if args.verbose:
            print("getting popcon results for source packages", file=sys.stderr)
            print("downloading...", file=sys.stderr)
        try:
            import urllib.request as urllib
        except ImportError:
            import urllib
        from io import BytesIO

        r = BytesIO(urllib.urlopen("http://popcon.debian.org/source/by_inst.gz").read())
        if args.verbose:
            print("done downloading, opening...", file=sys.stderr)
        popconvalues = dict()
        with gzip.GzipFile(fileobj=r) as popconf:
            # parse popcon file
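            # each data line has seven whitespace-separated fields; the
            # second is the source package name and the third its popcon
            # installation count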
            for line in popconf:
                if line.startswith(b"#"):
                    continue
                tokens = line.split()
                if len(tokens) != 7:
                    continue
                popconvalues[tokens[1].decode()] = int(tokens[2])
        # now replace every entry by a tuple of its length and the sum of its
        # popcon values
        for k, v in list(strongres.items()):
            psum = sum([popconvalues.get(n, 0) for n in v])
            strongres[k] = (len(v), psum)
        for k, v in list(closureres.items()):
            psum = sum([popconvalues.get(n, 0) for n in v])
            closureres[k] = (len(v), psum)
        if args.verbose:
            print("printing results", file=sys.stderr)
        # we take the set intersection because if a package is not
        # installable, no strong dependencies exist for it but its dependency
        # closure is still calculated, so there may be packages which are in
        # the closure graph but not in the strong graph. We only want the
        # packages that are in both graphs because we do not care about the
        # subgraphs of uninstallable packages.
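        # output columns: source package name, number of source packages that
        # become compilable via strong dependencies, the same via the
        # dependency closure, and the popcon sums of those two sets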
        for srcname in sorted(set(strongres.keys()) & set(closureres.keys())):
            print(
                "src:%s\t%d\t%d\t%d\t%d"
                % (
                    srcname,
                    strongres[srcname][0],
                    closureres[srcname][0],
                    strongres[srcname][1],
                    closureres[srcname][1],
                )
            )
    else:
        if args.verbose:
            print("printing results", file=sys.stderr)
        for srcname in sorted(set(strongres.keys()) & set(closureres.keys())):
            print(
                "src:%s\t%d\t%d"
                % (srcname, len(strongres[srcname]), len(closureres[srcname]))
            )