File: calcportsmetric.py

package info (click to toggle)
botch 0.24-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,084,624 kB
  • sloc: xml: 11,924,892; ml: 4,489; python: 3,890; sh: 1,268; makefile: 334
file content (151 lines) | stat: -rwxr-xr-x 5,895 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python3

from __future__ import print_function
import networkx as nx
import sys
import gzip
from collections import defaultdict


def calcportsmetric(g, verbose=False):
    if verbose:
        print("calculating sccs", file=sys.stderr)
    scc = list(nx.strongly_connected_components(g))
    if verbose:
        print("condensing", file=sys.stderr)
    h = nx.condensation(g, scc)
    if verbose:
        print("get roots", file=sys.stderr)
    i = h.reverse()
    for node in nx.dfs_postorder_nodes(i):
        pred = h.predecessors(node)
        # do not include the version so that source packages which exist in
        # multiple versions are only counted once
        mynames = set([g.nodes[n]["name"] for n in h.nodes[node]["members"]])
        if not pred:
            h.nodes[node]["k"] = mynames
        else:
            h.nodes[node]["k"] = set.union(mynames, *[h.nodes[n]["k"] for n in pred])
    # # slow and stupid version of the above (to check the fast version for
    # # correctness)
    # for node in h.nodes():
    # print scc[node], [scc[n] for n in nx.dfs_tree(i,node).nodes()]
    #    descendents = nx.dfs_tree(i,node).nodes()
    #    if descendents:
    #        h.nodes[node]['k'] = set.union(*[set([g.nodes[v]['name']
    #                                             for v in scc[n]])
    #                                        for n in descendents])
    #    else:
    #        h.nodes[node]['k'] = set([g.nodes[n]['name'] for n in scc[node]])
    if verbose:
        print("getting order", file=sys.stderr)
    # we want the results per source package, so we need to merge the values
    # for those source packages which exist in different versions
    srcmetrics = defaultdict(set)
    for n in h.nodes():
        a = h.nodes[n]["k"]
        for node in h.nodes[n]["members"]:
            srcmetrics[g.nodes[node]["name"]].update(a)
    return srcmetrics


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=(
            "Calculate a metric for source package importance "
            + "based on how many source packages (minimum and "
            + "maximum) become compilable in a bootstrapping "
            + "scenario through the compilability of a given source "
            + "package"
        )
    )
    parser.add_argument(
        "strongsrcgraph",
        metavar="strongsrcgraph.xml",
        type=nx.read_graphml,
        help="A dependency graph in GraphML format connecting source "
        + "package vertices with the source packages that build their "
        + "strong dependencies.",
    )
    parser.add_argument(
        "closuresrcgraph",
        metavar="closuresrcgraph.xml",
        type=nx.read_graphml,
        help="A dependency graph in GraphML format connecting source "
        + "package vertices with the source packages that build their "
        + "dependency closure.",
    )
    parser.add_argument(
        "--online",
        action="store_true",
        help="Retrieve popcon results for source packages",
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="be verbose")
    args = parser.parse_args()
    strongres = calcportsmetric(args.strongsrcgraph, args.verbose)
    closureres = calcportsmetric(args.closuresrcgraph, args.verbose)

    if args.online:
        if args.verbose:
            print("getting popcon results for source packages", file=sys.stderr)
        if args.verbose:
            print("downloading...", file=sys.stderr)
        try:
            import urllib.request as urllib
        except ImportError:
            import urllib
        from io import BytesIO

        r = BytesIO(urllib.urlopen("http://popcon.debian.org/source/by_inst.gz").read())
        if args.verbose:
            print("done downloading, opening...", file=sys.stderr)
        popconvalues = dict()
        with gzip.GzipFile(fileobj=r) as popconf:
            # parse popcon file
            for line in popconf:
                if line.startswith(b"#"):
                    continue
                tokens = line.split()
                if len(tokens) != 7:
                    continue
                popconvalues[tokens[1].decode()] = int(tokens[2])

        # now replace every entry by a tuple of its length and the sum of their
        # popcon values
        result = dict()
        for k, v in list(strongres.items()):
            psum = sum([popconvalues.get(n, 0) for n in v])
            strongres[k] = (len(v), psum)
        for k, v in list(closureres.items()):
            psum = sum([popconvalues.get(n, 0) for n in v])
            closureres[k] = (len(v), psum)

        if args.verbose:
            print("printing results", file=sys.stderr)
        # we take the set intersection because in case a package is not
        # installable, then there exist no strong dependencies for it but its
        # dependency closure will still be calculated. So in that case there
        # may be packages which are in the closure graph but not in the source
        # graph. We only want those packages that are in both graphs because
        # we don't care about the subgraphs of uninstallable packages.
        for srcname in sorted(set(strongres.keys()) & set(closureres.keys())):
            print(
                "src:%s\t%d\t%d\t%d\t%d"
                % (
                    srcname,
                    strongres[srcname][0],
                    closureres[srcname][0],
                    strongres[srcname][1],
                    closureres[srcname][1],
                )
            )
    else:
        if args.verbose:
            print("printing results", file=sys.stderr)
        for srcname in sorted(set(strongres.keys()) & set(closureres.keys())):
            print(
                "src:%s\t%d\t%d"
                % (srcname, len(strongres[srcname]), len(closureres[srcname]))
            )