1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
#!/usr/bin/env python3
# simple check if two build or source graphs are equal
#
# two graphs are equal if they contain the same set of nodes and edges
from __future__ import print_function
import sys
sys.path.append("/usr/share/botch")
from util import read_graph
def graph_difference(g, h, verbose=False):
if g.number_of_edges() != h.number_of_edges():
print(
"g has %d edges and h as %d edges"
% (g.number_of_edges(), h.number_of_edges())
)
return False
if g.number_of_nodes() != h.number_of_nodes():
print(
"g has %d nodes and h as %d nodes"
% (g.number_of_nodes(), h.number_of_nodes())
)
return False
def normalize_node(attr):
# We delete the cudfversion attribute because vertices should be unique
# by their name and version and not by the cudfversion which may have
# been differently assigned
if attr.get("version") is not None and "cudfversion" in attr:
del attr["cudfversion"]
if attr.get("kind") == "SCC":
if not attr.get("binaries"):
raise Exception("nodes of kind SCC need the sources attribute")
attr["sources"] = frozenset([s for s in attr["sources"].split(",")])
elif attr.get("kind") == "InstSet":
if not attr.get("binaries"):
raise Exception("nodes of kind InstSet need the binaries attribute")
attr["binaries"] = frozenset([p for p in attr["binaries"].split(",")])
elif attr.get("kind") == "SrcPkg":
pass
return frozenset(attr.items())
def normalize_edge(attr):
# we delete the id attribute as it has no meaning. Edges are unique by
# the vertices they connect because there can be no multi-edges
if "id" in attr:
del attr["id"]
# only buildgraphs have the kind attribute
if attr.get("kind"):
if attr["kind"] == "buildsfrom":
if not attr.get("binaries"):
raise Exception(
"edges of kind buildsfrom need the binaries attribute"
)
attr["binaries"] = frozenset([s for s in attr["binaries"].split(",")])
elif attr["kind"] == "builddep":
pass
else:
raise Exception("unknown edge type %s" % attr["kind"])
else:
# this is a srcgraph
if attr.get("binaries"):
attr["binaries"] = frozenset([s for s in attr["binaries"].split(",")])
if attr.get("strong"):
attr["strong"] = frozenset([s for s in attr["strong"].split(",")])
if attr.get("strong_direct"):
attr["strong_direct"] = frozenset(
[s for s in attr["strong_direct"].split(",")]
)
if attr.get("annotation"):
attr["annotation"] = frozenset([s for s in attr["annotation"].split(",")])
return frozenset(attr.items())
g_nodes = set([(n, normalize_node(attr)) for n, attr in g.nodes(data=True)])
h_nodes = set([(n, normalize_node(attr)) for n, attr in h.nodes(data=True)])
g_edges = set(
[(n1, n2, normalize_edge(attr)) for n1, n2, attr in g.edges(data=True)]
)
h_edges = set(
[(n1, n2, normalize_edge(attr)) for n1, n2, attr in h.edges(data=True)]
)
if h_nodes == g_nodes and g_edges == h_edges:
return True
print(
"the graphs disagree on either the node naming, their values or "
+ "the graph structure",
file=sys.stderr,
)
# if this test did not succeed, then maybe just the node names are
# different but their content actually matches:
g_node_vals = set([a for _, a in g_nodes])
h_node_vals = set([a for _, a in h_nodes])
if g_node_vals != h_node_vals:
print("the set of node attributes of g and h differ")
if g_node_vals - h_node_vals:
print("nodes in g but not in h", g_node_vals - h_node_vals)
if h_node_vals - g_node_vals:
print("nodes in h but not in g", h_node_vals - g_node_vals)
return False
if len(g_nodes) != len(g_node_vals) or len(h_nodes) != len(h_node_vals):
print(
"there exist duplicate node attributes - this should not "
+ "happen for either build or source graphs",
file=sys.stderr,
)
return False
print(
"the graphs disagree on either the node naming or the graph " + "structure",
file=sys.stderr,
)
# we now know that the set of node attributes is equal and that there are
# no duplicates
# thus we can now create a mapping between node ids of the two graphs
# this dictionary stores for each attribute its node id in h
node_mapping_h = {attr: n for n, attr in h_nodes}
# this dictionary stores for each node id in g the node id in h
node_mapping = {n: node_mapping_h[attr] for n, attr in g_nodes}
# now go over all edges in g and check if they are in h as well
# also check whether the attributes are equal
# we do not check the other way round because we know that the number of
# edges in both graphs is equal
for n1, n2, attr in g_edges:
n3 = node_mapping[n1]
n4 = node_mapping[n2]
if not h.has_edge(n3, n4):
print("edge in g but not in h", file=sys.stderr)
print(g.adj[n1][n2])
return False
if attr != frozenset(h.adj[n3][n4].items()):
print("edge attributes do not agree", file=sys.stderr)
print(attr)
print(h.adj[n3][n4])
return False
return True
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description=("Check if two graphs are the same"))
parser.add_argument("g", type=read_graph, help="graph g in GraphML or dot format")
parser.add_argument("h", type=read_graph, help="graph h in GraphML or dot format")
parser.add_argument("-v", "--verbose", action="store_true", help="be verbose")
args = parser.parse_args()
# TODO: also support graphs that are neither buildgraph nor srcgraph
ret = graph_difference(args.g, args.h, args.verbose)
exit(not ret)
|