#!/usr/bin/env python3
# simple check if two build or source graphs are equal
#
# two graphs are equal if they contain the same set of nodes and edges

from __future__ import print_function
import sys

sys.path.append("/usr/share/botch")
from util import read_graph


def graph_difference(g, h, verbose=False):
    if g.number_of_edges() != h.number_of_edges():
        print(
            "g has %d edges and h as %d edges"
            % (g.number_of_edges(), h.number_of_edges())
        )
        return False

    if g.number_of_nodes() != h.number_of_nodes():
        print(
            "g has %d nodes and h as %d nodes"
            % (g.number_of_nodes(), h.number_of_nodes())
        )
        return False

    def normalize_node(attr):
        # We delete the cudfversion attribute because vertices should be unique
        # by their name and version and not by the cudfversion which may have
        # been differently assigned
        if attr.get("version") is not None and "cudfversion" in attr:
            del attr["cudfversion"]
        if attr.get("kind") == "SCC":
            if not attr.get("binaries"):
                raise Exception("nodes of kind SCC need the sources attribute")
            attr["sources"] = frozenset([s for s in attr["sources"].split(",")])
        elif attr.get("kind") == "InstSet":
            if not attr.get("binaries"):
                raise Exception("nodes of kind InstSet need the binaries attribute")
            attr["binaries"] = frozenset([p for p in attr["binaries"].split(",")])
        elif attr.get("kind") == "SrcPkg":
            pass
        return frozenset(attr.items())

    def normalize_edge(attr):
        # we delete the id attribute as it has no meaning. Edges are unique by
        # the vertices they connect because there can be no multi-edges
        if "id" in attr:
            del attr["id"]
        # only buildgraphs have the kind attribute
        if attr.get("kind"):
            if attr["kind"] == "buildsfrom":
                if not attr.get("binaries"):
                    raise Exception(
                        "edges of kind buildsfrom need the binaries attribute"
                    )
                attr["binaries"] = frozenset([s for s in attr["binaries"].split(",")])
            elif attr["kind"] == "builddep":
                pass
            else:
                raise Exception("unknown edge type %s" % attr["kind"])
        else:
            # this is a srcgraph
            if attr.get("binaries"):
                attr["binaries"] = frozenset([s for s in attr["binaries"].split(",")])
            if attr.get("strong"):
                attr["strong"] = frozenset([s for s in attr["strong"].split(",")])
            if attr.get("strong_direct"):
                attr["strong_direct"] = frozenset(
                    [s for s in attr["strong_direct"].split(",")]
                )
        if attr.get("annotation"):
            attr["annotation"] = frozenset([s for s in attr["annotation"].split(",")])
        return frozenset(attr.items())

    g_nodes = set([(n, normalize_node(attr)) for n, attr in g.nodes(data=True)])
    h_nodes = set([(n, normalize_node(attr)) for n, attr in h.nodes(data=True)])
    g_edges = set(
        [(n1, n2, normalize_edge(attr)) for n1, n2, attr in g.edges(data=True)]
    )
    h_edges = set(
        [(n1, n2, normalize_edge(attr)) for n1, n2, attr in h.edges(data=True)]
    )

    if h_nodes == g_nodes and g_edges == h_edges:
        return True

    print(
        "the graphs disagree on either the node naming, their values or "
        + "the graph structure",
        file=sys.stderr,
    )

    # if this test did not succeed, then maybe just the node names are
    # different but their content actually matches:

    g_node_vals = set([a for _, a in g_nodes])
    h_node_vals = set([a for _, a in h_nodes])

    if g_node_vals != h_node_vals:
        print("the set of node attributes of g and h differ")
        if g_node_vals - h_node_vals:
            print("nodes in g but not in h", g_node_vals - h_node_vals)
        if h_node_vals - g_node_vals:
            print("nodes in h but not in g", h_node_vals - g_node_vals)
        return False

    if len(g_nodes) != len(g_node_vals) or len(h_nodes) != len(h_node_vals):
        print(
            "there exist duplicate node attributes - this should not "
            + "happen for either build or source graphs",
            file=sys.stderr,
        )
        return False

    print(
        "the graphs disagree on either the node naming or the graph " + "structure",
        file=sys.stderr,
    )

    # we now know that the set of node attributes is equal and that there are
    # no duplicates
    # thus we can now create a mapping between node ids of the two graphs

    # this dictionary stores for each attribute its node id in h
    node_mapping_h = {attr: n for n, attr in h_nodes}

    # this dictionary stores for each node id in g the node id in h
    node_mapping = {n: node_mapping_h[attr] for n, attr in g_nodes}

    # now go over all edges in g and check if they are in h as well
    # also check whether the attributes are equal
    # we do not check the other way round because we know that the number of
    # edges in both graphs is equal
    for n1, n2, attr in g_edges:
        n3 = node_mapping[n1]
        n4 = node_mapping[n2]
        if not h.has_edge(n3, n4):
            print("edge in g but not in h", file=sys.stderr)
            print(g.adj[n1][n2])
            return False
        if attr != frozenset(h.adj[n3][n4].items()):
            print("edge attributes do not agree", file=sys.stderr)
            print(attr)
            print(h.adj[n3][n4])
            return False

    return True


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description=("Check if two graphs are the same"))
    parser.add_argument("g", type=read_graph, help="graph g in GraphML or dot format")
    parser.add_argument("h", type=read_graph, help="graph h in GraphML or dot format")
    parser.add_argument("-v", "--verbose", action="store_true", help="be verbose")
    args = parser.parse_args()
    # TODO: also support graphs that are neither buildgraph nor srcgraph
    ret = graph_difference(args.g, args.h, args.verbose)
    exit(not ret)
