## File: cluster.py

package info (click to toggle)
python-networkx 1.7~rc1-3
 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364 # -*- coding: utf-8 -*- """Algorithms to characterize the number of triangles in a graph.""" from itertools import combinations import networkx as nx from networkx import NetworkXError __author__ = """\n""".join(['Aric Hagberg ', 'Dan Schult (dschult@colgate.edu)', 'Pieter Swart (swart@lanl.gov)', 'Jordi Torrents ']) # Copyright (C) 2004-2011 by # Aric Hagberg # Dan Schult # Pieter Swart # All rights reserved. # BSD license. __all__= ['triangles', 'average_clustering', 'clustering', 'transitivity', 'square_clustering'] def triangles(G, nodes=None): """Compute the number of triangles. Finds the number of triangles that include a node as one vertex. Parameters ---------- G : graph A networkx graph nodes : container of nodes, optional (default= all nodes in G) Compute triangles for nodes in this container. Returns ------- out : dictionary Number of triangles keyed by node label. Examples -------- >>> G=nx.complete_graph(5) >>> print(nx.triangles(G,0)) 6 >>> print(nx.triangles(G)) {0: 6, 1: 6, 2: 6, 3: 6, 4: 6} >>> print(list(nx.triangles(G,(0,1)).values())) [6, 6] Notes ----- When computing triangles for the entire graph each triangle is counted three times, once at each node. Self loops are ignored. """ if G.is_directed(): raise NetworkXError("triangles() is not defined for directed graphs.") if nodes in G: # return single value return next(_triangles_and_degree_iter(G,nodes))[2] // 2 return dict( (v,t // 2) for v,d,t in _triangles_and_degree_iter(G,nodes)) def _triangles_and_degree_iter(G,nodes=None): """ Return an iterator of (node, degree, triangles). This double counts triangles so you may want to divide by 2. See degree() and triangles() for definitions and details. """ if G.is_multigraph(): raise NetworkXError("Not defined for multigraphs.") if nodes is None: nodes_nbrs = G.adj.items() else: nodes_nbrs= ( (n,G[n]) for n in G.nbunch_iter(nodes) ) for v,v_nbrs in nodes_nbrs: vs=set(v_nbrs)-set([v]) ntriangles=0 for w in vs: ws=set(G[w])-set([w]) ntriangles+=len(vs.intersection(ws)) yield (v,len(vs),ntriangles) def _weighted_triangles_and_degree_iter(G, nodes=None, weight='weight'): """ Return an iterator of (node, degree, weighted_triangles). Used for weighted clustering. """ if G.is_multigraph(): raise NetworkXError("Not defined for multigraphs.") if weight is None or G.edges()==[]: max_weight=1.0 else: max_weight=float(max(d.get(weight,1.0) for u,v,d in G.edges(data=True))) if nodes is None: nodes_nbrs = G.adj.items() else: nodes_nbrs= ( (n,G[n]) for n in G.nbunch_iter(nodes) ) for i,nbrs in nodes_nbrs: inbrs=set(nbrs)-set([i]) weighted_triangles=0.0 seen=set() for j in inbrs: wij=G[i][j].get(weight,1.0)/max_weight seen.add(j) jnbrs=set(G[j])-seen # this keeps from double counting for k in inbrs&jnbrs: wjk=G[j][k].get(weight,1.0)/max_weight wki=G[i][k].get(weight,1.0)/max_weight weighted_triangles+=(wij*wjk*wki)**(1.0/3.0) yield (i,len(inbrs),weighted_triangles*2) def average_clustering(G, nodes=None, weight=None, count_zeros=True): r"""Compute the average clustering coefficient for the graph G. The clustering coefficient for the graph is the average, .. math:: C = \frac{1}{n}\sum_{v \in G} c_v, where n is the number of nodes in G. Parameters ---------- G : graph nodes : container of nodes, optional (default=all nodes in G) Compute average clustering for nodes in this container. weight : string or None, optional (default=None) The edge attribute that holds the numerical value used as a weight. If None, then each edge has weight 1. count_zeros : bool (default=False) If False include only the nodes with nonzero clustering in the average. Returns ------- avg : float Average clustering Examples -------- >>> G=nx.complete_graph(5) >>> print(nx.average_clustering(G)) 1.0 Notes ----- This is a space saving routine; it might be faster to use the clustering function to get a list and then take the average. Self loops are ignored. References ---------- .. [1] Generalizations of the clustering coefficient to weighted complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela, K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007). http://jponnela.com/web_documents/a9.pdf .. [2] Marcus Kaiser, Mean clustering coefficients: the role of isolated nodes and leafs on clustering measures for small-world networks. http://arxiv.org/abs/0802.2512 """ c=clustering(G,nodes,weight=weight).values() if not count_zeros: c = [v for v in c if v > 0] return sum(c)/float(len(c)) def clustering(G, nodes=None, weight=None): r"""Compute the clustering coefficient for nodes. For unweighted graphs the clustering of each node u is the fraction of possible triangles that exist, For each node find the fraction of possible triangles that exist, .. math:: c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)}, where T(u) is the number of triangles through node u and deg(u) is the degree of u. For weighted graphs the clustering is defined as the geometric average of the subgraph edge weights [1]_, .. math:: c_u = \frac{1}{deg(u)(deg(u)-1))} \sum_{uv} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}. The edge weights \hat{w}_{uv} are normalized by the maximum weight in the network \hat{w}_{uv} = w_{uv}/\max(w). The value of c_u is assigned to 0 if deg(u) < 2. Parameters ---------- G : graph nodes : container of nodes, optional (default=all nodes in G) Compute clustering for nodes in this container. weight : string or None, optional (default=None) The edge attribute that holds the numerical value used as a weight. If None, then each edge has weight 1. Returns ------- out : float, or dictionary Clustering coefficient at specified nodes Examples -------- >>> G=nx.complete_graph(5) >>> print(nx.clustering(G,0)) 1.0 >>> print(nx.clustering(G)) {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0} Notes ----- Self loops are ignored. References ---------- .. [1] Generalizations of the clustering coefficient to weighted complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela, K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007). http://jponnela.com/web_documents/a9.pdf """ if G.is_directed(): raise NetworkXError('Clustering algorithms are not defined ', 'for directed graphs.') if weight is not None: td_iter=_weighted_triangles_and_degree_iter(G,nodes,weight) else: td_iter=_triangles_and_degree_iter(G,nodes) clusterc={} for v,d,t in td_iter: if t==0: clusterc[v]=0.0 else: clusterc[v]=t/float(d*(d-1)) if nodes in G: return list(clusterc.values())[0] # return single value return clusterc def transitivity(G): r"""Compute graph transitivity, the fraction of all possible triangles present in G. Possible triangles are identified by the number of "triads" (two edges with a shared vertex). The transitivity is .. math:: T = 3\frac{\#triangles}{\#triads}. Parameters ---------- G : graph Returns ------- out : float Transitivity Examples -------- >>> G = nx.complete_graph(5) >>> print(nx.transitivity(G)) 1.0 """ triangles=0 # 6 times number of triangles contri=0 # 2 times number of connected triples for v,d,t in _triangles_and_degree_iter(G): contri += d*(d-1) triangles += t if triangles==0: # we had no triangles or possible triangles return 0.0 else: return triangles/float(contri) def square_clustering(G, nodes=None): r""" Compute the squares clustering coefficient for nodes. For each node return the fraction of possible squares that exist at the node [1]_ .. math:: C_4(v) = \frac{ \sum_{u=1}^{k_v} \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v} \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]}, where q_v(u,w) are the number of common neighbors of u and w other than v (ie squares), and a_v(u,w) = (k_u - (1+q_v(u,w)+\theta_{uv}))(k_w - (1+q_v(u,w)+\theta_{uw})), where \theta_{uw} = 1 if u and w are connected and 0 otherwise. Parameters ---------- G : graph nodes : container of nodes, optional (default=all nodes in G) Compute clustering for nodes in this container. Returns ------- c4 : dictionary A dictionary keyed by node with the square clustering coefficient value. Examples -------- >>> G=nx.complete_graph(5) >>> print(nx.square_clustering(G,0)) 1.0 >>> print(nx.square_clustering(G)) {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0} Notes ----- While C_3(v) (triangle clustering) gives the probability that two neighbors of node v are connected with each other, C_4(v) is the probability that two neighbors of node v share a common neighbor different from v. This algorithm can be applied to both bipartite and unipartite networks. References ---------- .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005 Cycles and clustering in bipartite networks. Physical Review E (72) 056127. """ if nodes is None: node_iter = G else: node_iter = G.nbunch_iter(nodes) clustering = {} for v in node_iter: clustering[v] = 0.0 potential=0 for u,w in combinations(G[v], 2): squares = len((set(G[u]) & set(G[w])) - set([v])) clustering[v] += squares degm = squares + 1.0 if w in G[u]: degm += 1 potential += (len(G[u]) - degm) * (len(G[w]) - degm) + squares if potential > 0: clustering[v] /= potential if nodes in G: return list(clustering.values())[0] # return single value return clustering