#metrics.py
import networkx as nx
import numpy as np
import datetime as dt
import graph_tool.all as gt
def clustering_coefficient(self, node):
    """Return the clustering coefficient of *node* (networkx backend).

    On the first call the metric is computed for every node at once and
    cached on the instance; later calls are plain dictionary lookups.
    NOTE: this should be a performance gain, but for very large graphs
    the all-at-once computation might be a problem; in that case just
    returning nx.clustering(self.graph, node) might be better.
    """
    print('Calculating clustering_coefficient for node %s' % (node,))
    # In the first run calculate the metric for all nodes at once and
    # cache the result on the instance.
    if not hasattr(self, 'all_clustering_coefficients'):
        self.all_clustering_coefficients = nx.clustering(self.graph)
    # Get the actual value from the pre-calculated cache.
    return self.all_clustering_coefficients[node]
def degree(self, node):
    """Return the degree of *node* as reported by the networkx graph."""
    print('Calculating degree for node %s' % (node,))
    return self.graph.degree(node)
def degree_gt(self, node):
    """Return the total degree of *node* using the graph_tool backend.

    The degree property map for all vertices is computed once, stored as
    the internal vertex property self.g.vp['degree'], and reused on
    subsequent calls.
    """
    print('Calculating degree with graph tool for node %s' % (node,))
    # Map the external node label to its graph_tool vertex.
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # Calculate degree for all nodes once and internalise the map.
    if not hasattr(self.g.vp, 'degree'):
        self.g.vp['degree'] = self.g.degree_property_map("total")
    return self.g.vp.degree[node_index]
def eigenvector_centrality_gt(self, node):
    """Return the eigenvector centrality of *node* (graph_tool backend).

    The centrality vector is computed once for the whole graph and
    internalised as a vertex property; the largest eigenvalue is kept on
    the instance so individual values can be rescaled.
    """
    print('Calculating eigenvector centrality with graph_tool for node %s' % (node,))
    if not hasattr(self.g.vertex_properties, 'eigenvector'):
        eigenvalue, eigenvector = gt.eigenvector(self.g)
        self.g.vertex_properties.eigenvector = eigenvector
        self.eigenvalue = eigenvalue
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # Adjusted with the eigenvalue for nicer values.
    return self.g.vp.eigenvector[self.g.vertex(node_index)] * float(self.eigenvalue)
def eigenvector_centrality(self, node):
    """Return the eigenvector centrality of *node* (networkx backend).

    Computed once for all nodes and cached on the instance; max_iter is
    raised so the power iteration converges on harder graphs.
    """
    print('Calculating eigenvector centrality for node %s' % (node,))
    if not hasattr(self, 'all_eigenvector_centralities'):
        self.all_eigenvector_centralities = nx.eigenvector_centrality(self.graph, max_iter=100000)
    return self.all_eigenvector_centralities[node]
def average_neighbor_degree(self, node):
    """Return the average degree of the neighbours of *node*.

    Same caching technique as clustering_coefficient: computed once for
    all nodes and cached.  Might also be a problem for very large graphs;
    nx.average_neighbor_degree(self.graph, nodes=node) might be the way
    to go in that case.
    """
    print('Calculating average_neighbour_degree for node %s' % (node,))
    if not hasattr(self, 'all_average_neighbor_degrees'):
        self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
    return self.all_average_neighbor_degrees[node]
def iterated_average_neighbor_degree(self, node):
    """Return the average degree of the two-hop neighbourhood of *node*.

    Two-hop nodes are the neighbours of neighbours, excluding the direct
    (one-hop) neighbours and the node itself.  Returns 0 when the node
    has no neighbours or no strict two-hop neighbours exist.
    """
    print('Calculating iterated_average_neighbor degree for node %s' % (node,))
    result = 0  # default when there is nothing to average over
    first_level_neighbors = self.graph.neighbors(node)
    if len(first_level_neighbors) != 0:
        # Collect every neighbour of every direct neighbour.
        second_level_neighbors = []
        for first_level_neighbor in first_level_neighbors:
            second_level_neighbors.extend(self.graph.neighbors(first_level_neighbor))
        # Remove one-hop nodes and the starting node itself.
        relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
        if len(relevant_nodes) != 0:
            degree_sum = 0
            for relevant_node in relevant_nodes:
                degree_sum += self.graph.degree(relevant_node)
            result = float(degree_sum) / float(len(relevant_nodes))
    return result
def iterated_average_neighbour_degree_gt(self, node):
    """graph_tool variant of iterated_average_neighbor_degree.

    Returns the average (total) degree of the strict two-hop
    neighbourhood of *node*, or 0 when no such vertices exist.

    Fix: Vertex.all_neighbors() returns an iterator, so the original
    len() call on it raised TypeError; the neighbours are materialised
    into a list first.
    """
    print('Calculating iterated_average_neighbour degree with graph tool for node %s' % (node,))
    result = 0  # default when there is nothing to average over
    vertex = gt.find_vertex(self.g, self.label_map, node)[0]
    # all_neighbors() yields an iterator; materialise so we can take its
    # length and build sets from it.
    first_level_neighbours = list(vertex.all_neighbors())
    if len(first_level_neighbours) != 0:
        # Collect all two-hop vertices.
        second_level_neighbours = []
        for first_level_neighbour in first_level_neighbours:
            second_level_neighbours.extend(first_level_neighbour.all_neighbors())
        # Remove one-hop vertices and the starting vertex itself.
        relevant_vertices = set(second_level_neighbours) - set(first_level_neighbours) - set([vertex])
        if len(relevant_vertices) != 0:
            # If degree has not been calculated yet, calculate it for all
            # nodes and internalise the property map.
            if not hasattr(self.g.vp, 'degree'):
                self.g.vp['degree'] = self.g.degree_property_map("total")
            degree_sum = 0
            for relevant_vertex in relevant_vertices:
                degree_sum += self.g.vp.degree[relevant_vertex]
            result = float(degree_sum) / float(len(relevant_vertices))
    return result
def eccentricity(self, node):
    """Return the eccentricity of *node*, or 0 for disconnected graphs.

    On the first call the graph's connectivity is checked by comparing
    the largest component (graph_tool) with the full node count
    (networkx).  If connected, all eccentricities are computed and
    cached; otherwise an empty cache is stored so every later call
    returns 0 immediately (eccentricity is undefined when disconnected).
    """
    print('Calculating eccentricity for node %s' % (node,))
    if not hasattr(self, 'all_eccentricities'):
        l = gt.label_largest_component(self.g)  # find the largest component
        print('Found the largest component')
        u = gt.GraphView(self.g, vfilt=l)  # extract it as a graph
        print('The number of vertices in the largest component is %s' % (u.num_vertices(),))
        print('The number of vertices in the original graph is %s' % (self.g.num_vertices(),))
        # The graph is connected iff the largest component spans it all.
        if u.num_vertices() == nx.number_of_nodes(self.graph):
            print("Graph is connected")
            self.all_eccentricities = nx.eccentricity(self.graph)
            print("Calculated all eccentricities")
            return self.all_eccentricities[node]
        else:
            # Cache an empty result so later calls short-circuit to 0.
            print("Graph is disconnected")
            self.all_eccentricities = {}
    if self.all_eccentricities != {}:
        print("Returning eccentricity for %s - %s" % (node, self.all_eccentricities[node]))
        return self.all_eccentricities[node]
    else:
        print("Returning 0")
        return 0
def eccentricity_gt(self, node):
    """Approximate eccentricity of *node* via graph_tool distances.

    The search is bounded by a pseudo-diameter (computed once on
    self.glc and stored as an internal graph property); unreachable
    vertices carry the sentinel distance 2147483647 and are masked out
    before taking the maximum.
    """
    print('Calculating eccentricity with graph tool for node %s' % (node,))
    # Find index of node.
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    if not hasattr(self.g.gp, 'pseudo_diameter'):
        # Find the approximate diameter once to bound the walk.
        print('Finding maximum distance for walk')
        self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
        self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
        # endpoints will not be used
    # All distances from node, capped at pseudo_diameter + 1.
    distances = gt.shortest_distance(self.g, node_index, max_dist=self.g.gp.pseudo_diameter + 1).a
    # Mask unreachable vertices and take the maximum of the rest.
    maximum = np.ma.max(np.ma.masked_where(distances > 2147483646, distances), 0)
    return maximum
def eccentricity_gt_s(self, node):
    """Exact eccentricity of *node* for small graphs (graph_tool backend).

    Computes and caches the full all-pairs shortest-distance map on the
    first call, then scans the row for *node*, skipping unreachable
    vertices (sentinel distance 2147483647).
    """
    print('Calculating eccentricity for small graphs with graph tool for node %s' % (node,))
    eccentricity = 0  # initialise
    # Find index of node.
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # Get all shortest path lengths (cached across calls).
    if not hasattr(self, 'all_distances'):
        self.all_distances = gt.shortest_distance(self.g)
    for distance in self.all_distances[node_index]:
        if distance < 2147483647:  # disregard nodes which are not accessible
            eccentricity = max(eccentricity, distance)
    return eccentricity
def betweenness_centrality(self, node):
    """Return the betweenness centrality of *node* (networkx backend).

    Computed once for all nodes and cached on the instance.
    """
    print('Calculating betweenness_centrality for node %s' % (node,))
    if not hasattr(self, 'all_betweenness_centralities'):
        self.all_betweenness_centralities = nx.betweenness_centrality(self.graph)
    return self.all_betweenness_centralities[node]
def betweenness_centrality_gt(self, node):
    """Return the betweenness centrality of *node* (graph_tool backend).

    The vertex and edge betweenness maps are computed once for the whole
    graph and internalised as properties; later calls only look up the
    requested vertex.
    """
    print('Calculating betweenness_centrality with graph_tool for node %s' % (node,))
    if not hasattr(self.g.vertex_properties, 'betweenness'):
        vp, ep = gt.betweenness(self.g)
        # Internalize the property maps so they persist with the graph.
        self.g.vertex_properties.betweenness = vp
        self.g.edge_properties.betweenness = ep
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    return self.g.vp.betweenness[self.g.vertex(node_index)]
def average_shortest_path_length(self, node):
    """Return the average shortest-path length from *node* (networkx).

    Caching the value for all nodes at once failed previously, so this
    is computed per node.  The source node itself is included with
    distance 0, so the mapping is never empty.
    """
    print('Calculating average_shortest_path_length for node %s' % (node,))
    # Get all shortest path lengths from the node.
    all_shortest_path_lengths_for_node = nx.shortest_path_length(self.graph, source=node)
    # Average over all reachable targets.
    sum_of_lengths = sum(all_shortest_path_lengths_for_node[target]
                         for target in all_shortest_path_lengths_for_node)
    return float(sum_of_lengths) / len(all_shortest_path_lengths_for_node)
def average_shortest_path_length_gt(self, node):
    """Approximate average shortest-path length from *node* (graph_tool).

    The walk is bounded by a pseudo-diameter (computed once on self.glc
    and stored as an internal graph property); unreachable vertices
    carry the sentinel distance 2147483647 and are masked out of the
    average.
    """
    print('Calculating average_shortest_path_length with graph tool for node %s' % (node,))
    # Find index of node.
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    if not hasattr(self.g.gp, 'pseudo_diameter'):
        # Find the approximate diameter once to bound the walk.
        print('Finding maximum distance for walk')
        self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
        self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
        # endpoints will not be used
    # All distances from node, capped at pseudo_diameter + 1.
    distances = gt.shortest_distance(self.g, node_index, max_dist=self.g.gp.pseudo_diameter + 1).a
    # Mask unreachable vertices and average the rest.
    average = np.ma.average(np.ma.masked_where(distances > 2147483646, distances))
    return float(average)
def average_shortest_path_length_gt_small_graphs(self, node):
    """Exact average shortest-path length from *node* for small graphs.

    Computes and caches the full all-pairs shortest-distance map
    (graph_tool) on the first call, then averages the row for *node*
    over accessible vertices only (sentinel distance 2147483647 marks
    other components).  Returns 0 when nothing is accessible.
    """
    print('Calculating average_shortest_path_length for small graphs with graph tool for node %s' % (node,))
    result = 0  # initialise
    # Find index of node.
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # Get all shortest path lengths (cached across calls).
    if not hasattr(self, 'all_distances'):
        self.all_distances = gt.shortest_distance(self.g)
    distances = self.all_distances[node_index]
    # Average over accessible vertices only.
    sum_of_distances = 0
    accessible_nodes = 0
    for distance in distances:
        if distance < 2147483647:  # disregard nodes in other components
            sum_of_distances += distance
            accessible_nodes += 1
    if accessible_nodes != 0:
        result = float(sum_of_distances) / float(accessible_nodes)
    return result
def deterioration(self, node):
    """Percentage of the largest component lost when *node* is removed.

    The node is temporarily filtered out via self.exclusion_map, the
    largest remaining component is measured against self.glc (the
    original largest component), and the filter is reset before
    returning.
    """
    print('Calculating deterioration due to removal of node %s' % (node,))
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    self.exclusion_map[node_index] = 0  # take out node
    u = gt.GraphView(self.g, vfilt=self.exclusion_map)
    u = gt.GraphView(self.g, vfilt=gt.label_largest_component(u))
    # Relative shrink of the largest component, as a percentage.
    p = 100.0 * (1.0 - float(u.num_vertices()) / float(self.glc.num_vertices()))
    self.exclusion_map[node_index] = 1  # reset node
    return p
#############
# advanced metrics
#############
def correct_clustering_coefficient(self, node):
    """Clustering coefficient rescaled by log(degree) / log(max_degree).

    Reads the raw clustering coefficient and degree from redis and the
    graph-wide maximum degree from the sorted metric set.

    Fix: np.log(0) is -inf, so a degree-0 node produced NaN; nodes with
    fewer than two neighbours have no triangles, so 0.0 is returned for
    them directly (identical to the old result for degree 1, where
    log(1) == 0 already forced the product to 0).
    """
    print('Calculating correct_clustering_coefficient for node %s' % (node,))
    clustering_coefficient = float(self.redis.hget(self.node_prefix + str(node), 'clustering_coefficient'))
    degree = float(self.redis.hget(self.node_prefix + str(node), 'degree'))
    if degree < 2.0:
        return 0.0
    max_degree = self.redis.zrange(self.metric_prefix + 'degree', -1, -1, withscores=True, score_cast_func=float)[0][1]
    corrected_cc = clustering_coefficient * np.log(degree) / np.log(max_degree)
    return corrected_cc
def correct_clustering_coefficient_old(self, node):
    """Old degree-based correction: cc + (degree * cc) / 4.

    Reads the raw clustering coefficient and degree from redis.  Kept
    for comparison with correct_clustering_coefficient.
    """
    print('Calculating correct_clustering_coefficient for node %s' % (node,))
    clustering_coefficient = float(self.redis.hget(self.node_prefix + str(node), 'clustering_coefficient'))
    degree = float(self.redis.hget(self.node_prefix + str(node), 'degree'))
    corrected_cc = clustering_coefficient + (degree * clustering_coefficient) / 4.0
    return corrected_cc
def correct_average_neighbor_degree(self, node):
    """Average neighbour degree, corrected towards the median.

    result = avgnd + (((median - avgnd) / std) / k) * avgnd
    where k is the number of neighbours and median/std are taken over
    the neighbour degrees.  Falls back to the raw redis value when the
    node has no neighbours, avgnd is 0, or all neighbour degrees are
    equal (std == 0).
    """
    print('Calculating correct_average_neighbor degree for node %s' % (node,))
    avgnd = float(self.redis.hget(self.node_prefix + str(node), 'average_neighbor_degree'))
    # Guard clauses: nothing to correct in these cases.
    if avgnd == 0.0:
        return avgnd
    neighbors = self.graph.neighbors(node)
    number_of_neighbors = float(len(neighbors))
    if number_of_neighbors == 0.0:
        return avgnd
    neighbor_degrees = [self.graph.degree(neighbor) for neighbor in neighbors]
    # Using numpy median and standard deviation implementations.
    numpy_neighbor_degrees = np.array(neighbor_degrees)
    standard_deviation = np.std(numpy_neighbor_degrees)
    if standard_deviation == 0.0:
        return avgnd
    median = np.median(numpy_neighbor_degrees)
    return avgnd + (((median - avgnd) / standard_deviation) / number_of_neighbors) * avgnd
def correct_iterated_average_neighbor_degree(self, node):
    """Two-hop average neighbour degree, corrected towards the median.

    ciand = iand + (((median - iand) / std) / k) * iand
    where k is the number of strict two-hop neighbours (neighbours of
    neighbours minus direct neighbours and the node itself) and
    median/std are taken over their degrees.  Returns the raw redis
    value when iand is 0, no two-hop neighbours exist, or std == 0.
    """
    print('Calculating correct_iterated_average_neighbor_degree for node ' + str(node))
    iand = float(self.redis.hget(self.node_prefix + str(node), 'iterated_average_neighbor_degree'))
    ciand = iand
    if iand != 0.0:
        first_level_neighbors = self.graph.neighbors(node)
        # Collect all two-hop nodes.
        second_level_neighbors = []
        for first_level_neighbor in first_level_neighbors:
            second_level_neighbors.extend(self.graph.neighbors(first_level_neighbor))
        # Remove one-hop neighbours and the node itself.
        relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
        if len(relevant_nodes) != 0:
            node_degrees = [self.graph.degree(relevant_node) for relevant_node in relevant_nodes]
            numpy_node_degrees = np.array(node_degrees)
            standard_deviation = np.std(numpy_node_degrees)
            if standard_deviation != 0.0:
                median = np.median(numpy_node_degrees)
                ciand = iand + (((median - iand) / standard_deviation) / float(len(relevant_nodes))) * iand
    return ciand