# metrics.py
import networkx as nx
import numpy as np
import datetime as dt
import graph_tool.all as gt
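
# NOTE: every function below takes `self` as its first argument: the module is
# apparently meant to be bound to a calculator object (presumably the one in
# metric_calculator.py) that provides, among other attributes:
#   self.graph         - the network as a networkx Graph
#   self.g             - the same network as a graph_tool Graph
#   self.label_map     - graph_tool vertex property map with the external node labels
#   self.glc           - GraphView of the largest connected component of self.g
#   self.exclusion_map - boolean vertex property map used to filter out nodes
#   self.redis         - Redis client holding previously calculated metric values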

def clustering_coefficient(self, node):
  print 'Calculating clustering_coefficient for node', node
  # on the first call, calculate the metric for all nodes at once and cache
  # the results in a dict on the instance for later lookups
  # NOTE: this should give a performance gain, but for very large graphs the
  #       full dict might be a problem; in that case, just returning
  #       nx.clustering(self.graph, node) might be better
  if not hasattr(self, 'all_clustering_coefficients'):
    self.all_clustering_coefficients = nx.clustering(self.graph)

  # get the actual value from the pre-calculated dict
  return self.all_clustering_coefficients[node]
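
# Minimal usage sketch (hypothetical host object, not from this repo), showing
# the caching behaviour:
#
#   import networkx as nx
#   class Host(object): pass
#   host = Host()
#   host.graph = nx.karate_club_graph()
#   print clustering_coefficient(host, 0)  # first call caches all values
#   print clustering_coefficient(host, 0)  # second call hits the cache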

def degree(self, node):
  print 'Calculating degree for node', node
  return self.graph.degree(node)

def degree_gt(self, node):
  print 'Calculating degree with graph tool for node', node
  # find index of node
  node_index = gt.find_vertex(self.g, self.label_map, node)[0]
  
  # calculate degree for all nodes
  if not hasattr(self.g.vp, 'degree'):
    self.g.vp['degree'] = self.g.degree_property_map("total")

  return self.g.vp.degree[node_index]
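
# Sketch of the setup the *_gt helpers assume (hypothetical construction, not
# from this file): self.g is a graph_tool Graph and self.label_map is a vertex
# property map holding the external node labels, so that gt.find_vertex() can
# translate a label into a vertex, e.g.:
#
#   g = gt.Graph(directed=False)
#   label_map = g.new_vertex_property("string")
#   v = g.add_vertex()
#   label_map[v] = 'AS1234'
#   gt.find_vertex(g, label_map, 'AS1234')  # -> [v]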

def eigenvector_centrality_gt(self, node):
  print 'Calculating eigenvector centrality with graph_tool for node', node

  if not hasattr(self.g.vertex_properties, 'eigenvector'):
    eigenvalue, eigenvector = gt.eigenvector(self.g)
    self.g.vertex_properties.eigenvector = eigenvector
    self.eigenvalue = eigenvalue

  node_index = gt.find_vertex(self.g, self.label_map, node)[0]

  # this has been scaled with the eigenvalue for nicer values
  return self.g.vp.eigenvector[self.g.vertex(node_index)] * float(self.eigenvalue)
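
# Note on the scaling above: gt.eigenvector() returns the largest eigenvalue
# and (per the graph_tool docs) a unit-norm eigenvector property map, so
# multiplying every entry by the same eigenvalue only rescales the scores
# "for nicer values"; the relative ranking of nodes is unchanged, e.g.:
#
#   scores = [self.g.vp.eigenvector[v] for v in self.g.vertices()]
#   scaled = [s * float(self.eigenvalue) for s in scores]
#   # sorting by `scores` and by `scaled` yields the same vertex order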

def eigenvector_centrality(self, node):
  print 'Calculating eigenvector centrality for node', node

  if not hasattr(self, 'all_eigenvector_centralities'):
    self.all_eigenvector_centralities = nx.eigenvector_centrality(self.graph, max_iter=100000)

  return self.all_eigenvector_centralities[node]

def average_neighbor_degree(self, node):
  print 'Calculating average_neighbor_degree for node', node
  # same caching technique as in self.clustering_coefficient
  # might also break for very large graphs
  # nx.average_neighbor_degree(self.graph, nodes=node) might be the way to go

  if not hasattr(self, 'all_average_neighbor_degrees'):
    self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
  return self.all_average_neighbor_degrees[node]

def iterated_average_neighbor_degree(self, node):
  print 'Calculating iterated_average_neighbor_degree for node', node
  result = 0 # initialise

  first_level_neighbors = self.graph.neighbors(node)
  if len(first_level_neighbors) != 0:
    second_level_neighbors = []
    # get all two-hop nodes
    for first_level_neighbor in first_level_neighbors:
      current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
      second_level_neighbors.extend(current_second_level_neighbors)

    # remove one-hop nodes and self
    relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
    
    if len(relevant_nodes) != 0:
      degree_sum = 0
      for relevant_node in relevant_nodes:
        degree_sum += self.graph.degree(relevant_node)
      result = float(degree_sum)/float(len(relevant_nodes))
  return result
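
# Worked example (illustrative): on nx.path_graph(4) (nodes 0-1-2-3) and
# node 0, the first-level neighbors are {1} and the two-hop candidates are
# {0, 2}; removing self and one-hop nodes leaves {2}, whose degree is 2, so
# the result is 2.0.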

def iterated_average_neighbour_degree_gt(self, node):
  print 'Calculating iterated_average_neighbour_degree with graph tool for node', node

  result = 0 # initialise

  vertex = gt.find_vertex(self.g, self.label_map, node)[0]
  # all_neighbours() returns an iterator, so materialise it as a list
  # before taking its length
  first_level_neighbours = list(vertex.all_neighbours())

  if len(first_level_neighbours) != 0:
    second_level_neighbours = []
    # get all two-hop nodes
    for first_level_neighbour in first_level_neighbours:
      current_second_level_neighbours = first_level_neighbour.all_neighbours()
      second_level_neighbours.extend(current_second_level_neighbours)

    # remove one-hop nodes and self
    relevant_vertices = set(second_level_neighbours) - set(first_level_neighbours) - set([vertex])
    
    if len(relevant_vertices) != 0:
      # if degree has not been calculated yet, calculate it for all nodes
      if not hasattr(self.g.vp, 'degree'):
        self.g.vp['degree'] = self.g.degree_property_map("total")
  
      degree_sum = 0 # initialise
      for relevant_vertex in relevant_vertices:
        degree_sum += self.g.vp.degree[relevant_vertex]
      result = float(degree_sum)/float(len(relevant_vertices))
  return result

def eccentricity(self, node):
  print 'Calculating eccentricity for node', node
  if not hasattr(self, 'all_eccentricities'):
    l = gt.label_largest_component(self.g) # find the largest component
    print 'Found the largest component'
    u = gt.GraphView(self.g, vfilt=l) # extract the largest component as a graph
    print 'The number of vertices in the largest component is', u.num_vertices()
    print 'The number of vertices in the original graph is', self.g.num_vertices()
    # the graph is connected iff the largest component spans all vertices
    if u.num_vertices() == nx.number_of_nodes(self.graph):
      print 'Graph is connected'
      self.all_eccentricities = nx.eccentricity(self.graph)
      print 'Calculated all eccentricities'
      return self.all_eccentricities[node]
    else:
      print 'Graph is disconnected'
      # nx.eccentricity is undefined on a disconnected graph, so cache an
      # empty dict and fall back to 0 below
      self.all_eccentricities = {}
  if self.all_eccentricities != {}:
    print 'Returning eccentricity for', node, '-', self.all_eccentricities[node]
    return self.all_eccentricities[node]
  else:
    print 'Returning 0'
    return 0

def eccentricity_gt(self, node):
  print 'Calculating eccentricity with graph tool for node', node

  # find index of node
  node_index = gt.find_vertex(self.g, self.label_map, node)[0]

  if not hasattr(self.g.gp, 'pseudo_diameter'):
    # find the approximate diameter as an upper bound for the search
    print 'Finding maximum distance for walk'
    self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
    self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
    # endpoints will not be used

  # find all distances from node
  distances = gt.shortest_distance(self.g, node_index, max_dist=self.g.gp.pseudo_diameter+1).a
  # unreachable vertices are reported as INT32_MAX, so mask them out
  # before taking the maximum
  maximum = np.ma.max(np.ma.masked_where(distances > 2147483646, distances))
  return maximum
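
# The masking above treats graph_tool's "unreachable" sentinel
# (INT32_MAX = 2147483647) as missing data. A minimal numpy illustration:
#
#   d = np.array([1, 2, 2147483647])
#   np.ma.max(np.ma.masked_where(d > 2147483646, d))  # -> 2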

def eccentricity_gt_s(self, node):
  print 'Calculating eccentricity for small graphs with graph tool for node', node
  eccentricity = 0 # initialise

  #find index of node
  node_index = gt.find_vertex(self.g, self.label_map, node)[0]
  #get all shortest path lengths
  if not hasattr(self, 'all_distances'):
    self.all_distances = gt.shortest_distance(self.g)

  for distance in self.all_distances[node_index]:
    if distance < 2147483647: # disregard all nodes which are not accessible
      eccentricity = max(eccentricity, distance)
  return eccentricity

def betweenness_centrality(self, node):
  print 'Calculating betweenness_centrality for node',node
  if not hasattr(self, 'all_betweenness_centralities'):
    self.all_betweenness_centralities = nx.betweenness_centrality(self.graph)
  return self.all_betweenness_centralities[node]


def betweenness_centrality_gt(self, node):
  print 'Calculating betweenness_centrality with graph_tool for node', node

  if not hasattr(self.g.vertex_properties, 'betweenness'):
    vp, ep = gt.betweenness(self.g)
    # internalise the property maps
    self.g.vertex_properties.betweenness = vp
    self.g.edge_properties.betweenness   = ep
  node_index = gt.find_vertex(self.g, self.label_map, node)[0]

  return self.g.vp.betweenness[self.g.vertex(node_index)]

def average_shortest_path_length(self, node):
  print 'Calculating average_shortest_path_length for node', node
  # caching the values for all nodes at once failed,
  # so this metric is calculated per node

  #get all shortest path lengths
  all_shortest_path_lengths_for_node = nx.shortest_path_length(self.graph, source=node)

  #calculate average
  sum_of_lengths = 0
  for target in all_shortest_path_lengths_for_node:
    sum_of_lengths += all_shortest_path_lengths_for_node[target]
  
  return float(sum_of_lengths)/len(all_shortest_path_lengths_for_node)
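
# Worked example (illustrative): for self.graph = nx.path_graph(4) and node 0
# the shortest path lengths are {0: 0, 1: 1, 2: 2, 3: 3}, so the function
# returns (0+1+2+3)/4 = 1.5. Note that the source node itself (distance 0)
# is counted in the denominator.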

def average_shortest_path_length_gt(self, node):
  print 'Calculating average_shortest_path_length with graph tool for node', node
  # find index of node
  node_index = gt.find_vertex(self.g, self.label_map, node)[0]

  if not hasattr(self.g.gp, 'pseudo_diameter'):
    # find the approximate diameter as an upper bound for the search
    print 'Finding maximum distance for walk'
    self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
    self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
    # endpoints will not be used

  # find all distances from node
  distances = gt.shortest_distance(self.g, node_index, max_dist=self.g.gp.pseudo_diameter+1).a
  # mask out unreachable vertices (INT32_MAX) before averaging
  average = np.ma.average(np.ma.masked_where(distances > 2147483646, distances))
  return float(average)
    
def average_shortest_path_length_gt_small_graphs(self, node):
  print 'Calculating average_shortest_path_length for small graphs with graph tool for node',node
  result = 0 # initialise

  #find index of node
  node_index = gt.find_vertex(self.g, self.label_map, node)[0]
  #get all shortest path lengths
  if not hasattr(self, 'all_distances'):
    self.all_distances = gt.shortest_distance(self.g)
  
  distances = self.all_distances[node_index]
  #calculate average
  sum_of_distances = 0
  accessible_nodes = 0
  for distance in distances:
    if distance < 2147483647: # disregard all nodes in other components
      sum_of_distances += distance
      accessible_nodes += 1
  if accessible_nodes != 0:
    result = float(sum_of_distances)/float(accessible_nodes)
  return result
    
def deterioration(self, node):
  print 'Calculating deterioration due to removal of node', node

  node_index = gt.find_vertex(self.g, self.label_map, node)[0]
  self.exclusion_map[node_index] = 0 # take out node
  u = gt.GraphView(self.g, vfilt=self.exclusion_map)
  u = gt.GraphView(self.g, vfilt=gt.label_largest_component(u))
  # percentage of the largest component lost by removing the node
  p = 100.0*(1.0-float(u.num_vertices())/float(self.glc.num_vertices()))
  self.exclusion_map[node_index] = 1 # reset node

  return p
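
# Worked example (illustrative, assuming self.glc spans the whole graph): on
# the path graph 0-1-2-3, excluding node 1 leaves the components {0} and
# {2, 3}; the largest surviving component keeps 2 of the original 4 vertices,
# so the deterioration is 100.0 * (1.0 - 2.0/4.0) = 50.0 percent.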

#############
# advanced metrics
#############
def correct_clustering_coefficient(self, node):
  print 'Calculating correct_clustering_coefficient for node', node
  clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node),'clustering_coefficient'))
  degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
  max_degree = self.redis.zrange(self.metric_prefix+'degree', -1, -1, withscores=True, score_cast_func=float)[0][1]
  corrected_cc = clustering_coefficient * np.log(degree) / np.log(max_degree)
  return corrected_cc
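
# Numeric illustration: with clustering_coefficient = 0.5, degree = 4 and
# max_degree = 16, the correction gives 0.5 * log(4)/log(16) = 0.5 * 0.5
# = 0.25, i.e. the coefficient is damped for nodes whose degree is far below
# the maximum degree in the graph.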

def correct_clustering_coefficient_old(self, node):
  print 'Calculating correct_clustering_coefficient (old variant) for node', node
  clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node),'clustering_coefficient'))
  degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
  corrected_cc = clustering_coefficient + (degree * clustering_coefficient) / 4.0
  return corrected_cc

def correct_average_neighbor_degree(self, node):
  print 'Calculating correct_average_neighbor_degree for node', node
  avgnd = float(self.redis.hget(self.node_prefix+str(node), 'average_neighbor_degree'))
  
  if avgnd == 0.0:
    result = avgnd
  else:
    neighbors = self.graph.neighbors(node)
    number_of_neighbors = float(len(neighbors))
    if number_of_neighbors == 0.0:
      result = avgnd
    else:
      neighbor_degrees = []
      for neighbor in neighbors:
        neighbor_degrees.append(self.graph.degree(neighbor))

      #using numpy median and standard deviation implementation
      numpy_neighbor_degrees = np.array(neighbor_degrees)
      standard_deviation = np.std(numpy_neighbor_degrees)
      if standard_deviation == 0.0:
        result = avgnd
      else:
        median = np.median(numpy_neighbor_degrees)
        result = avgnd + ( ((median - avgnd) / standard_deviation) / number_of_neighbors ) * avgnd
  return result
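
# Numeric illustration (hypothetical values): for neighbor degrees [1, 2, 6]
# the stored avgnd would be 3.0, the median 2.0 and the standard deviation
# ~2.16, so the result is 3.0 + (((2.0 - 3.0) / 2.16) / 3.0) * 3.0 ~= 2.54:
# the average is pulled towards the median, damped by the spread of the
# neighbor degrees and by the number of neighbors.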

def correct_iterated_average_neighbor_degree(self, node):
  print 'Calculating correct_iterated_average_neighbor_degree for node '+str(node)
  iand = float(self.redis.hget(self.node_prefix+str(node), 'iterated_average_neighbor_degree'))
  ciand = iand
  if iand != 0.0:
    first_level_neighbors = self.graph.neighbors(node)
    second_level_neighbors = []

    # get all two-hop nodes
    for first_level_neighbor in first_level_neighbors:
      current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
      second_level_neighbors.extend(current_second_level_neighbors)

    #remove one-hop neighbors and self
    relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])

    if len(relevant_nodes) != 0:
      node_degrees = []
      for relevant_node in relevant_nodes:
        node_degrees.append(self.graph.degree(relevant_node))

      numpy_node_degrees = np.array(node_degrees)
      standard_deviation = np.std(numpy_node_degrees)
      if standard_deviation != 0.0:
        median = np.median(numpy_node_degrees)
        ciand = iand + ( ((median - iand) / standard_deviation) / float(len(relevant_nodes)) ) * iand
  return ciand
