
#metrics.py
import networkx as nx
import numpy as np
import datetime as dt
import graph_tool.all as gt

def clustering_coefficient(self, node):
  print('Calculating clustering_coefficient for node', node)
  # on the first call, calculate the metric for all nodes at once and cache
  # the result dict on the instance for later lookups
  # NOTE: this should give a performance gain, but for very large graphs the
  #       full dict might be a problem; in that case, just returning
  #       nx.clustering(self.graph, node) per node might be better
  if not hasattr(self, 'all_clustering_coefficients'):
    self.all_clustering_coefficients = nx.clustering(self.graph)

  # get the actual value from the pre-calculated dict
  return self.all_clustering_coefficients[node]
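
# Hedged sketch (illustrative only, not part of the pipeline): the cached
# dict built above, on a toy triangle graph. Every node closes one triangle,
# so nx.clustering returns {0: 1.0, 1: 1.0, 2: 1.0} and lookups are O(1).
def _demo_clustering_cache():
  g = nx.complete_graph(3)              # triangle: nodes 0, 1, 2
  all_coefficients = nx.clustering(g)   # computed once for all nodes
  return all_coefficients[0]            # single dict lookup, returns 1.0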

def degree(self, node):
  print('Calculating degree for node', node)
  return self.graph.degree(node)


def average_neighbor_degree(self, node):
  print('Calculating average_neighbor_degree for node', node)
  # same caching technique as in self.clustering_coefficient;
  # might also break for very large graphs, in which case
  # nx.average_neighbor_degree(self.graph, nodes=[node]) might be the way to go

  if not hasattr(self, 'all_average_neighbor_degrees'):
    self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
  return self.all_average_neighbor_degrees[node]

def iterated_average_neighbor_degree(self, node):
  print('Calculating iterated_average_neighbor_degree for node', node)
  first_level_neighbors = list(self.graph.neighbors(node))
  second_level_neighbors = []

  # get all two-hop nodes
  for first_level_neighbor in first_level_neighbors:
    current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
    second_level_neighbors.extend(current_second_level_neighbors)

  # remove one-hop nodes and the node itself
  relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])

  degree_sum = 0
  for relevant_node in relevant_nodes:
    degree_sum += self.graph.degree(relevant_node)

  if len(relevant_nodes) != 0:
    return float(degree_sum) / float(len(relevant_nodes))
  else:
    return 0
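
# Hedged sketch (illustrative only, not part of the pipeline): the two-hop
# logic above on a path graph 0-1-2-3. For node 0 the only two-hop node is
# 2, whose degree is 2, so the iterated average neighbor degree is 2.0.
def _demo_iterated_average_neighbor_degree():
  g = nx.path_graph(4)
  first = list(g.neighbors(0))                    # [1]
  second = []
  for n in first:
    second.extend(g.neighbors(n))                 # [0, 2]
  relevant = set(second) - set(first) - set([0])  # {2}
  return float(sum(g.degree(n) for n in relevant)) / len(relevant)  # 2.0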

def eccentricity(self, node):
  print('Calculating eccentricity for node', node)
  if not hasattr(self, 'all_eccentricities'):
    # find the largest component; directed=None follows the graph's own directedness
    l = gt.label_largest_component(self.graph_gt['graph_gt'], directed=None)
    print('Found the largest component')
    # extract the largest component as a filtered graph view
    u = gt.GraphView(self.graph_gt['graph_gt'], vfilt=l)
    print('The number of vertices in the largest component is', u.num_vertices())
    print('The number of vertices in the original graph is', nx.number_of_nodes(self.graph))
    if u.num_vertices() == nx.number_of_nodes(self.graph):
      print('Graph is connected')
      self.all_eccentricities = nx.eccentricity(self.graph)
      print('Calculated all eccentricities')
      return self.all_eccentricities[node]
    else:
      # eccentricity is undefined on a disconnected graph; cache an empty
      # dict so the check below short-circuits on later calls
      print('Graph is disconnected')
      self.all_eccentricities = {}
  if self.all_eccentricities != {}:
    print('Returning eccentricity for', node, '-', self.all_eccentricities[node])
    return self.all_eccentricities[node]
  else:
    print('Returning 0')
    return 0
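
# Hedged sketch (illustrative only): the graph_tool calls used above, on a
# tiny disconnected graph -- one edge 0-1 plus an isolated vertex 2. The
# largest component holds 2 of the 3 vertices, so the connectivity check
# in eccentricity() would take the disconnected branch.
def _demo_largest_component():
  g = gt.Graph(directed=False)
  g.add_vertex(3)
  g.add_edge(0, 1)
  labels = gt.label_largest_component(g)   # boolean vertex property map
  view = gt.GraphView(g, vfilt=labels)     # filtered view of the component
  return view.num_vertices(), g.num_vertices()  # (2, 3) -> disconnected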

def betweenness_centrality(self, node):
  print('Calculating betweenness_centrality for node', node)
  if not hasattr(self, 'all_betweenness_centralities'):
    self.all_betweenness_centralities = nx.betweenness_centrality(self.graph)
  return self.all_betweenness_centralities[node]


def betweenness_centrality_gt(self, node):
  print('Calculating betweenness_centrality with graph_tool for node', node)
  # graph_tool vertex indices start at 0 and need not match the networkx
  # node ids, so nodes are looked up through the stored label property map
  if not hasattr(self, 'all_betweenness_centralities_gt'):
    vp, ep = gt.betweenness(self.graph_gt['graph_gt'])
    self.all_betweenness_centralities_gt = vp

  node_label = gt.find_vertex(self.graph_gt['graph_gt'], self.graph_gt['graph_gt_labels'], node)
  return self.all_betweenness_centralities_gt[self.graph_gt['graph_gt'].vertex(node_label[0])]
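
# Hedged sketch (illustrative only): betweenness plus label lookup with
# graph_tool on a 3-vertex path. Vertex index 1 lies on the only shortest
# path (between 0 and 2), so it gets the maximum normalized betweenness.
# The 'label' property here is an assumed stand-in for graph_gt_labels.
def _demo_betweenness_gt():
  g = gt.Graph(directed=False)
  g.add_vertex(3)
  g.add_edge(0, 1)
  g.add_edge(1, 2)
  labels = g.new_vertex_property('int')   # map vertex indices to node ids
  for v in g.vertices():
    labels[v] = int(v) + 1                # e.g. node ids starting at 1
  vp, _ = gt.betweenness(g)               # vertex centrality property map
  middle = gt.find_vertex(g, labels, 2)[0]  # node id 2 is vertex index 1
  return vp[middle]                       # 1.0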

def average_shortest_path_length(self, node):
  print('Calculating average_shortest_path_length for node', node)
  # caching the average shortest path lengths for all nodes at once failed,
  # so this metric is calculated per node

  # get all shortest path lengths from this node
  all_shortest_path_lengths_for_node = nx.shortest_path_length(self.graph, source=node)

  # calculate the average
  # NOTE: the node itself (distance 0) is included in the denominator
  sum_of_lengths = 0
  for target in all_shortest_path_lengths_for_node:
    sum_of_lengths += all_shortest_path_lengths_for_node[target]

  return float(sum_of_lengths) / len(all_shortest_path_lengths_for_node)
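
# Hedged sketch (illustrative only): the same per-node average on a path
# graph 0-1-2-3. From node 0 the distances are {0: 0, 1: 1, 2: 2, 3: 3},
# so the average is 6 / 4 = 1.5 (the source's own distance 0 is counted).
def _demo_average_shortest_path_length():
  g = nx.path_graph(4)
  lengths = nx.shortest_path_length(g, source=0)
  return float(sum(lengths.values())) / len(lengths)  # 1.5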


#############
# advanced metrics
#############
def correct_clustering_coefficient(self, node):
  print('Calculating correct_clustering_coefficient for node', node)
  # read the previously stored base metrics back from the Redis node hash
  clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node), 'clustering_coefficient'))
  degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
  corrected_cc = clustering_coefficient + (degree * clustering_coefficient) / 4.0
  return corrected_cc
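
# Worked example for the correction above (illustrative numbers): with
# clustering_coefficient = 0.5 and degree = 4 stored in Redis, the result
# is 0.5 + (4 * 0.5) / 4.0 = 1.0, i.e. the coefficient is scaled up in
# proportion to the node's degree.
def _demo_correct_clustering_coefficient():
  clustering_coefficient, degree = 0.5, 4.0
  return clustering_coefficient + (degree * clustering_coefficient) / 4.0  # 1.0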

def correct_average_neighbor_degree(self, node):
  print('Calculating correct_average_neighbor_degree for node', node)
  avgnd = float(self.redis.hget(self.node_prefix+str(node), 'average_neighbor_degree'))

  neighbors = list(self.graph.neighbors(node))
  number_of_neighbors = float(len(neighbors))
  neighbor_degrees = []
  for neighbor in neighbors:
    neighbor_degrees.append(self.graph.degree(neighbor))

  # using the numpy median and standard deviation implementations
  numpy_neighbor_degrees = np.array(neighbor_degrees)
  median = np.median(numpy_neighbor_degrees)
  standard_deviation = np.std(numpy_neighbor_degrees)

  if avgnd == 0.0 or number_of_neighbors == 0.0 or standard_deviation == 0.0:
    return avgnd
  else:
    return avgnd + (((median - avgnd) / standard_deviation) / number_of_neighbors) * avgnd
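
# Worked example for the correction above (illustrative numbers): with
# neighbor degrees [1, 1, 4] the base average is 2.0, the median 1.0 and
# the standard deviation sqrt(2) ~ 1.414, so the single high-degree
# neighbor pulls the corrected value down to about 1.53.
def _demo_correct_average_neighbor_degree():
  degrees = np.array([1, 1, 4])
  avgnd = float(degrees.mean())   # 2.0, read back from Redis in the method
  correction = ((np.median(degrees) - avgnd) / np.std(degrees)) / len(degrees)
  return avgnd + correction * avgnd   # ~1.53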


def correct_iterated_average_neighbor_degree(self, node):
  print('Calculating correct_iterated_average_neighbor_degree for node', node)
  avgnd = float(self.redis.hget(self.node_prefix+str(node), 'iterated_average_neighbor_degree'))

  first_level_neighbors = list(self.graph.neighbors(node))
  second_level_neighbors = []

  # get all two-hop nodes
  for first_level_neighbor in first_level_neighbors:
    current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
    second_level_neighbors.extend(current_second_level_neighbors)

  # remove one-hop neighbors and the node itself
  relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])

  number_of_nodes = len(relevant_nodes)
  node_degrees = []
  for rel_node in relevant_nodes:
    node_degrees.append(self.graph.degree(rel_node))

  numpy_node_degrees = np.array(node_degrees)
  median = np.median(numpy_node_degrees)
  standard_deviation = np.std(numpy_node_degrees)

  if avgnd == 0.0 or number_of_nodes == 0.0 or standard_deviation == 0.0:
    return avgnd
  else:
    return avgnd + (((median - avgnd) / standard_deviation) / number_of_nodes) * avgnd
  


