#metrics.py
import networkx as nx
import numpy as np
import datetime as dt
import graph_tool.all as gt
def clustering_coefficient(self, node):
    """Return the clustering coefficient of *node* (networkx backend).

    On the first call the metric is computed for every node at once and
    cached on the instance; later calls are plain dictionary lookups.
    NOTE: this should be a performance gain, but for very large graphs
    the all-at-once computation might be a problem; in that case just
    returning nx.clustering(self.graph, node) might be better.
    """
    print('Calculating clustering_coefficient for node %s' % (node,))
    # In the first run calculate the metric for all nodes at once and
    # cache the result on the instance.
    if not hasattr(self, 'all_clustering_coefficients'):
        self.all_clustering_coefficients = nx.clustering(self.graph)
    # Get the actual value from the pre-calculated cache.
    return self.all_clustering_coefficients[node]
def degree(self, node):
    """Return the degree of *node* as reported by the networkx graph."""
    print('Calculating degree for node %s' % (node,))
    return self.graph.degree(node)
def degree_gt(self, node):
    """Return the total degree of *node* using the graph_tool backend.

    The degree property map for all vertices is computed once, stored as
    the internal vertex property self.g.vp['degree'], and reused on
    subsequent calls.
    """
    print('Calculating degree with graph tool for node %s' % (node,))
    # Map the external node label to its graph_tool vertex.
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # Calculate degree for all nodes once and internalise the map.
    if not hasattr(self.g.vp, 'degree'):
        self.g.vp['degree'] = self.g.degree_property_map("total")
    return self.g.vp.degree[node_index]
def eigenvector_centrality_gt(self, node):
    """Return the eigenvector centrality of *node* (graph_tool backend).

    The centrality vector is computed once for the whole graph and
    internalised as a vertex property; the largest eigenvalue is kept on
    the instance so individual values can be rescaled.
    """
    print('Calculating eigenvector centrality with graph_tool for node %s' % (node,))
    if not hasattr(self.g.vertex_properties, 'eigenvector'):
        eigenvalue, eigenvector = gt.eigenvector(self.g)
        self.g.vertex_properties.eigenvector = eigenvector
        self.eigenvalue = eigenvalue
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # Adjusted with the eigenvalue for nicer values.
    return self.g.vp.eigenvector[self.g.vertex(node_index)] * float(self.eigenvalue)
def eigenvector_centrality(self, node):
    """Return the eigenvector centrality of *node* (networkx backend).

    Computed once for all nodes and cached on the instance; max_iter is
    raised so the power iteration converges on harder graphs.
    """
    print('Calculating eigenvector centrality for node %s' % (node,))
    if not hasattr(self, 'all_eigenvector_centralities'):
        self.all_eigenvector_centralities = nx.eigenvector_centrality(self.graph, max_iter=100000)
    return self.all_eigenvector_centralities[node]
def average_neighbor_degree(self, node):
    """Return the average degree of the neighbours of *node*.

    Same caching technique as clustering_coefficient: computed once for
    all nodes and cached.  Might also be a problem for very large graphs;
    nx.average_neighbor_degree(self.graph, nodes=node) might be the way
    to go in that case.
    """
    print('Calculating average_neighbour_degree for node %s' % (node,))
    if not hasattr(self, 'all_average_neighbor_degrees'):
        self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
    return self.all_average_neighbor_degrees[node]
def iterated_average_neighbor_degree(self, node):
    """Return the average degree of the two-hop neighbourhood of *node*.

    Two-hop nodes are the neighbours of neighbours, excluding the direct
    (one-hop) neighbours and the node itself.  Returns 0 when the node
    has no neighbours or no strict two-hop neighbours exist.
    """
    print('Calculating iterated_average_neighbor degree for node %s' % (node,))
    result = 0  # default when there is nothing to average over
    first_level_neighbors = self.graph.neighbors(node)
    if len(first_level_neighbors) != 0:
        # Collect every neighbour of every direct neighbour.
        second_level_neighbors = []
        for first_level_neighbor in first_level_neighbors:
            second_level_neighbors.extend(self.graph.neighbors(first_level_neighbor))
        # Remove one-hop nodes and the starting node itself.
        relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
        if len(relevant_nodes) != 0:
            degree_sum = 0
            for relevant_node in relevant_nodes:
                degree_sum += self.graph.degree(relevant_node)
            result = float(degree_sum) / float(len(relevant_nodes))
    return result
def iterated_average_neighbour_degree_gt(self, node):
    """graph_tool variant of iterated_average_neighbor_degree.

    Returns the average (total) degree of the strict two-hop
    neighbourhood of *node*, or 0 when no such vertices exist.

    Fix: Vertex.all_neighbors() returns an iterator, so the original
    len() call on it raised TypeError; the neighbours are materialised
    into a list first.
    """
    print('Calculating iterated_average_neighbour degree with graph tool for node %s' % (node,))
    result = 0  # default when there is nothing to average over
    vertex = gt.find_vertex(self.g, self.label_map, node)[0]
    # all_neighbors() yields an iterator; materialise so we can take its
    # length and build sets from it.
    first_level_neighbours = list(vertex.all_neighbors())
    if len(first_level_neighbours) != 0:
        # Collect all two-hop vertices.
        second_level_neighbours = []
        for first_level_neighbour in first_level_neighbours:
            second_level_neighbours.extend(first_level_neighbour.all_neighbors())
        # Remove one-hop vertices and the starting vertex itself.
        relevant_vertices = set(second_level_neighbours) - set(first_level_neighbours) - set([vertex])
        if len(relevant_vertices) != 0:
            # If degree has not been calculated yet, calculate it for all
            # nodes and internalise the property map.
            if not hasattr(self.g.vp, 'degree'):
                self.g.vp['degree'] = self.g.degree_property_map("total")
            degree_sum = 0
            for relevant_vertex in relevant_vertices:
                degree_sum += self.g.vp.degree[relevant_vertex]
            result = float(degree_sum) / float(len(relevant_vertices))
    return result
def eccentricity(self, node):
    """Return the eccentricity of *node*, or 0 for disconnected graphs.

    On the first call the graph's connectivity is checked by comparing
    the largest component (graph_tool) with the full node count
    (networkx).  If connected, all eccentricities are computed and
    cached; otherwise an empty cache is stored so every later call
    returns 0 immediately (eccentricity is undefined when disconnected).
    """
    print('Calculating eccentricity for node %s' % (node,))
    if not hasattr(self, 'all_eccentricities'):
        l = gt.label_largest_component(self.g)  # find the largest component
        print('Found the largest component')
        u = gt.GraphView(self.g, vfilt=l)  # extract it as a graph
        print('The number of vertices in the largest component is %s' % (u.num_vertices(),))
        print('The number of vertices in the original graph is %s' % (self.g.num_vertices(),))
        # The graph is connected iff the largest component spans it all.
        if u.num_vertices() == nx.number_of_nodes(self.graph):
            print("Graph is connected")
            self.all_eccentricities = nx.eccentricity(self.graph)
            print("Calculated all eccentricities")
            return self.all_eccentricities[node]
        else:
            # Cache an empty result so later calls short-circuit to 0.
            print("Graph is disconnected")
            self.all_eccentricities = {}
    if self.all_eccentricities != {}:
        print("Returning eccentricity for %s - %s" % (node, self.all_eccentricities[node]))
        return self.all_eccentricities[node]
    else:
        print("Returning 0")
        return 0
def eccentricity_gt(self, node):
    """Approximate eccentricity of *node* via graph_tool distances.

    The search is bounded by a pseudo-diameter (computed once on
    self.glc and stored as an internal graph property); unreachable
    vertices carry the sentinel distance 2147483647 and are masked out
    before taking the maximum.
    """
    print('Calculating eccentricity with graph tool for node %s' % (node,))
    # Find index of node.
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    if not hasattr(self.g.gp, 'pseudo_diameter'):
        # Find the approximate diameter once to bound the walk.
        print('Finding maximum distance for walk')
        self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
        self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
        # endpoints will not be used
    # All distances from node, capped at pseudo_diameter + 1.
    distances = gt.shortest_distance(self.g, node_index, max_dist=self.g.gp.pseudo_diameter + 1).a
    # Mask unreachable vertices and take the maximum of the rest.
    maximum = np.ma.max(np.ma.masked_where(distances > 2147483646, distances), 0)
    return maximum
def eccentricity_gt_s(self, node):
    """Exact eccentricity of *node* for small graphs (graph_tool backend).

    Computes and caches the full all-pairs shortest-distance map on the
    first call, then scans the row for *node*, skipping unreachable
    vertices (sentinel distance 2147483647).
    """
    print('Calculating eccentricity for small graphs with graph tool for node %s' % (node,))
    eccentricity = 0  # initialise
    # Find index of node.
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # Get all shortest path lengths (cached across calls).
    if not hasattr(self, 'all_distances'):
        self.all_distances = gt.shortest_distance(self.g)
    for distance in self.all_distances[node_index]:
        if distance < 2147483647:  # disregard nodes which are not accessible
            eccentricity = max(eccentricity, distance)
    return eccentricity
def betweenness_centrality(self, node):
    """Return the betweenness centrality of *node* (networkx backend).

    Computed once for all nodes and cached on the instance.
    """
    print('Calculating betweenness_centrality for node %s' % (node,))
    if not hasattr(self, 'all_betweenness_centralities'):
        self.all_betweenness_centralities = nx.betweenness_centrality(self.graph)
    return self.all_betweenness_centralities[node]
def betweenness_centrality_gt(self, node):
    """Return the betweenness centrality of *node* (graph_tool backend).

    The vertex and edge betweenness maps are computed once for the whole
    graph and internalised as properties; later calls only look up the
    requested vertex.
    """
    print('Calculating betweenness_centrality with graph_tool for node %s' % (node,))
    if not hasattr(self.g.vertex_properties, 'betweenness'):
        vp, ep = gt.betweenness(self.g)
        # Internalize the property maps so they persist with the graph.
        self.g.vertex_properties.betweenness = vp
        self.g.edge_properties.betweenness = ep
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    return self.g.vp.betweenness[self.g.vertex(node_index)]
def average_shortest_path_length(self, node):
    """Return the average shortest-path length from *node* (networkx).

    Caching the value for all nodes at once failed previously, so this
    is computed per node.  The source node itself is included with
    distance 0, so the mapping is never empty.
    """
    print('Calculating average_shortest_path_length for node %s' % (node,))
    # Get all shortest path lengths from the node.
    all_shortest_path_lengths_for_node = nx.shortest_path_length(self.graph, source=node)
    # Average over all reachable targets.
    sum_of_lengths = sum(all_shortest_path_lengths_for_node[target]
                         for target in all_shortest_path_lengths_for_node)
    return float(sum_of_lengths) / len(all_shortest_path_lengths_for_node)
def average_shortest_path_length_gt(self, node):
    """Approximate average shortest-path length from *node* (graph_tool).

    The walk is bounded by a pseudo-diameter (computed once on self.glc
    and stored as an internal graph property); unreachable vertices
    carry the sentinel distance 2147483647 and are masked out of the
    average.
    """
    print('Calculating average_shortest_path_length with graph tool for node %s' % (node,))
    # Find index of node.
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    if not hasattr(self.g.gp, 'pseudo_diameter'):
        # Find the approximate diameter once to bound the walk.
        print('Finding maximum distance for walk')
        self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
        self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
        # endpoints will not be used
    # All distances from node, capped at pseudo_diameter + 1.
    distances = gt.shortest_distance(self.g, node_index, max_dist=self.g.gp.pseudo_diameter + 1).a
    # Mask unreachable vertices and average the rest.
    average = np.ma.average(np.ma.masked_where(distances > 2147483646, distances))
    return float(average)
def average_shortest_path_length_gt_small_graphs(self, node):
    """Exact average shortest-path length from *node* for small graphs.

    Computes and caches the full all-pairs shortest-distance map
    (graph_tool) on the first call, then averages the row for *node*
    over accessible vertices only (sentinel distance 2147483647 marks
    other components).  Returns 0 when nothing is accessible.
    """
    print('Calculating average_shortest_path_length for small graphs with graph tool for node %s' % (node,))
    result = 0  # initialise
    # Find index of node.
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # Get all shortest path lengths (cached across calls).
    if not hasattr(self, 'all_distances'):
        self.all_distances = gt.shortest_distance(self.g)
    distances = self.all_distances[node_index]
    # Average over accessible vertices only.
    sum_of_distances = 0
    accessible_nodes = 0
    for distance in distances:
        if distance < 2147483647:  # disregard nodes in other components
            sum_of_distances += distance
            accessible_nodes += 1
    if accessible_nodes != 0:
        result = float(sum_of_distances) / float(accessible_nodes)
    return result
def deterioration(self, node):
    """Percentage of the largest component lost when *node* is removed.

    The node is temporarily filtered out via self.exclusion_map, the
    largest remaining component is measured against self.glc (the
    original largest component), and the filter is reset before
    returning.
    """
    print('Calculating deterioration due to removal of node %s' % (node,))
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    self.exclusion_map[node_index] = 0  # take out node
    u = gt.GraphView(self.g, vfilt=self.exclusion_map)
    u = gt.GraphView(self.g, vfilt=gt.label_largest_component(u))
    # Relative shrink of the largest component, as a percentage.
    p = 100.0 * (1.0 - float(u.num_vertices()) / float(self.glc.num_vertices()))
    self.exclusion_map[node_index] = 1  # reset node
    return p
#############
# advanced metrics
#############
def correct_clustering_coefficient(self, node):
    """Clustering coefficient rescaled by log(degree) / log(max_degree).

    Reads the raw clustering coefficient and degree from redis and the
    graph-wide maximum degree from the sorted metric set.

    Fix: np.log(0) is -inf, so a degree-0 node produced NaN; nodes with
    fewer than two neighbours have no triangles, so 0.0 is returned for
    them directly (identical to the old result for degree 1, where
    log(1) == 0 already forced the product to 0).
    """
    print('Calculating correct_clustering_coefficient for node %s' % (node,))
    clustering_coefficient = float(self.redis.hget(self.node_prefix + str(node), 'clustering_coefficient'))
    degree = float(self.redis.hget(self.node_prefix + str(node), 'degree'))
    if degree < 2.0:
        return 0.0
    max_degree = self.redis.zrange(self.metric_prefix + 'degree', -1, -1, withscores=True, score_cast_func=float)[0][1]
    corrected_cc = clustering_coefficient * np.log(degree) / np.log(max_degree)
    return corrected_cc
def correct_clustering_coefficient_old(self, node):
    """Old degree-based correction: cc + (degree * cc) / 4.

    Reads the raw clustering coefficient and degree from redis.  Kept
    for comparison with correct_clustering_coefficient.
    """
    print('Calculating correct_clustering_coefficient for node %s' % (node,))
    clustering_coefficient = float(self.redis.hget(self.node_prefix + str(node), 'clustering_coefficient'))
    degree = float(self.redis.hget(self.node_prefix + str(node), 'degree'))
    corrected_cc = clustering_coefficient + (degree * clustering_coefficient) / 4.0
    return corrected_cc
def correct_average_neighbor_degree(self, node):
    """Average neighbour degree, corrected towards the median.

    result = avgnd + (((median - avgnd) / std) / k) * avgnd
    where k is the number of neighbours and median/std are taken over
    the neighbour degrees.  Falls back to the raw redis value when the
    node has no neighbours, avgnd is 0, or all neighbour degrees are
    equal (std == 0).
    """
    print('Calculating correct_average_neighbor degree for node %s' % (node,))
    avgnd = float(self.redis.hget(self.node_prefix + str(node), 'average_neighbor_degree'))
    # Guard clauses: nothing to correct in these cases.
    if avgnd == 0.0:
        return avgnd
    neighbors = self.graph.neighbors(node)
    number_of_neighbors = float(len(neighbors))
    if number_of_neighbors == 0.0:
        return avgnd
    neighbor_degrees = [self.graph.degree(neighbor) for neighbor in neighbors]
    # Using numpy median and standard deviation implementations.
    numpy_neighbor_degrees = np.array(neighbor_degrees)
    standard_deviation = np.std(numpy_neighbor_degrees)
    if standard_deviation == 0.0:
        return avgnd
    median = np.median(numpy_neighbor_degrees)
    return avgnd + (((median - avgnd) / standard_deviation) / number_of_neighbors) * avgnd
def correct_iterated_average_neighbor_degree(self, node):
    """Two-hop average neighbour degree, corrected towards the median.

    ciand = iand + (((median - iand) / std) / k) * iand
    where k is the number of strict two-hop neighbours (neighbours of
    neighbours minus direct neighbours and the node itself) and
    median/std are taken over their degrees.  Returns the raw redis
    value when iand is 0, no two-hop neighbours exist, or std == 0.
    """
    print('Calculating correct_iterated_average_neighbor_degree for node ' + str(node))
    iand = float(self.redis.hget(self.node_prefix + str(node), 'iterated_average_neighbor_degree'))
    ciand = iand
    if iand != 0.0:
        first_level_neighbors = self.graph.neighbors(node)
        # Collect all two-hop nodes.
        second_level_neighbors = []
        for first_level_neighbor in first_level_neighbors:
            second_level_neighbors.extend(self.graph.neighbors(first_level_neighbor))
        # Remove one-hop neighbours and the node itself.
        relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
        if len(relevant_nodes) != 0:
            node_degrees = [self.graph.degree(relevant_node) for relevant_node in relevant_nodes]
            numpy_node_degrees = np.array(node_degrees)
            standard_deviation = np.std(numpy_node_degrees)
            if standard_deviation != 0.0:
                median = np.median(numpy_node_degrees)
                ciand = iand + (((median - iand) / standard_deviation) / float(len(relevant_nodes))) * iand
    return ciand