import networkx as nx
import graph_tool.all as gt
import redis as rd
import numpy as np
import indexing
import statistics
import normalizations
import config
import percolation
import visualization
import datetime as dt
class MetricCalculator(object):
    """Orchestrates metric, score, statistics, percolation and visualization
    calculations on a graph and persists the results to Redis.

    Works on two representations of the same graph: a networkx graph
    (``graph``) and a graph_tool bundle (``graph_gt``), a dict with keys
    ``'graph_gt'`` (the graph_tool Graph) and ``'graph_gt_labels'`` (a vertex
    property map of node labels).

    NOTE: this module is written for Python 2 (``raw_input``) and for the
    legacy redis-py 2.x ``zadd(name, score, member)`` call signature.
    """

    def __init__(self, graph, graph_gt):
        # Define required class variables such as the graph to work on,
        # the Redis connection and the nodes of the graph.
        print('Starting metric_calculator!')

        # --- timing bookkeeping, consumed by evaluate_durations() ---
        self.start_time = dt.datetime.now()
        self.durations = {}             # step name -> timedelta
        self.durations_in_seconds = {}  # step name -> float seconds
        self.durations_in_percent = {}  # step name -> % of total run time

        self.graph = graph
        self.graph_gt = graph_gt
        # short alias for the graph_tool graph
        self.g = self.graph_gt['graph_gt']
        # internalize the label map so it is persisted with the graph on save
        if not hasattr(self.g.vp, 'label_map'):
            self.g.vp.label_map = self.graph_gt['graph_gt_labels']
        self.label_map = self.g.vp.label_map

        # Boolean vertex property used as a filter during percolation:
        # 1 = vertex present, 0 = vertex removed. Internalized on first run.
        if not hasattr(self.g.vp, 'exmap'):
            self.g.vp.exmap = self.g.new_vertex_property("bool")
        self.exclusion_map = self.g.vp.exmap
        self.exclusion_map.a = 1  # initialise filter map: keep every vertex

        # Largest component of the intact graph, the percolation baseline:
        # percolation = 1 - largest_component(percolated)/largest_component(intact)
        self.glc = gt.GraphView(self.g, vfilt=gt.label_largest_component(self.g))

        # Precompute (and internalize) eigenvector centrality and betweenness
        # so later lookups are cheap; the scalar results are discarded.
        if not hasattr(self.g.vp, 'eigenvector'):
            eigenvalue, self.g.vp.eigenvector = gt.eigenvector(self.g)
        if not hasattr(self.g.ep, 'betweenness'):
            betweenness, self.g.ep.betweenness = gt.betweenness(self.g)

        self.redis = rd.StrictRedis(host=config.REDIS_HOST, port=config.REDIS_PORT, db=1)
        self.nodes = nx.nodes(graph)

        # Configuration variables are read from the config file and saved to
        # class variables for easy access. The user-supplied graph name
        # prefixes every Redis key so several graphs can share one database.
        self.graph_index_key = config.graph_index_key
        self.graph_name = ''
        while self.graph_name == '':
            self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")

        self.info_index_key = self.graph_name + ':' + config.info_index_key
        self.node_index_key = self.graph_name + ':' + config.node_index_key
        self.metric_index_key = self.graph_name + ':' + config.metric_index_key
        self.score_index_key = self.graph_name + ':' + config.score_index_key
        self.percolation_index_key = self.graph_name + ':' + config.percolation_index_key
        self.layout_index_key = self.graph_name + ':' + config.layout_index_key

        self.node_neighbors_prefix = self.graph_name + ':' + config.node_neighbors_prefix
        self.node_prefix = self.graph_name + ':' + config.node_prefix
        self.metric_prefix = self.graph_name + ':' + config.metric_prefix
        self.score_prefix = self.graph_name + ':' + config.score_prefix
        self.statistics_prefix = self.graph_name + ':' + config.statistics_prefix
        self.percolation_prefix = self.graph_name + ':' + config.percolation_prefix

        self.normalization_suffix = config.normalization_suffix

        self.base_metrics = config.base_metrics
        self.advanced_metrics = config.advanced_metrics
        self.normalization_methods = config.normalization_methods
        self.scores = config.scores
        self.advanced_scores = config.advanced_scores
        self.visualization_layouts = config.visualization_layouts

        # The full config-driven mode lists are commented out for testing
        # purposes; only the 'russian_shutoff' mode is currently active.
        # self.percolation_modes = config.percolation_modes
        # self.advanced_percolation_modes = config.advanced_percolation_modes
        self.percolation_modes = {'russian_shutoff': config.percolation_modes['russian_shutoff']}
        self.advanced_percolation_modes = {}

    ##############################################################################
    ###### start describes the entire calculation in a high level overview #######
    ##############################################################################

    def start(self):
        """Run the (currently reduced) calculation pipeline end to end.

        Steps commented out below are temporarily disabled for testing; the
        comments document the full intended pipeline order.
        """
        start_time_calculation = dt.datetime.now()
        # preliminary calculations
        #self.flush_database()
        self.obtain_percentages()
        #self.create_info()
        #self.create_standard_layout()
        #self.save_graph_data('raw')
        # index creation
        #self.create_indexes()
        # main calculations
        #self.calculate_metrics()
        #self.calculate_advanced_metrics()
        #self.normalize_metrics()
        #self.calculate_scores()
        #self.calculate_advanced_scores()
        # statistics
        #self.calculate_statistics()
        # dynamic metrics / percolation
        self.calculate_percolation()
        # visualization
        #self.visualize_graph()
        # save final graph
        self.save_graph_data('russian')
        # evaluation
        self.duration_total = dt.datetime.now() - start_time_calculation
        self.evaluate_durations()

    ###################
    ## PRELIMINARIES ##
    ###################

    def flush_database(self):
        """Interactively ask whether to wipe all data in the Redis database."""
        flush_flag = 'Flushing'  # sentinel: anything but 'y'/'n' keeps asking
        while flush_flag != 'y' and flush_flag != 'n':
            flush_flag = raw_input("Would you like to flush the database before continuing? [y/n]")
        if flush_flag == 'y':
            self.redis.flushdb()

    def obtain_percentages(self):
        """Ask for percolation percentages and derive absolute vertex counts.

        Populates ``self.percentages``: a dict mapping the number of vertices
        to remove (relative to the largest component) to the percentage that
        produced it.
        """
        percentages = ''  # initialise; loop until the user enters something
        while percentages == '':
            percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")
        percentages = sorted([float(pct) for pct in percentages.split()])
        # round half up to a whole number of vertices of the largest component
        numbers = [int(float(self.glc.num_vertices()) * (pct / 100.0) + 0.5) for pct in percentages]
        # create a dictionary of vertex counts and corresponding percentages
        # NOTE(review): if two percentages round to the same vertex count, the
        # smaller percentage is silently dropped here -- confirm acceptable.
        self.percentages = dict(zip(numbers, percentages))
        # storing values in redis DB
        #self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))

    def create_info(self):
        """Store general info about the graph (vertex and edge counts)."""
        self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
        self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())

    def create_standard_layout(self):
        """Compute and internalize an SFDP layout used for visualization."""
        start_time = dt.datetime.now()
        print('Creating standard layout for graph visualization.')
        if not hasattr(self.g.vp, 'sfdp'):
            self.g.vp.sfdp = gt.sfdp_layout(self.g, C=0.5)
        # Record the duration before printing it. (Previously this assignment
        # was commented out, so the print below raised a KeyError.)
        self.durations['SFDP_layout'] = dt.datetime.now() - start_time
        print(self.durations['SFDP_layout'])

    def save_graph_data(self, name):
        """Save the graph_tool graph (with internalized maps) to disk.

        The file is named ``<graph_name>_<name>.gt.gz``.
        """
        start_time = dt.datetime.now()
        print('Saving raw graph data')
        self.g.save(self.graph_name + '_' + name + '.gt.gz')
        self.durations['saving_graph' + name] = dt.datetime.now() - start_time

    ##################
    #### INDEXING ####
    ##################

    def create_indexes(self):
        """Create Redis indexes via indexing.py (all steps currently disabled)."""
        start_time = dt.datetime.now()
        # call methods defined in indexing.py
        #indexing.index_graph(self)
        #indexing.index_nodes(self)
        #indexing.index_neighbors(self)
        #indexing.index_metrics(self)
        #indexing.index_scores(self)
        #indexing.index_percolation(self)
        self.durations['indexing'] = dt.datetime.now() - start_time

    ###########################
    #### CALCULATION LOOPS ####
    ###########################

    def calculate_metrics(self):
        """Compute every configured base metric for every node and store it."""
        start_time_total = dt.datetime.now()
        print('Starting calculate_metrics')
        # loop through all defined metrics and call the calculation method for each node
        for metric_name in self.base_metrics:
            start_time = dt.datetime.now()
            metric_method = self.base_metrics[metric_name]
            for node in self.nodes:
                # call calculation method of supplied metric for current node
                node = int(node)
                value = float(metric_method(self, node))
                # store result in the per-node hash
                self.redis.hset(self.node_prefix + str(node), metric_name, value)
                # also store result in the per-metric sorted set
                # NOTE(review): legacy redis-py 2.x zadd(name, score, member)
                # signature; redis-py >= 3.0 requires a mapping instead.
                self.redis.zadd(self.metric_prefix + metric_name, value, str(node))
            self.durations['metrics:' + metric_name] = dt.datetime.now() - start_time
        self.durations['metrics_total'] = dt.datetime.now() - start_time_total

    def calculate_advanced_metrics(self):
        """Compute every configured advanced metric for every node and store it."""
        start_time_total = dt.datetime.now()
        print('Starting calculate_advanced_metrics')
        # loop through all defined advanced metrics and call the calculation method
        for advanced_metric_name in self.advanced_metrics:
            start_time = dt.datetime.now()
            metric_method = self.advanced_metrics[advanced_metric_name]
            for node in self.nodes:
                node = int(node)
                value = float(metric_method(self, node))
                # store result in the per-node hash
                self.redis.hset(self.node_prefix + str(node), advanced_metric_name, value)
                # also store result in the per-metric sorted set (legacy zadd signature)
                self.redis.zadd(self.metric_prefix + advanced_metric_name, value, str(node))
            self.durations['adv_metrics:' + advanced_metric_name] = dt.datetime.now() - start_time
        self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total

    def normalize_metrics(self):
        """Normalize all metrics using the configured normalization methods.

        Metrics not listed in the ``normalization_methods`` hash fall back to
        min-max normalization.
        """
        start_time = dt.datetime.now()
        print('Starting normalize_metrics')
        # Python 2 idiom: dict.items() returns lists, so + concatenates them
        all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())
        for metric_name in all_metrics:
            if metric_name in self.normalization_methods:
                normalization_method = self.normalization_methods[metric_name]
            else:
                # fallback normalization is min-max
                normalization_method = normalizations.min_max
            normalization_method(self, metric_name)
        self.durations['normalizing'] = dt.datetime.now() - start_time

    def calculate_scores(self):
        """Combine normalized metric values into weighted scores per node."""
        start_time = dt.datetime.now()
        print('Starting calculate_scores')
        for score_name in self.scores:
            metrics_with_weights = self.scores[score_name]
            for node in self.nodes:
                score_value = 0.0
                # weighted sum over the normalized metric values
                for metric in metrics_with_weights:
                    weight = self.scores[score_name][metric]
                    value = float(self.redis.hget(self.node_prefix + str(node), metric + self.normalization_suffix))
                    score_value += weight * value
                self.redis.hset(self.node_prefix + str(node), score_name, score_value)
                # legacy redis-py 2.x zadd(name, score, member) signature
                self.redis.zadd(self.score_prefix + score_name, score_value, str(node))
        self.durations['scores'] = dt.datetime.now() - start_time

    def calculate_advanced_scores(self):
        """Run every configured advanced-score method (they store their own results)."""
        start_time = dt.datetime.now()
        print('Starting calculate_advanced_scores')
        for advanced_score in self.advanced_scores:
            self.advanced_scores[advanced_score](self)
        self.durations['adv_scores'] = dt.datetime.now() - start_time

    #############
    # statistics
    #############

    def calculate_statistics(self):
        """Compute statistics (absolute and normalized) for all metrics and
        scores, then the metric correlations."""
        start_time = dt.datetime.now()
        print('Starting calculate_statistics')
        for metric in self.base_metrics:
            # absolute and normalized
            statistics.calculate_statistics(self, metric, self.metric_prefix + metric)
            statistics.calculate_statistics(self, metric + self.normalization_suffix, self.metric_prefix + metric + self.normalization_suffix)
        for advanced_metric in self.advanced_metrics:
            # absolute and normalized
            statistics.calculate_statistics(self, advanced_metric, self.metric_prefix + advanced_metric)
            statistics.calculate_statistics(self, advanced_metric + self.normalization_suffix, self.metric_prefix + advanced_metric + self.normalization_suffix)
        for score in self.scores:
            statistics.calculate_statistics(self, score, self.score_prefix + score)
        for advanced_score in self.advanced_scores:
            statistics.calculate_statistics(self, advanced_score, self.score_prefix + advanced_score)
        self.durations['statistics:stats'] = dt.datetime.now() - start_time
        start_time = dt.datetime.now()
        statistics.calculate_correlations(self)
        self.durations['statistics:corr'] = dt.datetime.now() - start_time

    ###################
    # dynamic metrics #
    ###################

    def calculate_percolation(self):
        """Run all configured basic and advanced percolation modes.

        Basic modes take (self, mode_name, n) and return a dictionary mapping
        percentage of nodes removed to percentage of deterioration. Advanced
        modes return a dictionary of groups (e.g. metrics, countries), each
        mapping to such a {removed: deteriorated} dictionary. Result storage
        to Redis is currently commented out for testing.
        """
        start_time_total = dt.datetime.now()
        print('Starting percolation calculation')
        # shorthand for the {vertex count: percentage} mapping
        n = self.percentages

        # BASIC PERCOLATION MODES
        for mode_name in self.percolation_modes:
            start_time = dt.datetime.now()
            # reset the exclusion vertex property map: keep every vertex
            self.exclusion_map.a = 1
            # read method from config file
            mode_method = self.percolation_modes[mode_name]
            # execute method (return value currently unused while storage is disabled)
            #results = mode_method(self,mode_name,n)
            mode_method(self, mode_name, n)
            # index percolation mode
            #self.redis.sadd(self.percolation_index_key, mode_name)
            # store values
            #print 'Storing percolation percentages'
            #for percentage in results:
            #    value = results[percentage]
            #    #store in hash set
            #    self.redis.hset(self.percolation_prefix+mode_name, percentage, value)
            self.durations['percolation:' + mode_name] = dt.datetime.now() - start_time

        # ADVANCED PERCOLATION MODES
        for mode_name in self.advanced_percolation_modes:
            start_time = dt.datetime.now()
            # reset the exclusion vertex property map: keep every vertex
            self.exclusion_map.a = 1
            # read method from config file
            mode_method = self.advanced_percolation_modes[mode_name]
            # execute method (results kept for the storage block below, currently disabled)
            results = mode_method(self, mode_name, n)
            # store values
            #print 'Storing percolation percentages'
            #for group in results:
            #    # index percolation modes
            #    self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
            #    for percentage in results[group]:
            #        value = results[group][percentage]
            #        #store in hash set
            #        self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)
            self.durations['percolation:' + mode_name] = dt.datetime.now() - start_time

        self.durations['percolation_total'] = dt.datetime.now() - start_time_total

    def visualize_graph(self):
        """Render the largest component with every configured layout and
        register each layout name in Redis."""
        for layout_name in self.visualization_layouts:
            start_time = dt.datetime.now()
            print('Creating visualisation with ' + layout_name + ' layout')
            layout_method = self.visualization_layouts[layout_name]
            self.g.vp[layout_name] = layout_method(self)
            # NOTE(review): output directory is hard-coded to the CORIA
            # frontend's picture folder -- confirm it exists on this host.
            gt.graph_draw(self.glc, pos=self.g.vp[layout_name], output="/CORIA/coria-frontend/pics/" + self.graph_name + "_" + layout_name + ".png")
            self.redis.sadd(self.layout_index_key, layout_name)
            self.durations['layout:' + layout_name] = dt.datetime.now() - start_time
            print(self.durations['layout:' + layout_name])

    def evaluate_durations(self):
        """Write a tab-separated timing report and echo per-step percentages.

        Requires ``self.duration_total`` (set by start()) for the percentage
        calculation.
        """
        print('times taken:')
        # context manager ensures the report file is closed even on error
        # (previously the file handle was never closed)
        with open(str(self.graph_name) + "_duration_test_2.txt", "w") as output:
            output.write("Graph Name:\t" + str(self.graph_name) + "\n")
            output.write("Number of Vertices:\t" + str(self.g.num_vertices()) + "\n")
            output.write("Number of Edges:\t" + str(self.g.num_edges()) + "\n")
            output.write("Percentages of deleted vertices:\t" + '\t'.join(map(str, self.percentages.values())) + "\n\n")
            output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
            for key in self.durations:
                self.durations_in_seconds[key] = self.durations[key].total_seconds()
                self.durations_in_percent[key] = float(self.durations_in_seconds[key]) / float(self.duration_total.total_seconds()) * 100.0
                print(str(key) + '\t' + str(self.durations_in_percent[key]))
                output.write(str(key) + '\t' + str(self.durations[key]) + '\t' + str(self.durations_in_seconds[key]) + '\t' + str(self.durations_in_percent[key]) + '\n')