import networkx as nx
import graph_tool.all as gt
import redis as rd
import numpy as np
import indexing
import statistics
import normalizations
import config
import percolation
import visualization
import datetime as dt
class MetricCalculator(object):
    def __init__(self, graph, graph_gt):
        # class constructor:
        # set up the graphs to work on, the redis connection, the list of nodes
        # and all configuration variables used by the calculation steps
print ('Starting metric_calculator!')
# for code evaluation
self.start_time = dt.datetime.now()
self.durations = {}
self.durations_in_seconds = {}
self.durations_in_percent = {}
self.graph = graph
self.graph_gt = graph_gt
# alternate name for graph tool graph
self.g = self.graph_gt['graph_gt']
# alternate name for graph tool labels
self.g.vp.label_map = self.graph_gt['graph_gt_labels']
self.label_map = self.g.vp.label_map
# vertex property map for percolation calculations
self.g.vp.exmap = self.g.new_vertex_property("bool") #internalizes map
self.exclusion_map = self.g.vp.exmap
self.exclusion_map.a = 1 #initialise filter map
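        # Note (assumption): the exclusion map presumably acts as a vertex filter in
        # percolation.py, where 1 keeps a vertex and 0 marks it as removed for a run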
#find largest component of graph tool graph for percolation calculations
# percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
self.glc = gt.GraphView(self.g, vfilt = gt.label_largest_component(self.g))
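        # Illustrative example of the deterioration measure above: if the intact largest
        # component has 1000 vertices and only 600 remain connected after node removal,
        # percolation = 1 - 600/1000 = 0.4 (i.e. 40% deterioration)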
self.redis = rd.StrictRedis(host=config.REDIS_HOST, port=config.REDIS_PORT, db=1)
self.nodes = nx.nodes(graph)
# configuration variables are read from the config file and are also saved to class variables for easy access
self.graph_index_key = config.graph_index_key
        # default graph name; the prompt below is only reached if no default is set here
        self.graph_name = 'coria-graph'
        while (self.graph_name == ''):
            self.graph_name = raw_input("Please enter the name of the graph. It will be used for storing results.\n")
self.info_index_key = self.graph_name+':'+config.info_index_key
self.node_index_key = self.graph_name+':'+config.node_index_key
self.metric_index_key = self.graph_name+':'+config.metric_index_key
self.score_index_key = self.graph_name+':'+config.score_index_key
self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
self.layout_index_key = self.graph_name+':'+config.layout_index_key
self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
self.node_prefix = self.graph_name+':'+config.node_prefix
self.metric_prefix = self.graph_name+':'+config.metric_prefix
self.score_prefix = self.graph_name+':'+config.score_prefix
self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
self.percolation_prefix = self.graph_name+':'+config.percolation_prefix
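        # Hypothetical example of the resulting key scheme (the actual prefix strings are
        # defined in config.py): if node_prefix were 'node:', the hash of node 42 would be
        # stored under 'coria-graph:node:42'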
self.normalization_suffix = config.normalization_suffix
self.base_metrics = config.base_metrics
self.advanced_metrics = config.advanced_metrics
self.normalization_methods = config.normalization_methods
self.scores = config.scores
self.advanced_scores = config.advanced_scores
self.visualization_layouts = config.visualization_layouts
self.percolation_modes = config.percolation_modes
self.advanced_percolation_modes = config.advanced_percolation_modes
##############################################################################
###### start describes the entire calculation in a high level overview #######
##############################################################################
def start(self):
start_time_calculation = dt.datetime.now()
#preliminary calculations
self.flush_database()
self.obtain_percentages()
self.create_info()
self.create_standard_layout()
self.save_graph_data('raw')
#index creation
self.create_indexes()
#main calculations
self.calculate_metrics()
self.calculate_advanced_metrics()
self.normalize_metrics()
self.calculate_scores()
self.calculate_advanced_scores()
#statistics
self.calculate_statistics()
#dynamic metrics / percolation
self.calculate_percolation()
#visualization
self.visualize_graph()
#save final graph
self.save_graph_data('full')
#evaluation
self.duration_total = dt.datetime.now() - start_time_calculation
self.evaluate_durations()
###################
## PRELIMINARIES ##
###################
def flush_database(self):
# Check if FLUSH_REDIS_DB is set to True and flush the DB
if config.FLUSH_REDIS_DB:
self.redis.flushdb()
def obtain_percentages(self):
        # obtain percentages for the calculation of deterioration
        # and calculate the corresponding numbers of nodes to remove from the graph
percentages = config.PERCOLATION_PERCENTAGES # initialise
while (percentages == ''):
percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")
        percentages = sorted([float(pct) for pct in percentages.split()])
        # number of nodes to remove per percentage, rounded to the nearest integer
        numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
# create a dictionary of percentages and corresponding numbers of nodes
self.percentages = dict(zip(numbers,percentages))
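        # Illustrative example (assuming a largest component of 1000 vertices and the
        # input "1 2 5 10"): numbers = [10, 20, 50, 100] and
        # self.percentages = {10: 1.0, 20: 2.0, 50: 5.0, 100: 10.0}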
# storing values in redis DB
self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
def create_info(self):
#store general info about graph
self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())
def create_standard_layout(self):
# create a standard layout
start_time = dt.datetime.now()
        print('Creating standard layout for graph visualization.')
if not hasattr(self.g.vp, 'sfdp'):
self.sfdp = gt.sfdp_layout(self.g, C=0.5)
self.g.vp['sfdp'] = self.sfdp
else:
self.sfdp = self.g.vp['sfdp']
self.durations['SFDP_layout'] = dt.datetime.now() - start_time
def save_graph_data(self,name):
# save graph
start_time = dt.datetime.now()
        print('Saving graph data: '+name)
self.g.save(self.graph_name+'_'+name+'.gt.gz')
self.durations['saving_graph'+name] = dt.datetime.now() - start_time
##################
#### INDEXING ####
##################
def create_indexes(self):
start_time = dt.datetime.now()
#call methods defined in indexing.py
indexing.index_graph(self)
indexing.index_nodes(self)
indexing.index_neighbors(self)
indexing.index_metrics(self)
indexing.index_scores(self)
#indexing.index_percolation(self)
self.durations['indexing'] = dt.datetime.now() - start_time
###########################
#### CALCULATION LOOPS ####
###########################
def calculate_metrics(self):
start_time_total = dt.datetime.now()
# loop through all defined metrics and call specified calculation method for each node
print ('Starting calculate_metrics')
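        # base_metrics is defined in config.py and maps metric names to calculation
        # functions; e.g. (hypothetical entry) 'clustering_coefficient' mapped to a
        # function that takes (self, node) and returns a float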
for metric_name in self.base_metrics:
start_time = dt.datetime.now()
metric_method = self.base_metrics[metric_name]
# loop through all nodes
for node in self.nodes:
# call calculation method of supplied metric for current node
node = int(node)
value = float(metric_method(self,node))
#store result in node values
self.redis.hset(self.node_prefix+str(node), metric_name, value)
#also store result to metric set
self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
self.durations['metrics:'+metric_name] = dt.datetime.now() - start_time
self.durations['metrics_total'] = dt.datetime.now() - start_time_total
def calculate_advanced_metrics(self):
start_time_total = dt.datetime.now()
        # loop through all defined advanced metrics and call the specified calculation method
print ('Starting calculate_advanced_metrics')
for advanced_metric_name in self.advanced_metrics:
start_time = dt.datetime.now()
metric_method = self.advanced_metrics[advanced_metric_name]
# loop through all nodes
for node in self.nodes:
node = int(node)
value = float(metric_method(self,node))
#store result in node values
self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)
#also store result to metric set
self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
self.durations['adv_metrics:'+advanced_metric_name] = dt.datetime.now() - start_time
self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total
    # loop through all metrics and call the respective normalization method
    # metrics not listed in the "normalization_methods" hash fall back to min-max normalization
def normalize_metrics(self):
start_time = dt.datetime.now()
#fallback normalization: min-max
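        # The min-max fallback (normalizations.min_max) presumably applies the usual
        # rescaling to the unit interval: value_norm = (value - min) / (max - min)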
print ('Starting normalize_metrics')
        all_metrics = dict(self.base_metrics)
        all_metrics.update(self.advanced_metrics)
for metric_name in all_metrics:
            if metric_name in self.normalization_methods:
normalization_method = self.normalization_methods[metric_name]
else:
#fallback normalization is min-max
normalization_method = normalizations.min_max
normalization_method(self,metric_name)
self.durations['normalizing'] = dt.datetime.now() - start_time
def calculate_scores(self):
start_time = dt.datetime.now()
print ('Starting calculate_scores')
for score_name in self.scores:
metrics_with_weights = self.scores[score_name]
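            # Each scores entry maps metric names to weights; a node's score is the weighted
            # sum of its normalized metric values, e.g. (hypothetical weights)
            # {'degree': 0.5, 'betweenness_centrality': 0.5} gives 0.5*degree_norm + 0.5*betweenness_norm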
for node in self.nodes:
score_value = 0.0
# get normalized values
for metric in metrics_with_weights:
weight = self.scores[score_name][metric]
value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
score_value += weight * value
                # store the score in the node's hash and in the score sorted set
self.redis.hset(self.node_prefix+str(node),score_name, score_value)
self.redis.zadd(self.score_prefix+score_name, score_value, str(node))
self.durations['scores'] = dt.datetime.now() - start_time
def calculate_advanced_scores(self):
start_time = dt.datetime.now()
print ('Starting calculate_advanced_scores')
for advanced_score in self.advanced_scores:
self.advanced_scores[advanced_score](self)
self.durations['adv_scores'] = dt.datetime.now() - start_time
#############
# statistics
#############
def calculate_statistics(self):
start_time = dt.datetime.now()
print ('Starting calculate_statistics')
for metric in self.base_metrics:
#absolute and normalized
statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)
for advanced_metric in self.advanced_metrics:
#absolute and normalized
statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)
for score in self.scores:
statistics.calculate_statistics(self, score, self.score_prefix+score)
for advanced_score in self.advanced_scores:
statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
self.durations['statistics:stats'] = dt.datetime.now() - start_time
start_time = dt.datetime.now()
statistics.calculate_correlations(self)
self.durations['statistics:corr'] = dt.datetime.now() - start_time
###################
# dynamic metrics #
###################
def calculate_percolation(self):
start_time_total = dt.datetime.now()
print ('Starting percolation calculation')
        # short alias for the dictionary mapping numbers of nodes to remove to their percentages
n = self.percentages
# BASIC PERCOLATION MODES
# basic percolation modes take mode_name and n as input and return a #
# dictionary with percentage of nodes removed as key and percentage ##
# of deterioration as value
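        # Hypothetical shape of a returned results dict: {1.0: d1, 5.0: d2, 10.0: d3},
        # keyed by the percentage of nodes removed; the deterioration values are computed
        # by the mode methods in percolation.py (see the formula noted in __init__)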
for mode_name in self.percolation_modes:
start_time = dt.datetime.now()
            # initialise exclusion vertex property map
self.exclusion_map.a = 1
# read method from config file
mode_method = self.percolation_modes[mode_name]
# execute method
results = mode_method(self,mode_name,n)
# index percolation mode
self.redis.sadd(self.percolation_index_key, mode_name)
# store values
            print('Storing percolation percentages')
for percentage in results:
value = results[percentage]
#store in hash set
self.redis.hset(self.percolation_prefix+mode_name, percentage, value)
self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time
# ADVANCED PERCOLATION MODES
# advanced percolation modes take mode_name and n as input and return a ###
# dictionary with groups of percolation modes (e.g. metrics, countries) ###
# as keys and dictionaries of percentages (removed: deteriorated) as values
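        # Hypothetical shape for advanced modes: {'group_a': {1.0: d1, 5.0: d2}, 'group_b': {...}},
        # i.e. one inner dictionary per group (metric, country, ...), each keyed like the basic modes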
for mode_name in self.advanced_percolation_modes:
start_time = dt.datetime.now()
            # initialise exclusion vertex property map
self.exclusion_map.a = 1
# read method from config file
mode_method = self.advanced_percolation_modes[mode_name]
# execute method
results = mode_method(self,mode_name,n)
# store values
            print('Storing percolation percentages')
for group in results:
# index percolation modes
self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
for percentage in results[group]:
value = results[group][percentage]
#store in hash set
self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)
self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time
self.durations['percolation_total'] = dt.datetime.now() - start_time_total
def visualize_graph(self):
for layout_name in self.visualization_layouts:
start_time = dt.datetime.now()
            print('Creating visualisation with '+layout_name+' layout')
layout_method = self.visualization_layouts[layout_name]
pos = layout_method(self)
gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")
self.redis.sadd(self.layout_index_key, layout_name)
self.durations['layout:'+layout_name] = dt.datetime.now() - start_time
def evaluate_durations(self):
#print out times taken
        print('times taken:')
output = open(str(self.graph_name)+"_duration_test.txt","w")
output.write("Graph Name:\t"+str(self.graph_name)+"\n")
output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str,self.percentages.values()))+"\n\n")
output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
        for key in self.durations:
            self.durations_in_seconds[key] = self.durations[key].total_seconds()
            self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0
            print(str(key)+'\t'+str(self.durations_in_percent[key]))
            output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')
        output.close()