File config.py added (mode: 100644) (index 0000000..59cc2dd)

#config.py
import metrics
import normalizations
import advancedscores
import percolation
import visualization

#redis keys for indexes and values
graph_index_key = 'all_graphs'

info_index_key        = 'general_info'
node_index_key        = 'all_nodes'
metric_index_key      = 'all_metrics'
score_index_key       = 'all_scores'
percolation_index_key = 'all_percolation_modes'
layout_index_key      = 'all_layouts'

node_neighbors_prefix = 'node_neighbors:'
node_prefix           = 'node_metrics:'
metric_prefix         = 'metric:'
score_prefix          = 'score:'
statistics_prefix     = 'statistics:'
percolation_prefix    = 'percolation:'

normalization_suffix = '_normalized'

# definition of all base metrics for which absolute values will be calculated for each node in the first step
# key is the name of the metric and value is the implemented method which exposes the required interface
# interface: each method takes the node as the single parameter, performs the necessary calculation and
# returns a float containing the value for the specified node

base_metrics = {'clustering_coefficient'               : metrics.clustering_coefficient,
                'degree'                               : metrics.degree,
#               'degree_(gt)'                          : metrics.degree_gt,
                'average_neighbor_degree'              : metrics.average_neighbor_degree,
                'iterated_average_neighbor_degree'     : metrics.iterated_average_neighbor_degree,
#               'iterated_average_neighbor_degree_(gt)': metrics.iterated_average_neighbor_degree,
#               'betweenness_centrality'               : metrics.betweenness_centrality,
                'betweenness_centrality_(gt)'          : metrics.betweenness_centrality_gt,
#               'eccentricity'                         : metrics.eccentricity,
                'eccentricity_(gt)'                    : metrics.eccentricity_gt,
#               'eccentricity_(gt)_s'                  : metrics.eccentricity_gt_s,
#               'average_shortest_path_length'         : metrics.average_shortest_path_length,
                'average_shortest_path_length_(gt)'    : metrics.average_shortest_path_length_gt,
#               'average_shortest_path_length_(gt)_s'  : metrics.average_shortest_path_length_gt_small_graphs,
                'eigenvector_centrality_(gt)'          : metrics.eigenvector_centrality_gt,
#               'eigenvector_centrality'               : metrics.eigenvector_centrality,
#               'deterioration'                        : metrics.deterioration
               }
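The interface comment above is slightly loose: as metrics.py shows, each metric method actually receives the MetricCalculator instance plus the node. A minimal sketch of how an additional metric could be plugged in (the name reciprocal_degree is hypothetical and not part of this commit):

    # in metrics.py (sketch): conforms to the base-metric interface,
    # i.e. returns a float for the given node
    def reciprocal_degree(self, node):
        degree = self.graph.degree(node)
        return 1.0 / degree if degree > 0 else 0.0

    # in config.py (sketch): register it alongside the entries above
    # base_metrics['reciprocal_degree'] = metrics.reciprocal_degree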

# some metrics might require corrections or post-processing which relies on the value of other metrics or normalizations
# key is the metric name and value the method for correction

advanced_metrics = {'corrected_clustering_coefficient'           : metrics.correct_clustering_coefficient,
                    'corrected_average_neighbor_degree'          : metrics.correct_average_neighbor_degree,
                    'corrected_iterated_average_neighbor_degree' : metrics.correct_iterated_average_neighbor_degree}

# for every metric, a normalization method has to be specified
# key is the name of the metric and value is the normalization method, which also has to expose the required interface
# interface: normalization methods take the name of the (absolute) metric as the single argument; no return value is required
# the method itself shall read the data required for normalization from the redis instance
# and the corresponding keys/values for the specified metric
# it shall then loop over all nodes and calculate the normalized value for the node and the metric
# afterwards it should save the result to redis using "metric_name_normalized" as the key
# the result is stored inside the node's hash for metrics

# this also needs to include corrected metrics with their respective names
normalization_methods = {'clustering_coefficient'                     : normalizations.min_max,
                         'corrected_clustering_coefficient'           : normalizations.min_max,
                         'degree'                                     : normalizations.min_max,
                         'degree_(gt)'                                : normalizations.min_max,
                         'average_neighbor_degree'                    : normalizations.min_max,
                         'corrected_average_neighbor_degree'          : normalizations.min_max,
                         'iterated_average_neighbor_degree'           : normalizations.min_max,
                         'iterated_average_neighbor_degree_(gt)'      : normalizations.min_max,
                         'corrected_iterated_average_neighbor_degree' : normalizations.min_max,
                         'betweenness_centrality'                     : normalizations.min_max,
                         'betweenness_centrality_(gt)'                : normalizations.min_max,
                         'eccentricity'                               : normalizations.max_min,
                         'eccentricity_(gt)'                          : normalizations.max_min,
                         'eccentricity_(gt)_s'                        : normalizations.max_min,
                         'average_shortest_path_length'               : normalizations.max_min,
                         'average_shortest_path_length_(gt)'          : normalizations.max_min,
                         'average_shortest_path_length_(gt)_s'        : normalizations.max_min,
                         'eigenvector_centrality_(gt)'                : normalizations.min_max,
                         'eigenvector_centrality'                     : normalizations.min_max,
                         'deterioration'                              : normalizations.min_max
                        }

# the easiest case for a score is a combination of normalized metric values with weights which add up to 1
# such scores can easily be defined here
# note: names are not methods but redis keys

scores = {'unified_risk_score': {'degree'                                     : 0.25,
                                 'corrected_average_neighbor_degree'          : 0.15,
                                 'corrected_iterated_average_neighbor_degree' : 0.1,
                                 'betweenness_centrality_(gt)'                : 0.25,
#                                'eccentricity'                               : 0.125,
                                 'average_shortest_path_length_(gt)'          : 0.25}
         }
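The weights above sum to 1 (0.25 + 0.15 + 0.1 + 0.25 + 0.25), so the score stays in [0, 1] as long as every normalized metric does. A quick sketch of how such a score resolves for a single node, with hypothetical normalized values:

    weights = {'degree': 0.25,
               'corrected_average_neighbor_degree': 0.15,
               'corrected_iterated_average_neighbor_degree': 0.1,
               'betweenness_centrality_(gt)': 0.25,
               'average_shortest_path_length_(gt)': 0.25}
    normalized = {'degree': 0.8,                                  # hypothetical values
                  'corrected_average_neighbor_degree': 0.5,
                  'corrected_iterated_average_neighbor_degree': 0.4,
                  'betweenness_centrality_(gt)': 0.9,
                  'average_shortest_path_length_(gt)': 0.7}
    score = sum(weights[m] * normalized[m] for m in weights)
    # 0.25*0.8 + 0.15*0.5 + 0.1*0.4 + 0.25*0.9 + 0.25*0.7 = 0.715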

# other scores might require a more sophisticated algorithm to be calculated
# such scores need to be added here and implemented like the example below

advanced_scores = {'advanced_unified_risk_score': advancedscores.adv_unified_risk_score}

# these are the different percolation modes with name as key and method as value
# advanced modes have sub-modes for e.g. each metric

percolation_modes = {'failure'         : percolation.failure,
                     'random_walk'     : percolation.random_walk,
                     'russian_shutoff' : percolation.russian
                    }

advanced_percolation_modes = {'target_list' : percolation.target_list,
                              'hybrid_mode' : percolation.hybrid_mode
                             }

# layouts for graph visualization
# note 1: ARF does not seem to work with most graphs (error message: non-invertible matrix)
# note 2: the Fruchterman-Reingold layout (FRUCHT) takes up a high percentage of computation time
visualization_layouts = {#'SFDP'                 : visualization.sfdp,
                         'Radial'                : visualization.radial,
                         #'Random'               : visualization.random,
                         #'ARF'                  : visualization.arf,
                         #'Fruchterman_Reingold' : visualization.frucht
                        }
File file_importer.py added (mode: 100644) (index 0000000..88c66d8)

import networkx as nx
import graph_tool.all as gt

class FileImporter(object):
    def __init__(self, filename):
        # initialize the data file to parse and new empty graphs
        print 'Starting file importer!'
        self.data_file = open(filename)
        self.graph = nx.Graph()
        self.graph_gt = gt.Graph(directed=False)
        self.graph_gt_labels = self.graph_gt.new_vertex_property("double")

    def read(self):
        for line in self.data_file:
            print "Parsing line", line
            self.parse_line(line)
        return self.graph

    def read_gt(self):
        return {'graph_gt': self.graph_gt, 'graph_gt_labels': self.graph_gt_labels}

    def parse_line(self, line):
        # split each line on tabs:
        # the first field specifies the source node,
        # the second field specifies the target node
        fields = line.strip().split("\t")
        from_node = int(fields[0])
        to_node = int(fields[1])

        # add the edge to the networkx graph, skipping self-loops
        if from_node != to_node:
            self.graph.add_edge(from_node, to_node)

        # add the edge to the graph_tool graph and maintain a property map of labels:
        # check whether the nodes are already present and create new ones if not
        temp = self.graph_gt
        temp_name = self.graph_gt_labels

        check = None
        if from_node != to_node:  # skip self-loops
            index_from = gt.find_vertex(temp, temp_name, from_node)
            index_to = gt.find_vertex(temp, temp_name, to_node)
            if index_from == [] and index_to == []:
                # neither vertex exists yet: create both
                c1 = temp.add_vertex()
                temp_name[temp.vertex(c1)] = from_node
                c2 = temp.add_vertex()
                temp_name[temp.vertex(c2)] = to_node
            if index_from != [] and index_to == []:
                # only the source vertex exists
                c1 = index_from[0]
                c2 = temp.add_vertex()
                temp_name[temp.vertex(c2)] = to_node
            if index_to != [] and index_from == []:
                # only the target vertex exists
                c1 = temp.add_vertex()
                c2 = index_to[0]
                temp_name[temp.vertex(c1)] = from_node
            if index_from != [] and index_to != []:
                # both vertices exist already
                c1 = index_to[0]
                c2 = index_from[0]
                check = temp.edge(c1, c2)  # check whether the edge is already present
            if check is None:
                temp.add_edge(c1, c2)

        self.graph_gt = temp
        self.graph_gt_labels = temp_name

        # Check whether the graph_tool and NetworkX graphs have the same number of nodes and edges
        # if self.graph_gt.num_vertices() != self.graph.number_of_nodes():
        #     print('Unequal number of vertices detected at from node', from_node, 'to node', to_node)
        #     print('Number of vertices in the graph_tool graph is', self.graph_gt.num_vertices())
        #     print('Number of vertices in NetworkX is', self.graph.number_of_nodes())
        # else:
        #     print('Equal number of vertices in both graphs')

        # if self.graph_gt.num_edges() != self.graph.number_of_edges():
        #     print('Unequal number of edges detected at from node', from_node, 'to node', to_node)
        #     print('Number of edges in the graph_tool graph is', self.graph_gt.num_edges())
        #     print('Number of edges in NetworkX is', self.graph.number_of_edges())
        # else:
        #     print('Equal number of edges in both graphs')
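A minimal usage sketch for the importer (the file name is hypothetical; the entry-point script is not part of this commit). The expected input is one tab-separated pair of integer node ids per line:

    importer = FileImporter('edges.tsv')   # hypothetical file name
    nx_graph = importer.read()             # parses every line, returns the networkx graph
    gt_data = importer.read_gt()           # {'graph_gt': ..., 'graph_gt_labels': ...}
    print nx_graph.number_of_nodes(), gt_data['graph_gt'].num_vertices()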
File metric_calculator.py added (mode: 100644) (index 0000000..26c1f58)

import networkx as nx
import graph_tool.all as gt
import redis as rd
import numpy as np
import indexing
import statistics
import normalizations
import config
import percolation
import visualization
import datetime as dt


class MetricCalculator(object):
    def __init__(self, graph, graph_gt):
        # class constructor
        # define required class variables such as the graph to work on, the redis connection and the nodes of the graph
        print 'Starting metric_calculator!'

        # for code evaluation
        self.start_time = dt.datetime.now()
        self.durations = {}
        self.durations_in_seconds = {}
        self.durations_in_percent = {}

        self.graph = graph
        self.graph_gt = graph_gt

        # alternate name for the graph_tool graph
        self.g = self.graph_gt['graph_gt']
        # alternate name for the graph_tool labels
        self.g.vp.label_map = self.graph_gt['graph_gt_labels']
        self.label_map = self.g.vp.label_map
        # vertex property map for percolation calculations
        self.g.vp.exmap = self.g.new_vertex_property("bool")  # internalizes the map
        self.exclusion_map = self.g.vp.exmap
        self.exclusion_map.a = 1  # initialise the filter map
        # find the largest component of the graph_tool graph for percolation calculations
        # percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
        self.glc = gt.GraphView(self.g, vfilt=gt.label_largest_component(self.g))

        self.redis = rd.StrictRedis(host='localhost', port=6379, db=1)
        self.nodes = nx.nodes(graph)

        # configuration variables are read from the config file and are also saved to class variables for easy access
        self.graph_index_key = config.graph_index_key

        self.graph_name = ''
        while self.graph_name == '':
            self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")

        self.info_index_key = self.graph_name+':'+config.info_index_key
        self.node_index_key = self.graph_name+':'+config.node_index_key
        self.metric_index_key = self.graph_name+':'+config.metric_index_key
        self.score_index_key = self.graph_name+':'+config.score_index_key
        self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
        self.layout_index_key = self.graph_name+':'+config.layout_index_key

        self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
        self.node_prefix = self.graph_name+':'+config.node_prefix
        self.metric_prefix = self.graph_name+':'+config.metric_prefix
        self.score_prefix = self.graph_name+':'+config.score_prefix
        self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
        self.percolation_prefix = self.graph_name+':'+config.percolation_prefix

        self.normalization_suffix = config.normalization_suffix

        self.base_metrics = config.base_metrics
        self.advanced_metrics = config.advanced_metrics

        self.normalization_methods = config.normalization_methods

        self.scores = config.scores
        self.advanced_scores = config.advanced_scores

        self.visualization_layouts = config.visualization_layouts

        self.percolation_modes = config.percolation_modes
        self.advanced_percolation_modes = config.advanced_percolation_modes

    ###############################################################################
    ###### start describes the entire calculation in a high-level overview #######
    ###############################################################################

    def start(self):
        start_time_calculation = dt.datetime.now()

        # preliminary calculations
        self.flush_database()
        self.obtain_percentages()
        self.create_info()
        self.create_standard_layout()
        self.save_graph_data('raw')

        # index creation
        self.create_indexes()

        # main calculations
        self.calculate_metrics()
        self.calculate_advanced_metrics()
        self.normalize_metrics()
        self.calculate_scores()
        self.calculate_advanced_scores()

        # statistics
        self.calculate_statistics()

        # dynamic metrics / percolation
        self.calculate_percolation()

        # visualization
        self.visualize_graph()

        # save the final graph
        self.save_graph_data('full')

        # evaluation
        self.duration_total = dt.datetime.now() - start_time_calculation
        self.evaluate_durations()

    ###################
    ## PRELIMINARIES ##
    ###################
    def flush_database(self):
        # ask whether to clear all data in Redis
        flush_flag = 'Flushing'
        while flush_flag != 'y' and flush_flag != 'n':
            flush_flag = raw_input("Would you like to flush the database before continuing? [y/n]")
        if flush_flag == 'y':
            self.redis.flushdb()

    def obtain_percentages(self):
        # obtain percentages for the calculation of deterioration
        # and calculate the number of nodes to remove from the graph
        percentages = ''  # initialise
        while percentages == '':
            percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")

        percentages = sorted([float(pct) for pct in percentages.split()])
        numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
        # create a dictionary with numbers of nodes as keys and the corresponding percentages as values
        self.percentages = dict(zip(numbers, percentages))
        # store the values in the redis DB
        self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
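The counts are obtained by rounding half-up against the size of the largest component. A quick check with hypothetical numbers: for a largest component of 1234 vertices and the input "1 2 5 10", self.percentages becomes a count-to-percentage map:

    glc_size = 1234                                    # hypothetical largest-component size
    pcts = [1.0, 2.0, 5.0, 10.0]
    numbers = [int(glc_size*(p/100.0)+0.5) for p in pcts]
    print numbers                                      # [12, 25, 62, 123]
    print dict(zip(numbers, pcts))                     # {12: 1.0, 25: 2.0, 62: 5.0, 123: 10.0}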

    def create_info(self):
        # store general info about the graph
        self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
        self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())

    def create_standard_layout(self):
        # create a standard layout
        start_time = dt.datetime.now()
        print 'Creating standard layout for graph visualization.'
        if not hasattr(self.g.vp, 'sfdp'):
            self.sfdp = gt.sfdp_layout(self.g, C=0.5)
            self.g.vp['sfdp'] = self.sfdp
        else:
            self.sfdp = self.g.vp['sfdp']
        self.durations['SFDP_layout'] = dt.datetime.now() - start_time

    def save_graph_data(self, name):
        # save the graph
        start_time = dt.datetime.now()
        print 'Saving graph data: '+name
        self.g.save(self.graph_name+'_'+name+'.gt.gz')
        self.durations['saving_graph'+name] = dt.datetime.now() - start_time

    ##################
    #### INDEXING ####
    ##################
    def create_indexes(self):
        start_time = dt.datetime.now()
        # call the methods defined in indexing.py
        indexing.index_graph(self)
        indexing.index_nodes(self)
        indexing.index_neighbors(self)
        indexing.index_metrics(self)
        indexing.index_scores(self)
        #indexing.index_percolation(self)
        self.durations['indexing'] = dt.datetime.now() - start_time

    ###########################
    #### CALCULATION LOOPS ####
    ###########################

    def calculate_metrics(self):
        start_time_total = dt.datetime.now()
        # loop through all defined metrics and call the specified calculation method for each node
        print 'Starting calculate_metrics'
        for metric_name in self.base_metrics:
            start_time = dt.datetime.now()
            metric_method = self.base_metrics[metric_name]

            # loop through all nodes
            for node in self.nodes:
                # call the calculation method of the supplied metric for the current node
                node = int(node)
                value = float(metric_method(self, node))

                # store the result in the node's values
                self.redis.hset(self.node_prefix+str(node), metric_name, value)

                # also store the result in the metric's sorted set
                self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
            self.durations['metrics:'+metric_name] = dt.datetime.now() - start_time
        self.durations['metrics_total'] = dt.datetime.now() - start_time_total
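After this loop, each value is stored twice: in the node's hash and in a per-metric sorted set keyed by the value, which is what the zrange-based rankings in percolation.py later rely on. A sketch of inspecting the stored results, assuming the graph was named 'mygraph' and using a hypothetical node id 42:

    import redis as rd
    r = rd.StrictRedis(host='localhost', port=6379, db=1)
    print r.hgetall('mygraph:node_metrics:42')                        # all metric values of node 42
    print r.zrange('mygraph:metric:degree', -3, -1, withscores=True)  # three highest-degree nodes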

    def calculate_advanced_metrics(self):
        start_time_total = dt.datetime.now()
        # loop through all defined advanced metrics and call the specified calculation method
        print 'Starting calculate_advanced_metrics'
        for advanced_metric_name in self.advanced_metrics:
            start_time = dt.datetime.now()
            metric_method = self.advanced_metrics[advanced_metric_name]

            # loop through all nodes
            for node in self.nodes:
                node = int(node)
                value = float(metric_method(self, node))

                # store the result in the node's values
                self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)

                # also store the result in the metric's sorted set
                self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
            self.durations['adv_metrics:'+advanced_metric_name] = dt.datetime.now() - start_time
        self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total

    # loop through all defined normalizations and call the respective normalization method
    # there are no default normalizations for metrics not listed in the "normalization_methods" hash
    def normalize_metrics(self):
        start_time = dt.datetime.now()
        print 'Starting normalize_metrics'
        all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())

        for metric_name in all_metrics:
            if self.normalization_methods.has_key(metric_name):
                normalization_method = self.normalization_methods[metric_name]
            else:
                # fallback normalization is min-max
                normalization_method = normalizations.min_max
            normalization_method(self, metric_name)

        self.durations['normalizing'] = dt.datetime.now() - start_time

    def calculate_scores(self):
        start_time = dt.datetime.now()
        print 'Starting calculate_scores'
        for score_name in self.scores:
            metrics_with_weights = self.scores[score_name]

            for node in self.nodes:
                score_value = 0.0

                # combine the normalized values with their weights
                for metric in metrics_with_weights:
                    weight = self.scores[score_name][metric]
                    value = float(self.redis.hget(self.node_prefix+str(node), metric+self.normalization_suffix))
                    score_value += weight * value

                self.redis.hset(self.node_prefix+str(node), score_name, score_value)
                self.redis.zadd(self.score_prefix+score_name, score_value, str(node))

        self.durations['scores'] = dt.datetime.now() - start_time

    def calculate_advanced_scores(self):
        start_time = dt.datetime.now()
        print 'Starting calculate_advanced_scores'
        for advanced_score in self.advanced_scores:
            self.advanced_scores[advanced_score](self)

        self.durations['adv_scores'] = dt.datetime.now() - start_time

    ##############
    # statistics #
    ##############

    def calculate_statistics(self):
        start_time = dt.datetime.now()
        print 'Starting calculate_statistics'
        for metric in self.base_metrics:
            # absolute and normalized
            statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
            statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)

        for advanced_metric in self.advanced_metrics:
            # absolute and normalized
            statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
            statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)

        for score in self.scores:
            statistics.calculate_statistics(self, score, self.score_prefix+score)

        for advanced_score in self.advanced_scores:
            statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
        self.durations['statistics:stats'] = dt.datetime.now() - start_time

        start_time = dt.datetime.now()
        statistics.calculate_correlations(self)
        self.durations['statistics:corr'] = dt.datetime.now() - start_time

    ###################
    # dynamic metrics #
    ###################

    def calculate_percolation(self):
        start_time_total = dt.datetime.now()
        print 'Starting percolation calculation'

        # shorter name for the percentages and corresponding numbers of nodes to remove
        n = self.percentages

        # BASIC PERCOLATION MODES
        # basic percolation modes take mode_name and n as input and return a
        # dictionary with the percentage of nodes removed as key and the
        # percentage of deterioration as value
        for mode_name in self.percolation_modes:
            start_time = dt.datetime.now()
            # initialise the exclusion vertex property map
            self.exclusion_map.a = 1
            # read the method from the config file
            mode_method = self.percolation_modes[mode_name]
            # execute the method
            results = mode_method(self, mode_name, n)
            # index the percolation mode
            self.redis.sadd(self.percolation_index_key, mode_name)
            # store the values
            print 'Storing percolation percentages'
            for percentage in results:
                value = results[percentage]
                # store in a hash set
                self.redis.hset(self.percolation_prefix+mode_name, percentage, value)

            self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

        # ADVANCED PERCOLATION MODES
        # advanced percolation modes take mode_name and n as input and return a
        # dictionary with groups of percolation modes (e.g. metrics, countries)
        # as keys and dictionaries of percentages (removed: deteriorated) as values
        for mode_name in self.advanced_percolation_modes:
            start_time = dt.datetime.now()
            # initialise the exclusion vertex property map
            self.exclusion_map.a = 1
            # read the method from the config file
            mode_method = self.advanced_percolation_modes[mode_name]
            # execute the method
            results = mode_method(self, mode_name, n)

            # store the values
            print 'Storing percolation percentages'
            for group in results:
                # index the percolation modes
                self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
                for percentage in results[group]:
                    value = results[group][percentage]
                    # store in a hash set
                    self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)

            self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

        self.durations['percolation_total'] = dt.datetime.now() - start_time_total

    def visualize_graph(self):
        for layout_name in self.visualization_layouts:
            start_time = dt.datetime.now()
            print 'Creating visualisation with '+layout_name+' layout'

            layout_method = self.visualization_layouts[layout_name]
            pos = layout_method(self)
            gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")

            self.redis.sadd(self.layout_index_key, layout_name)
            self.durations['layout:'+layout_name] = dt.datetime.now() - start_time

    def evaluate_durations(self):
        # print out the times taken
        print 'times taken:'
        output = open(str(self.graph_name)+"_duration_test.txt", "w")
        output.write("Graph Name:\t"+str(self.graph_name)+"\n")
        output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
        output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
        output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str, self.percentages.values()))+"\n\n")
        output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
        for key in self.durations:
            self.durations_in_seconds[key] = self.durations[key].total_seconds()
            self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0

            print str(key)+'\t'+str(self.durations_in_percent[key])
            output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')
        output.close()
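The entry-point script is not part of this commit, but the two classes compose naturally. A minimal sketch of how the pipeline would be driven (file name hypothetical):

    from file_importer import FileImporter
    from metric_calculator import MetricCalculator

    importer = FileImporter('edges.tsv')    # hypothetical edge-list file
    graph = importer.read()                 # networkx graph
    graph_gt = importer.read_gt()           # {'graph_gt': ..., 'graph_gt_labels': ...}

    mc = MetricCalculator(graph, graph_gt)  # prompts for a graph name
    mc.start()                              # runs the whole pipeline described in start()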
File metrics.py added (mode: 100644) (index 0000000..6692689)

#metrics.py
import networkx as nx
import numpy as np
import datetime as dt
import graph_tool.all as gt

def clustering_coefficient(self, node):
    print 'Calculating clustering_coefficient for node', node
    # on the first run, calculate the metric for all nodes at once and save it in a hash of the instance to access later
    # NOTE: this should result in a performance gain, but for very large graphs this might be a problem;
    #       in that case, just returning nx.clustering(self.graph, node) might be better
    if not hasattr(self, 'all_clustering_coefficients'):
        self.all_clustering_coefficients = nx.clustering(self.graph)

    # get the actual value from the pre-calculated hash
    return self.all_clustering_coefficients[node]

def degree(self, node):
    print 'Calculating degree for node', node
    return self.graph.degree(node)

def degree_gt(self, node):
    print 'Calculating degree with graph_tool for node', node
    # find the index of the node
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]

    # calculate the degree for all nodes
    if not hasattr(self.g.vp, 'degree'):
        self.g.vp['degree'] = self.g.degree_property_map("total")

    return self.g.vp.degree[node_index]

def eigenvector_centrality_gt(self, node):
    print 'Calculating eigenvector centrality with graph_tool for node', node

    if not hasattr(self.g.vertex_properties, 'eigenvector'):
        eigenvalue, eigenvector = gt.eigenvector(self.g)
        self.g.vertex_properties.eigenvector = eigenvector
        self.eigenvalue = eigenvalue

    node_index = gt.find_vertex(self.g, self.label_map, node)[0]

    # this has been scaled by the eigenvalue for nicer values
    return self.g.vp.eigenvector[self.g.vertex(node_index)]*float(self.eigenvalue)

def eigenvector_centrality(self, node):
    print 'Calculating eigenvector centrality for node', node

    if not hasattr(self, 'all_eigenvector_centralities'):
        self.all_eigenvector_centralities = nx.eigenvector_centrality(self.graph, max_iter=100000)

    return self.all_eigenvector_centralities[node]

def average_neighbor_degree(self, node):
    print 'Calculating average_neighbor_degree for node', node
    # same caching technique as in clustering_coefficient
    # might also break for very large graphs;
    # nx.average_neighbor_degree(self.graph, nodes=node) might be the way to go

    if not hasattr(self, 'all_average_neighbor_degrees'):
        self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
    return self.all_average_neighbor_degrees[node]

def iterated_average_neighbor_degree(self, node):
    print 'Calculating iterated_average_neighbor_degree for node', node
    result = 0  # initialise

    first_level_neighbors = self.graph.neighbors(node)
    if len(first_level_neighbors) != 0:
        second_level_neighbors = []
        # get all two-hop nodes
        for first_level_neighbor in first_level_neighbors:
            current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
            second_level_neighbors.extend(current_second_level_neighbors)

        # remove one-hop nodes and the node itself
        relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])

        if len(relevant_nodes) != 0:
            degree_sum = 0
            for relevant_node in relevant_nodes:
                degree_sum += self.graph.degree(relevant_node)
            result = float(degree_sum)/float(len(relevant_nodes))
    return result

def iterated_average_neighbour_degree_gt(self, node):
    print 'Calculating iterated_average_neighbour_degree with graph_tool for node', node

    result = 0  # initialise

    vertex = gt.find_vertex(self.g, self.label_map, node)[0]
    # all_neighbours() yields vertices lazily, so materialise it as a list
    first_level_neighbours = list(vertex.all_neighbours())

    if len(first_level_neighbours) != 0:
        second_level_neighbours = []
        # get all two-hop nodes
        for first_level_neighbour in first_level_neighbours:
            current_second_level_neighbours = first_level_neighbour.all_neighbours()
            second_level_neighbours.extend(current_second_level_neighbours)

        # remove one-hop nodes and the vertex itself
        relevant_vertices = set(second_level_neighbours) - set(first_level_neighbours) - set([vertex])

        if len(relevant_vertices) != 0:
            # if the degree has not been calculated yet, calculate it for all nodes
            if not hasattr(self.g.vp, 'degree'):
                self.g.vp['degree'] = self.g.degree_property_map("total")

            degree_sum = 0  # initialise
            for relevant_vertex in relevant_vertices:
                degree_sum += self.g.vp.degree[relevant_vertex]
            result = float(degree_sum)/float(len(relevant_vertices))
    return result

def eccentricity(self, node):
    print 'Calculating eccentricity for node', node
    if not hasattr(self, 'all_eccentricities'):
        l = gt.label_largest_component(self.g)  # find the largest component
        print 'Found the largest component'
        u = gt.GraphView(self.g, vfilt=l)  # extract the largest component as a graph
        print 'The number of vertices in the largest component is', u.num_vertices()
        print 'The number of vertices in the original graph is', self.g.num_vertices()
        if u.num_vertices() == nx.number_of_nodes(self.graph):
            print 'Graph is connected'
            self.all_eccentricities = nx.eccentricity(self.graph)
            print 'Calculated all eccentricities'
            return self.all_eccentricities[node]
        else:
            print 'Graph is disconnected'
            self.all_eccentricities = {}
    if self.all_eccentricities != {}:
        print 'Returning eccentricity for', node, '-', self.all_eccentricities[node]
        return self.all_eccentricities[node]
    else:
        print 'Returning 0'
        return 0

def eccentricity_gt(self, node):
    print 'Calculating eccentricity with graph_tool for node', node

    # find the index of the node
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]

    if not hasattr(self.g.gp, 'pseudo_diameter'):
        # find the approximate diameter
        print 'Finding maximum distance for walk'
        self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
        self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
        # the endpoints will not be used

    # find all distances from the node
    distances = gt.shortest_distance(self.g, node_index, max_dist=self.g.gp.pseudo_diameter+1).a
    # calculate the maximum, masking out unreachable vertices
    maximum = np.ma.max(np.ma.masked_where(distances > 2147483646, distances), 0)
    return maximum

def eccentricity_gt_s(self, node):
    print 'Calculating eccentricity for small graphs with graph_tool for node', node
    eccentricity = 0  # initialise

    # find the index of the node
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # get all shortest path lengths
    if not hasattr(self, 'all_distances'):
        self.all_distances = gt.shortest_distance(self.g)

    for distance in self.all_distances[node_index]:
        if distance < 2147483647:  # disregard all nodes which are not reachable
            eccentricity = max(eccentricity, distance)
    return eccentricity

def betweenness_centrality(self, node):
    print 'Calculating betweenness_centrality for node', node
    if not hasattr(self, 'all_betweenness_centralities'):
        self.all_betweenness_centralities = nx.betweenness_centrality(self.graph)
    return self.all_betweenness_centralities[node]

def betweenness_centrality_gt(self, node):
    print 'Calculating betweenness_centrality with graph_tool for node', node

    # def convert_graph(g):
    #     # converts a networkx graph to graph_tool
    #     # important: NetworkX node indexes start with 1, whereas graph_tool node indexes start with 0
    #     adj = nx.adjacency_matrix(g)
    #     j = gt.Graph(directed=False)
    #     j.add_vertex(len(adj))
    #     num_vertices = adj.shape[0]
    #     for i in range(num_vertices - 1):
    #         for l in range(i + 1, num_vertices):
    #             if adj[i, l] != 0:
    #                 j.add_edge(i, l)
    #     return j

    if not hasattr(self.g.vertex_properties, 'betweenness'):
        vp, ep = gt.betweenness(self.g)
        # internalize the property maps
        self.g.vertex_properties.betweenness = vp
        self.g.edge_properties.betweenness = ep
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]

    return self.g.vp.betweenness[self.g.vertex(node_index)]

def average_shortest_path_length(self, node):
    print 'Calculating average_shortest_path_length for node', node
    # caching average_shortest_path_length for all nodes at once failed,
    # so this already uses single-node calculation

    # get all shortest path lengths
    all_shortest_path_lengths_for_node = nx.shortest_path_length(self.graph, source=node)

    # calculate the average
    sum_of_lengths = 0
    for target in all_shortest_path_lengths_for_node:
        sum_of_lengths += all_shortest_path_lengths_for_node[target]

    return float(sum_of_lengths)/len(all_shortest_path_lengths_for_node)

def average_shortest_path_length_gt(self, node):
    print 'Calculating average_shortest_path_length with graph_tool for node', node
    # find the index of the node
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]

    if not hasattr(self.g.gp, 'pseudo_diameter'):
        # find the approximate diameter
        print 'Finding maximum distance for walk'
        self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
        self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
        # the endpoints will not be used

    # find all distances from the node
    distances = gt.shortest_distance(self.g, node_index, max_dist=self.g.gp.pseudo_diameter+1).a
    # calculate the average, masking out unreachable vertices
    average = np.ma.average(np.ma.masked_where(distances > 2147483646, distances))
    return float(average)
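Both graph_tool variants above use the same convention: gt.shortest_distance marks unreachable vertices (and vertices beyond max_dist) with a large int32 sentinel, so masking everything above 2147483646 keeps only real distances. A standalone sketch of that masking step with a hypothetical distance array:

    import numpy as np

    distances = np.array([0, 1, 2, 2147483647, 3])  # fourth vertex unreachable
    masked = np.ma.masked_where(distances > 2147483646, distances)
    print np.ma.max(masked)      # 3   -> the eccentricity
    print np.ma.average(masked)  # 1.5 -> the average shortest path length (self-distance 0 included)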

def average_shortest_path_length_gt_small_graphs(self, node):
    print 'Calculating average_shortest_path_length for small graphs with graph_tool for node', node
    result = 0  # initialise

    # find the index of the node
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # get all shortest path lengths
    if not hasattr(self, 'all_distances'):
        self.all_distances = gt.shortest_distance(self.g)

    distances = self.all_distances[node_index]
    # calculate the average
    sum_of_distances = 0
    accessible_nodes = 0
    for distance in distances:
        if distance < 2147483647:  # disregard all nodes in other components
            sum_of_distances += distance
            accessible_nodes += 1
    if accessible_nodes != 0:
        result = float(sum_of_distances)/float(accessible_nodes)
    return result

def deterioration(self, node):
    print 'Calculating deterioration due to removal of node', node

    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    self.exclusion_map[node_index] = 0  # take out the node
    u = gt.GraphView(self.g, vfilt=self.exclusion_map)
    u = gt.GraphView(self.g, vfilt=gt.label_largest_component(u))
    p = 100.0*(1.0-float(u.num_vertices())/float(self.glc.num_vertices()))
    self.exclusion_map[node_index] = 1  # reset the node

    return p

####################
# advanced metrics #
####################
def correct_clustering_coefficient(self, node):
    print 'Calculating correct_clustering_coefficient for node', node
    clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node), 'clustering_coefficient'))
    degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
    max_degree = self.redis.zrange(self.metric_prefix+'degree', -1, -1, withscores=True, score_cast_func=float)[0][1]
    corrected_cc = clustering_coefficient * np.log(degree) / np.log(max_degree)
    return corrected_cc
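The correction damps the raw coefficient by the factor log(degree)/log(max_degree), so low-degree nodes are scaled down the most. A quick arithmetic check with hypothetical values:

    import numpy as np

    # raw clustering coefficient 0.5, degree 8, maximum degree 64 in the graph
    corrected = 0.5 * np.log(8) / np.log(64)
    print corrected   # ln(64) = 2*ln(8), so this is 0.5 * 0.5 = 0.25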

def correct_clustering_coefficient_old(self, node):
    print 'Calculating correct_clustering_coefficient for node', node
    clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node), 'clustering_coefficient'))
    degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
    corrected_cc = clustering_coefficient + (degree * clustering_coefficient) / 4.0
    return corrected_cc

def correct_average_neighbor_degree(self, node):
    print 'Calculating correct_average_neighbor_degree for node', node
    avgnd = float(self.redis.hget(self.node_prefix+str(node), 'average_neighbor_degree'))

    if avgnd == 0.0:
        result = avgnd
    else:
        neighbors = self.graph.neighbors(node)
        number_of_neighbors = float(len(neighbors))
        if number_of_neighbors == 0.0:
            result = avgnd
        else:
            neighbor_degrees = []
            for neighbor in neighbors:
                neighbor_degrees.append(self.graph.degree(neighbor))

            # using the numpy median and standard deviation implementations
            numpy_neighbor_degrees = np.array(neighbor_degrees)
            standard_deviation = np.std(numpy_neighbor_degrees)
            if standard_deviation == 0.0:
                result = avgnd
            else:
                median = np.median(numpy_neighbor_degrees)
                result = avgnd + (((median - avgnd) / standard_deviation) / number_of_neighbors) * avgnd
    return result

def correct_iterated_average_neighbor_degree(self, node):
    print 'Calculating correct_iterated_average_neighbor_degree for node '+str(node)
    iand = float(self.redis.hget(self.node_prefix+str(node), 'iterated_average_neighbor_degree'))
    ciand = iand
    if iand != 0.0:
        first_level_neighbors = self.graph.neighbors(node)
        second_level_neighbors = []

        # get all two-hop nodes
        for first_level_neighbor in first_level_neighbors:
            current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
            second_level_neighbors.extend(current_second_level_neighbors)

        # remove one-hop neighbors and the node itself
        relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])

        if len(relevant_nodes) != 0:
            node_degrees = []
            for relevant_node in relevant_nodes:
                node_degrees.append(self.graph.degree(relevant_node))

            numpy_node_degrees = np.array(node_degrees)
            standard_deviation = np.std(numpy_node_degrees)
            if standard_deviation != 0.0:
                median = np.median(numpy_node_degrees)
                ciand = iand + (((median - iand) / standard_deviation) / float(len(relevant_nodes))) * iand
    return ciand
File percolation.py added (mode: 100644) (index 0000000..7038340)

import graph_tool.all as gt
import numpy as np
import datetime as dt
import visualization

###############################################
### NOTE: we use the largest component, not ###
### the entire graph, for the calculation   ###
###############################################

###############################################
### functions used by all percolation modes ###
###############################################
def percolation(percolated_graph, intact_graph):
    return 100.0*(1.0-float(percolated_graph.num_vertices())/float(intact_graph.num_vertices()))

def print_info(flc, glc):
    print 'filtered graph - vertices: '+str(flc.num_vertices())+' / edges: '+str(flc.num_edges())
    print 'percolation: '+str(percolation(flc, glc))+'%'
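percolation() expresses deterioration as the relative shrinkage of the largest component, in percent. A quick check with a hypothetical stand-in that exposes the only method percolation() uses:

    class _FakeGraph(object):
        def __init__(self, n): self._n = n
        def num_vertices(self): return self._n

    print percolation(_FakeGraph(640), _FakeGraph(1000))  # 100*(1 - 640/1000) = 36.0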

# the function below was needed in previous versions of CoRiA because the set of set members was nested within another set
#def read_redis_smembers(redis, key):
#    s = redis.smembers(key)  # read the set
#    return [i.strip() for i in [l.strip('[]').split(',') for l in s][0]]  # build a list stripped of useless characters

#################################
####### percolation modes #######
#################################

# These percolation modes take as input the mode name and n - a dictionary of
# numbers of nodes to take out (as keys) and corresponding percentages (as values).
# They return a dictionary of percentage keys and percolation values.
# Advanced percolation modes nest this dictionary within a dictionary of groups.
# Therefore, they require a loop over these groups, which can be e.g. metrics or countries.
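A sketch of the shapes involved, continuing the hypothetical "1 2 5 10" input on a 1234-vertex largest component (all deterioration values invented for illustration):

    # input n: number of nodes to remove -> percentage of nodes removed
    n = {12: 1.0, 25: 2.0, 62: 5.0, 123: 10.0}

    # basic mode result: percentage removed -> percentage of deterioration
    #   {1.0: 0.3, 2.0: 1.1, 5.0: 4.2, 10.0: 11.5}

    # advanced mode result: group -> {percentage removed -> deterioration}
    #   {'degree': {1.0: 2.0, ...}, 'unified_risk_score': {1.0: 2.4, ...}}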
|
36 |
|
|
|
37 |
|
################################# |
|
38 |
|
#### BASIC PERCOLATION MODES #### |
|
39 |
|
################################# |
|
40 |
|
def failure(self, mode_name, n): |
|
41 |
|
print 'Calculating percolation due to random failure' |
|
42 |
|
# initialise |
|
43 |
|
counter = 0 |
|
44 |
|
results = {} |
|
45 |
|
|
|
46 |
|
# take a random sample from the largest component |
|
47 |
|
for v in np.random.choice(list(self.glc.vertices()),size=max(n.keys()),replace=False): |
|
48 |
|
self.exclusion_map[self.g.vertex(v)] = 0 |
|
49 |
|
counter += 1 |
|
50 |
|
if counter in n.keys(): |
|
51 |
|
print counter,'nodes removed' |
|
52 |
|
# graph without the excluded vertices (i.e. those that have value 0 in the exclusion map) |
|
53 |
|
f = gt.GraphView(self.g, vfilt = self.exclusion_map) |
|
54 |
|
# largest component of graph f |
|
55 |
|
flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f)) |
|
56 |
|
print_info(flc,self.glc) |
|
57 |
|
results[n[counter]] = percolation(flc,self.glc) |
|
58 |
|
# visualize deterioration |
|
59 |
|
# visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+str(int(n[counter]))+'_pct') |
|
60 |
|
|
|
61 |
|
return results |
|
62 |
|
|
|
63 |
|
##################################################################### |
|
64 |
|
|
|
65 |
|
def random_walk(self, mode_name, n): |
|
66 |
|
print 'Calculating percolation due to random walk' |
|
67 |
|
#first vertex for random walk |
|
68 |
|
start = self.glc.vertex(np.random.randint(0,self.glc.num_vertices()), use_index=False) |
|
69 |
|
|
|
70 |
|
#do random walk |
|
71 |
|
alternate_list = list(self.label_map.a) |
|
72 |
|
np.random.shuffle(alternate_list) |
|
73 |
|
results = rw(self,start,n,alternate_list,mode_name) |
|
74 |
|
|
|
75 |
|
#return dict(zip(percentages,percolations)) |
|
76 |
|
return results |
|
77 |
|
|
|
78 |
|
##################################################################### |
|
79 |
|
|
|
80 |
|
################################# |
|
81 |
|
## ADVANCED PERCOLATION MODES ### |
|
82 |
|
################################# |
|
83 |
|
def target_list(self, mode_name, n): |
|
84 |
|
print 'Calculating percolation due to targeted attack , i.e. taking out top nodes from a target list' |
|
85 |
|
# instantiate results dictionary and target lists |
|
86 |
|
results = {} |
|
87 |
|
nodes_max = {} |
|
88 |
|
|
|
89 |
|
#loop through all metrics |
|
90 |
|
all_metrics = list(self.base_metrics.keys() + self.advanced_metrics.keys()) |
|
91 |
|
for metric in all_metrics: |
|
92 |
|
#get nodes with highest value of metric |
|
93 |
|
nodes_max[metric] = self.redis.zrange(self.metric_prefix+metric+self.normalization_suffix, -max(n.keys()), -1, withscores=False, score_cast_func=float).reverse() |
|
94 |
|
#loop through all scores |
|
95 |
|
all_scores = list(self.scores.keys() + self.advanced_scores.keys()) |
|
96 |
|
for score in all_scores: |
|
97 |
|
#get nodes with highest value of score |
|
98 |
|
nodes_max[score] = self.redis.zrange(self.score_prefix+score, -max(n.keys()), -1, withscores=False, score_cast_func=float).reverse() |
|
99 |
|
|
|
100 |
|
#loop through all metrics and scores |
|
101 |
|
for metric in all_metrics+all_scores: |
|
102 |
|
print 'Taking out top nodes for metric',metric |
|
103 |
|
|
|
104 |
|
# initialise variables and exclusion map |
|
105 |
|
counter = 0 |
|
106 |
|
self.exclusion_map.a = 1 |
|
107 |
|
results[metric] = {} |
|
108 |
|
|
|
109 |
|
for node in nodes_max[metric]: |
|
110 |
|
vertex = gt.find_vertex(self.g,self.label_map,node)[0] |
|
111 |
|
self.exclusion_map[vertex] = 0 |
|
112 |
|
counter += 1 |
|
113 |
|
if counter in n.keys(): |
|
114 |
|
print counter,'nodes removed' |
|
115 |
|
# graph without the excluded vertices (i.e. those that have value 0 in the exclusion map) |
|
116 |
|
f = gt.GraphView(self.g, vfilt = self.exclusion_map) |
|
117 |
|
# largest component of graph f |
|
118 |
|
flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f)) |
|
119 |
|
print_info(flc,self.glc) |
|
120 |
|
results[metric][n[counter]] = percolation(flc,self.glc) |
|
121 |
|
# visualize deterioration |
|
122 |
|
# visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+metric+'_'+str(int(n[counter]))+'_pct') |
|
123 |
|
|
|
124 |
|
return results |

#####################################################################

def hybrid_mode(self, mode_name, n):
    print 'Calculating percolation due to random walk starting from node with highest value of metric'
    # instantiate results dictionary and alternate lists for random walk
    results = {}
    alternate_lists = {}

    #loop through all metrics
    all_metrics = list(self.base_metrics.keys() + self.advanced_metrics.keys())
    for metric in all_metrics:
        #get all nodes sorted by value of metric (descending)
        temp_list = self.redis.zrange(self.metric_prefix+metric+self.normalization_suffix, 0, -1, withscores=False, score_cast_func=float)
        alternate_lists[metric] = [node for node in reversed(temp_list)]

    #loop through all scores
    all_scores = list(self.scores.keys() + self.advanced_scores.keys())
    for score in all_scores:
        #get all nodes sorted by value of score (descending)
        temp_list = self.redis.zrange(self.score_prefix+score, 0, -1, withscores=False, score_cast_func=float)
        alternate_lists[score] = [node for node in reversed(temp_list)]

    #loop through all metrics and scores
    for metric in all_metrics+all_scores:
        print 'Starting from node with highest value of metric',metric
        #initialise exclusion vertex property map
        self.exclusion_map.a = 1

        #first vertex for random walk
        start = gt.find_vertex(self.g,self.label_map,alternate_lists[metric][0])[0]

        #do random walk
        results[metric] = rw(self,start,n,alternate_lists[metric],mode_name+'_'+metric)

    return results
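
# Annotation (added in review, not part of the commit): the alternate lists hold
# every node in descending metric order; rw() starts from the top node and falls
# back to the next still-included list entry whenever the walk hits a dead end.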

def russian(self, mode_name, n):
    print 'Calculating percolation due to shutting off the Russian network from the internet'
    # instantiate results dictionary and target lists
    #results = {}
    #nodes_max = {}
    # note the inverted use of the exclusion map in this mode: everything starts
    # excluded and only Russian vertices are switched on, so the graph views and
    # visualizations below show the shut-off part of the network ("inverse")
    self.exclusion_map.a = 0
    counter = 0
    results = {}
    for v in self.g.vertices():
        if self.g.vp.country_code[v] == 'RU':
            print 'Shutting off node',int(v),'because it\'s Russian!'
            self.exclusion_map[v] = 1
            counter += 1
            # if counter in n.keys():
            #     print counter,'nodes removed'
            #     # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
            #     f = gt.GraphView(self.g, vfilt = self.exclusion_map)
            #     # largest component of graph f
            #     flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
            #     print_info(flc,self.glc)
            #     results[n[counter]] = percolation(flc,self.glc)
            #     # visualize deterioration
            #     # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+metric+'_'+str(int(n[counter]))+'_pct')

    f = gt.GraphView(self.g, vfilt = self.exclusion_map)
    flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
    #results[max(n.values())] = percolation(flc,self.g)
    # visualize deterioration
    print 'Creating visualization #1 of the deterioration.'
    visualization.draw_deterioration(self,self.g.vp.sfdp,mode_name+"_SFDP_inverse")
    print 'Creating visualization #2 of the deterioration.'
    visualization.draw_deterioration(self,self.g.vp.Random,mode_name+"_Random_inverse")
    print 'Creating visualization #3 of the deterioration.'
    visualization.draw_deterioration(self,self.g.vp.Radial,mode_name+"_Radial_inverse")
    #return results

#####################################################################
############## Random Walk for the RW deletion modes ################
#####################################################################

# takes as input a start vertex, the number of vertices to take out
# and an alternate list of vertices if the random walk reaches a dead end

def rw(self, vertex, n, alternate_list, mode_name):
    # initialise
    results = {}

    self.exclusion_map[vertex] = 0 #take out start vertex
    # initialise graph filters
    # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
    f = gt.GraphView(self.g, vfilt = self.exclusion_map)
    # largest component of graph f
    flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
    if 1 in n.keys():
        print '1 node removed'
        print_info(flc,self.glc)
        results[n[1]] = percolation(flc,self.glc)
        # visualize deterioration
        # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+str(int(n[1]))+'_pct')

    for i in range(max(n.keys())-1):
        neighbours = list(vertex.all_neighbours())
        flag = 0 #decision flag

        # choose a random neighbour that is still part of the graph
        if len(neighbours) > 0:
            np.random.shuffle(neighbours)
            for neighbour in neighbours:
                if self.exclusion_map[neighbour] != 0:
                    vertex = neighbour
                    flag = 1
                    break

        # to be executed if no usable neighbours exist - choose the next node out of an alternative list
        if flag == 0:
            # create a list of already used list members
            used_list = []
            for node in alternate_list:
                vertex = gt.find_vertex(self.g,self.label_map,node)[0]
                used_list.append(node)
                if self.exclusion_map[vertex] != 0:
                    break
            if len(used_list) > 0:
                for used_node in used_list:
                    # remove used members from alternate list. This reduces calculation time in the next iteration
                    alternate_list.remove(used_node)

        self.exclusion_map[vertex] = 0 #take out vertex
        f = gt.GraphView(self.g, vfilt = self.exclusion_map) #update graph (filtered)
        if i+2 in n.keys():
            flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f)) #update largest component
            print i+2,'nodes removed'
            print_info(flc,self.glc)
            results[n[i+2]] = percolation(flc,self.glc)
            # visualize deterioration
            # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+str(int(n[i+2]))+'_pct')

    return results
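
# Annotation (added in review, not part of the commit): the start vertex counts
# as removal number 1, so inside the loop i+2 is the running count of removed
# vertices; a result is recorded under its percentage n[i+2] whenever that count
# was one of the requested removal sizes.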

##############################
##############################
########## THE END ###########
##############################
##############################

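Reviewer note (not part of the commit): the percolation measure used by all of
the modes above compares largest connected components before and after
filtering, i.e. percolation = 1 - |LC(filtered)| / |LC(intact)|. A minimal,
self-contained sketch of that calculation, assuming only graph_tool is
installed; the toy graph and variable names are illustrative:

import graph_tool.all as gt

# toy undirected path graph 0-1-2-3-4
g = gt.Graph(directed=False)
g.add_vertex(5)
g.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 4)])

# exclusion map: 1 = keep, 0 = removed (same convention as above)
exclusion_map = g.new_vertex_property("bool")
exclusion_map.a = 1
exclusion_map[g.vertex(2)] = 0  # take out the central vertex

glc = gt.GraphView(g, vfilt=gt.label_largest_component(g))  # intact: 5 vertices
f = gt.GraphView(g, vfilt=exclusion_map)                    # filtered graph
flc = gt.GraphView(g, vfilt=gt.label_largest_component(f))  # surviving: 2 vertices

# 1 - 2/5 = 0.6: removing one central vertex destroys 60% of the component
print(1.0 - float(flc.num_vertices()) / float(glc.num_vertices()))
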
File ru_metric_calculator.py added (mode: 100644) (index 0000000..05d8f41)

import networkx as nx
import graph_tool.all as gt
import redis as rd
import numpy as np
import indexing
import statistics
import normalizations
import config
import percolation
import visualization
import datetime as dt


class MetricCalculator(object):
    def __init__(self, graph, graph_gt):
        #class constructor
        #define required class variables such as the graph to work on, the redis connection and the nodes of the graph

        print ('Starting metric_calculator!')

        # for code evaluation
        self.start_time = dt.datetime.now()
        self.durations = {}
        self.durations_in_seconds = {}
        self.durations_in_percent = {}

        self.graph = graph
        self.graph_gt = graph_gt

        # alternate name for graph tool graph
        self.g = self.graph_gt['graph_gt']
        # alternate name for graph tool labels
        if not hasattr(self.g.vp, 'label_map'):
            self.g.vp.label_map = self.graph_gt['graph_gt_labels']
        self.label_map = self.g.vp.label_map
        # vertex property map for percolation calculations
        if not hasattr(self.g.vp, 'exmap'):
            self.g.vp.exmap = self.g.new_vertex_property("bool") #internalizes map

        self.exclusion_map = self.g.vp.exmap
        self.exclusion_map.a = 1 #initialise filter map
        #find largest component of graph tool graph for percolation calculations
        # percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
        self.glc = gt.GraphView(self.g, vfilt = gt.label_largest_component(self.g))

        if not hasattr(self.g.vp, 'eigenvector'):
            eigenvalue, self.g.vp.eigenvector = gt.eigenvector(self.g)
        if not hasattr(self.g.ep, 'betweenness'):
            # note: only the edge betweenness map is internalized; the vertex map is discarded
            betweenness,self.g.ep.betweenness = gt.betweenness(self.g)

        self.redis = rd.StrictRedis(host='localhost', port=6379, db=1)
        self.nodes = nx.nodes(graph)


        # configuration variables are read from the config file and are also saved to class variables for easy access
        self.graph_index_key = config.graph_index_key

        self.graph_name = ''
        while (self.graph_name == ''):
            self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")

        self.info_index_key = self.graph_name+':'+config.info_index_key
        self.node_index_key = self.graph_name+':'+config.node_index_key
        self.metric_index_key = self.graph_name+':'+config.metric_index_key
        self.score_index_key = self.graph_name+':'+config.score_index_key
        self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
        self.layout_index_key = self.graph_name+':'+config.layout_index_key

        self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
        self.node_prefix = self.graph_name+':'+config.node_prefix
        self.metric_prefix = self.graph_name+':'+config.metric_prefix
        self.score_prefix = self.graph_name+':'+config.score_prefix
        self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
        self.percolation_prefix = self.graph_name+':'+config.percolation_prefix

        self.normalization_suffix = config.normalization_suffix

        self.base_metrics = config.base_metrics
        self.advanced_metrics = config.advanced_metrics

        self.normalization_methods = config.normalization_methods

        self.scores = config.scores
        self.advanced_scores = config.advanced_scores

        self.visualization_layouts = config.visualization_layouts
        # the full mode lists from the config are commented out for testing purposes
        # self.percolation_modes = config.percolation_modes
        # self.advanced_percolation_modes = config.advanced_percolation_modes

        self.percolation_modes = {'russian_shutoff':config.percolation_modes['russian_shutoff']}
        self.advanced_percolation_modes = {}

    ##############################################################################
    ###### start describes the entire calculation in a high level overview #######
    ##############################################################################

    def start(self):
        start_time_calculation = dt.datetime.now()

        #preliminary calculations
        #self.flush_database()
        self.obtain_percentages()
        #self.create_info()
        #self.create_standard_layout()
        #self.save_graph_data('raw')

        #index creation
        #self.create_indexes()

        #main calculations
        #self.calculate_metrics()
        #self.calculate_advanced_metrics()
        #self.normalize_metrics()
        #self.calculate_scores()
        #self.calculate_advanced_scores()

        #statistics
        #self.calculate_statistics()

        #dynamic metrics / percolation
        self.calculate_percolation()

        #visualization
        #self.visualize_graph()

        #save final graph
        self.save_graph_data('russian')

        #evaluation
        self.duration_total = dt.datetime.now() - start_time_calculation
        self.evaluate_durations()


    ###################
    ## PRELIMINARIES ##
    ###################
    def flush_database(self):
        # ask whether to clear all data in Redis before continuing
        flush_flag = 'Flushing'
        while (flush_flag != 'y' and flush_flag != 'n'):
            flush_flag = raw_input("Would you like to flush the database before continuing? [y/n]")
        if flush_flag == 'y':
            self.redis.flushdb()

    def obtain_percentages(self):
        # obtain percentages for calculation of deterioration #
        # and calculate number of nodes to remove from graph ##
        percentages = '' # initialise
        while (percentages == ''):
            percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")

        percentages = sorted([float(pct) for pct in percentages.split()])
        numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
        # create a dictionary mapping numbers of nodes to their corresponding percentages
        self.percentages = dict(zip(numbers,percentages))
        # storing values in redis DB
        #self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
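
        # Worked example (annotation, not part of the commit): for a largest
        # component of 10000 vertices and the input "1 2 5 10", numbers ==
        # [100, 200, 500, 1000] (rounded to the nearest vertex) and
        # self.percentages == {100: 1.0, 200: 2.0, 500: 5.0, 1000: 10.0}.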

    def create_info(self):
        #store general info about graph
        self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
        self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())

    def create_standard_layout(self):
        # create a standard layout
        start_time = dt.datetime.now()
        print 'Creating standard layout for graph visualization.'
        if not hasattr(self.g.vp,'sfdp'):
            self.g.vp.sfdp = gt.sfdp_layout(self.g, C=0.5)
        # the assignment below must stay active: the print relies on it and
        # raised a KeyError while it was commented out
        self.durations['SFDP_layout'] = dt.datetime.now() - start_time
        print self.durations['SFDP_layout']

    def save_graph_data(self,name):
        # save graph
        start_time = dt.datetime.now()
        print 'Saving graph data'
        self.g.save(self.graph_name+'_'+name+'.gt.gz')
        self.durations['saving_graph'+name] = dt.datetime.now() - start_time

    ##################
    #### INDEXING ####
    ##################
    def create_indexes(self):
        start_time = dt.datetime.now()
        #call methods defined in indexing.py
        #indexing.index_graph(self)
        #indexing.index_nodes(self)
        #indexing.index_neighbors(self)
        #indexing.index_metrics(self)
        #indexing.index_scores(self)
        #indexing.index_percolation(self)
        self.durations['indexing'] = dt.datetime.now() - start_time

    ###########################
    #### CALCULATION LOOPS ####
    ###########################

    def calculate_metrics(self):
        start_time_total = dt.datetime.now()
        # loop through all defined metrics and call the specified calculation method for each node
        print ('Starting calculate_metrics')
        for metric_name in self.base_metrics:
            start_time = dt.datetime.now()
            metric_method = self.base_metrics[metric_name]

            # loop through all nodes
            for node in self.nodes:
                # call calculation method of supplied metric for current node
                node = int(node)
                value = float(metric_method(self,node))

                #store result in node values
                self.redis.hset(self.node_prefix+str(node), metric_name, value)

                #also store result to metric set
                self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
            self.durations['metrics:'+metric_name] = dt.datetime.now() - start_time
        self.durations['metrics_total'] = dt.datetime.now() - start_time_total
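
        # Annotation (added in review, not part of the commit): after this loop
        # Redis holds one hash per node, '<graph>:node_metrics:<node>', mapping
        # metric names to values, and one sorted set per metric,
        # '<graph>:metric:<metric_name>', ordering all nodes by that value. The
        # percolation modes read their target lists from these sorted sets.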


    def calculate_advanced_metrics(self):
        start_time_total = dt.datetime.now()
        # loop through all defined advanced metrics and call the specified calculation method
        print ('Starting calculate_advanced_metrics')
        for advanced_metric_name in self.advanced_metrics:
            start_time = dt.datetime.now()
            metric_method = self.advanced_metrics[advanced_metric_name]

            # loop through all nodes
            for node in self.nodes:
                node = int(node)
                value = float(metric_method(self,node))

                #store result in node values
                self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)

                #also store result to metric set
                self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
            self.durations['adv_metrics:'+advanced_metric_name] = dt.datetime.now() - start_time
        self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total


    # loop through all defined normalizations and call the respective normalization method
    # metrics not listed in the "normalization_methods" hash fall back to min-max normalization
    def normalize_metrics(self):
        start_time = dt.datetime.now()
        print ('Starting normalize_metrics')
        all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())

        for metric_name in all_metrics:
            if self.normalization_methods.has_key(metric_name):
                normalization_method = self.normalization_methods[metric_name]
            else:
                #fallback normalization is min-max
                normalization_method = normalizations.min_max
            normalization_method(self,metric_name)

        self.durations['normalizing'] = dt.datetime.now() - start_time
    def calculate_scores(self):
        start_time = dt.datetime.now()
        print ('Starting calculate_scores')
        for score_name in self.scores:
            metrics_with_weights = self.scores[score_name]

            for node in self.nodes:
                score_value = 0.0

                # weighted sum over the normalized metric values
                for metric in metrics_with_weights:
                    weight = self.scores[score_name][metric]
                    value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
                    score_value += weight * value

                #store score in the node hash and in the per-score sorted set
                self.redis.hset(self.node_prefix+str(node),score_name, score_value)
                self.redis.zadd(self.score_prefix+score_name, score_value, str(node))

        self.durations['scores'] = dt.datetime.now() - start_time

    def calculate_advanced_scores(self):
        start_time = dt.datetime.now()
        print ('Starting calculate_advanced_scores')
        for advanced_score in self.advanced_scores:
            self.advanced_scores[advanced_score](self)

        self.durations['adv_scores'] = dt.datetime.now() - start_time


    #############
    # statistics
    #############

    def calculate_statistics(self):
        start_time = dt.datetime.now()
        print ('Starting calculate_statistics')
        for metric in self.base_metrics:
            #absolute and normalized
            statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
            statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)

        for advanced_metric in self.advanced_metrics:
            #absolute and normalized
            statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
            statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)

        for score in self.scores:
            statistics.calculate_statistics(self, score, self.score_prefix+score)

        for advanced_score in self.advanced_scores:
            statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
        self.durations['statistics:stats'] = dt.datetime.now() - start_time

        start_time = dt.datetime.now()
        statistics.calculate_correlations(self)
        self.durations['statistics:corr'] = dt.datetime.now() - start_time

    ###################
    # dynamic metrics #
    ###################

    def calculate_percolation(self):
        start_time_total = dt.datetime.now()
        print ('Starting percolation calculation')

        # shorten the name for percentages and corresponding numbers of nodes to remove
        n = self.percentages

        # BASIC PERCOLATION MODES
        # basic percolation modes take mode_name and n as input and return a #
        # dictionary with percentage of nodes removed as key and percentage ##
        # of deterioration as value
        for mode_name in self.percolation_modes:
            start_time = dt.datetime.now()
            # initialise exclusion vertex property map
            self.exclusion_map.a = 1
            # read method from config file
            mode_method = self.percolation_modes[mode_name]
            # execute method
            #results = mode_method(self,mode_name,n)
            mode_method(self,mode_name,n)
            # index percolation mode
            #self.redis.sadd(self.percolation_index_key, mode_name)
            # store values
            #print 'Storing percolation percentages'
            #for percentage in results:
            #    value = results[percentage]
            #    #store in hash set
            #    self.redis.hset(self.percolation_prefix+mode_name, percentage, value)

            self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

        # ADVANCED PERCOLATION MODES
        # advanced percolation modes take mode_name and n as input and return a ###
        # dictionary with groups of percolation modes (e.g. metrics, countries) ###
        # as keys and dictionaries of percentages (removed: deteriorated) as values
        for mode_name in self.advanced_percolation_modes:
            start_time = dt.datetime.now()
            # initialise exclusion vertex property map
            self.exclusion_map.a = 1
            # read method from config file
            mode_method = self.advanced_percolation_modes[mode_name]
            # execute method
            results = mode_method(self,mode_name,n)

            # store values
            #print 'Storing percolation percentages'
            #for group in results:
            #    # index percolation modes
            #    self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
            #    for percentage in results[group]:
            #        value = results[group][percentage]
            #        #store in hash set
            #        self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)

            self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

        self.durations['percolation_total'] = dt.datetime.now() - start_time_total


    def visualize_graph(self):

        for layout_name in self.visualization_layouts:
            start_time = dt.datetime.now()
            print 'Creating visualisation with '+layout_name+' layout'

            layout_method = self.visualization_layouts[layout_name]
            self.g.vp[layout_name] = layout_method(self)
            gt.graph_draw(self.glc, pos=self.g.vp[layout_name], output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")

            self.redis.sadd(self.layout_index_key, layout_name)
            self.durations['layout:'+layout_name] = dt.datetime.now() - start_time
            print self.durations['layout:'+layout_name]

    def evaluate_durations(self):
        #print out and write the times taken per calculation step
        print 'times taken:'
        output = open(str(self.graph_name)+"_duration_test_2.txt","w")
        output.write("Graph Name:\t"+str(self.graph_name)+"\n")
        output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
        output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
        output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str,self.percentages.values()))+"\n\n")
        output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
        for key in self.durations:
            self.durations_in_seconds[key] = self.durations[key].total_seconds()
            self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0

            print str(key)+'\t'+str(self.durations_in_percent[key])
            output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')
        output.close() # close the file handle so the report is flushed to disk
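Reviewer note (not part of the commit): the commit does not show how
MetricCalculator is instantiated. A hypothetical driver, assuming the graph is
available as a comma-separated edge list; the file name as_graph.csv and the
loader choices are illustrative only:

import networkx as nx
import graph_tool.all as gt
from ru_metric_calculator import MetricCalculator

# hypothetical input: one "source,target" pair per line
graph = nx.read_edgelist('as_graph.csv', delimiter=',')
g = gt.load_graph_from_csv('as_graph.csv', directed=False)

# the constructor reads the graph_tool graph and its label property map from
# these two keys; load_graph_from_csv stores the labels in g.vp.name
calc = MetricCalculator(graph, {'graph_gt': g, 'graph_gt_labels': g.vp.name})
calc.start()
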
File visualization.py added (mode: 100644) (index 0000000..39e8f08)

import graph_tool.all as gt
import numpy as np

# these methods give as output a property map of positions (i.e. the layout for the visualization)

def random(self):
    # creating visualisation with Random layout
    pos = gt.random_layout(self.glc)
    coloured_drawing(self, pos, 'Random_Advanced', 'heptagon')
    # gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_RANDOM.png")
    return pos

def frucht(self):
    # creating visualisation with Fruchterman-Reingold layout
    pos = gt.fruchterman_reingold_layout(self.glc, r=1.8, n_iter=36)
    coloured_drawing(self, pos, 'Fruchterman_Reingold_Advanced', 'hexagon')
    # gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_FRUCHT.png")
    return pos

def arf(self):
    # creating visualisation with ARF layout
    pos = gt.arf_layout(self.glc, max_iter=1000)
    # gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_ARF.png")
    return pos

def radial(self):
    # creating visualisation with Radial Tree layout
    if not hasattr(self.g.vp, 'betweenness'):
        for i in range(0,self.g.num_vertices()):
            max_asn = self.redis.zrange(self.metric_prefix+'betweenness_centrality_(gt)',-i-1,-i-1,withscores=False,score_cast_func=float)[0]
            max_vertex = gt.find_vertex(self.glc, self.label_map, max_asn)
            #test whether the vertex exists and is in the largest component
            #(find_vertex returns a list, so check for a non-empty result rather than comparing against None)
            if max_vertex:
                break #end loop
        pos = gt.radial_tree_layout(self.glc, root=max_vertex[0], weighted=True)
    else:
        if hasattr(self.g.vp, 'eigenvector'):
            temp = self.glc.vp.eigenvector
        else:
            temp = self.glc.vp.betweenness
        max_vertex = gt.find_vertex(self.glc, self.glc.vp.betweenness, np.max(self.glc.vp.betweenness.a))
        pos = gt.radial_tree_layout(self.glc, root=max_vertex[0], rel_order=temp, weighted=True, node_weight=temp)

    #gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_RADIAL.png")
    coloured_drawing(self, pos, 'Radial_Advanced_2')
    return pos
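
# Annotation (added in review, not part of the commit): radial() roots the tree
# at the vertex with the highest betweenness centrality, reading it from the
# Redis sorted set when no internal betweenness map exists and walking down the
# ranking until a vertex of the largest component is found.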

def sfdp(self):
    # creating visualisation with SFDP layout
    if not hasattr(self.g.vp, 'sfdp'):
        self.g.vp.sfdp = gt.sfdp_layout(self.glc)

    pos = self.g.vp.sfdp

    # test colouring of graph
    coloured_drawing(self, pos, 'SFDP_Advanced', 'octagon')
    return pos

def coloured_drawing(self, pos, name, shape="circle"):
    if not hasattr(self, 'eigc'):
        # caching to reduce calculation time overall
        # define metric property maps for colouring
        eigc = self.g.vp.eigenvector.copy()
        ebwc = self.g.ep.betweenness.copy()

        # right-size property maps
        eigc.a = np.sqrt(eigc.a)
        eigc = gt.prop_to_size(eigc)
        ebwc = gt.prop_to_size(ebwc)
        #vsize = eigc.copy() # obtain property map for size of vertices
        eigc.a /= eigc.a.max() # normalization to 0-1
        ebwc.a /= ebwc.a.max() # normalization to 0-1
        # obtain maps for edges
        eorder = ebwc.copy()
        eorder.a *= -1
        econtrol = self.g.new_edge_property("vector<double>")
        for e in self.glc.edges():
            d = np.sqrt(sum((pos[e.source()].a - pos[e.target()].a) ** 2)) / 3
            econtrol[e] = [0.3,d,0.7,d]
        # storing for later access
        self.eigc = eigc
        self.ebwc = ebwc
        self.eorder = eorder
        self.econtrol= econtrol

    if not hasattr(self, 'vcolour'):
        vcolour = self.g.new_vertex_property("vector<double>") # obtain colour map
        for v in self.glc.vertices():
            vcolour[v] = [self.eigc[v],self.eigc[v]/4.0,self.eigc[v]/6.0,(1.0+2.0*self.eigc[v])/3.0]
            #vsize[v] = int(vsize[v])

        # obtain maps for edges
        ecolour = self.g.new_edge_property("vector<double>")
        for e in self.glc.edges():
            ecolour[e] = [self.ebwc[e]/8.0,self.ebwc[e]/2.0,self.ebwc[e],(1.0+2.0*self.ebwc[e])/4.0]

        # storing for later access
        self.vcolour = vcolour
        self.ecolour = ecolour

    gt.graph_draw(self.glc, pos=pos, vertex_shape=shape, vertex_fill_color=self.vcolour, vorder=self.eigc, edge_color=self.ecolour, eorder=self.eorder, edge_control_points=self.econtrol, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+name+".png")

    self.redis.sadd(self.layout_index_key, name)
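
# Annotation (added in review, not part of the commit): the vector<double>
# colour properties are RGBA quadruples in [0, 1]; vertices are tinted towards
# red/orange by eigenvector centrality and edges towards blue by edge
# betweenness, with the alpha channel also scaled by the respective metric.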

def draw_deterioration(self, pos, name, shape="circle"):
    excl = self.exclusion_map.copy()
    if not hasattr(self, 'eigc'):
        # caching to reduce calculation time overall
        # define metric property maps for colouring
        eigc = self.g.vp.eigenvector.copy()
        ebwc = self.g.ep.betweenness.copy()
        # right-size property maps
        eigc.a = np.sqrt(eigc.a)
        eigc = gt.prop_to_size(eigc)
        ebwc = gt.prop_to_size(ebwc)
        #vsize = eigc.copy() # obtain property map for size of vertices
        eigc.a /= eigc.a.max() # normalization to 0-1
        ebwc.a /= ebwc.a.max() # normalization to 0-1
        # obtain maps for edges
        eorder = ebwc.copy()
        eorder.a *= -1

        econtrol = self.g.new_edge_property("vector<double>")
        for e in self.glc.edges():
            d = np.sqrt(sum((pos[e.source()].a - pos[e.target()].a) ** 2)) / 3
            econtrol[e] = [0.3,d,0.7,d]
        # storing for later access
        self.eigc = eigc
        self.ebwc = ebwc
        self.eorder = eorder
        self.econtrol= econtrol

    # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
    f = gt.GraphView(self.g, vfilt = excl)
    # largest component of graph f
    l = gt.label_largest_component(f)
    vfcolour = self.g.new_vertex_property("vector<double>") # obtain colour map
    vcolour = vfcolour.copy()
    for v in self.g.vertices():
        excl[v] *= l[v]
        if excl[v] != 1:
            vfcolour[v] = [0.0,0.0,0.0,0.01]
            vcolour[v] = [0.0,0.0,0.0,0.05]
        else:
            vfcolour[v] = [self.eigc[v],self.eigc[v]/4.0,self.eigc[v]/6.0,(1.0+2.0*self.eigc[v])/3.0]
            vcolour[v] = [self.eigc[v]/2.0,self.eigc[v]/3.0,self.eigc[v]/4.0,(2.0+1.0*self.eigc[v])/3.0]
        #vsize[v] = int(vsize[v])

    ecolour = self.g.new_edge_property("vector<double>")
    for e in self.g.edges():
        if excl[e.target()] != 1 or excl[e.source()] != 1:
            ecolour[e] = [0.0,0.0,0.0,0.05]
        else:
            ecolour[e] = [self.ebwc[e]/8.0,self.ebwc[e]/2.0,self.ebwc[e],(1.0+2.0*self.ebwc[e])/4.0]

    gt.graph_draw(self.g, pos=pos, vertex_shape=shape, vertex_color=vcolour, vertex_fill_color=vfcolour, vorder=self.eigc, edge_color=ecolour, eorder=self.eorder, edge_control_points=self.econtrol, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+name+".png")

    #self.redis.sadd(self.layout_index_key,name)
    #self.redis.hset(self.percolation_prefix+self.layout_index_key,name,pct)
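
# Annotation (added in review, not part of the commit): draw_deterioration
# fades out every vertex that is excluded or that fell out of the surviving
# largest component (excl[v] *= l[v]) together with its incident edges, so the
# rendered image shows the intact core against a ghost of the removed network.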