#config.py
#
# Central configuration for the CORIA metric calculator.
# Declares the Redis key layout and maps metric/score/percolation/layout
# names to their implementing methods in the respective modules.

import metrics
import normalizations
import advancedscores
import percolation
import visualization

#redis keys for indexes and values
graph_index_key = 'all_graphs'

info_index_key = 'general_info'
node_index_key = 'all_nodes'
metric_index_key = 'all_metrics'
score_index_key = 'all_scores'
percolation_index_key = 'all_percolation_modes'
layout_index_key = 'all_layouts'

node_neighbors_prefix = 'node_neighbors:'
node_prefix = 'node_metrics:'
metric_prefix = 'metric:'
score_prefix = 'score:'
statistics_prefix = 'statistics:'
percolation_prefix = 'percolation:'

normalization_suffix = '_normalized'


# definition of all base metrics for which absolute values will be calculated for each node in the first step
# key is the name of the metric and value is the implemented method which exposes the required interface
# interface: each method takes the node as the single parameter, performs the necessary calculation and
# returns a float containing the value for the specified node

base_metrics = { 'clustering_coefficient'                   : metrics.clustering_coefficient,
                 'degree'                                   : metrics.degree,
#                'degree_(gt)'                              : metrics.degree_gt,
                 'average_neighbor_degree'                  : metrics.average_neighbor_degree,
                 'iterated_average_neighbor_degree'         : metrics.iterated_average_neighbor_degree,
#                'iterated_average_neighbor_degree_(gt)'    : metrics.iterated_average_neighbor_degree,
#                'betweenness_centrality'                   : metrics.betweenness_centrality,
                 'betweenness_centrality_(gt)'              : metrics.betweenness_centrality_gt,
#                'eccentricity'                             : metrics.eccentricity,
                 'eccentricity_(gt)'                        : metrics.eccentricity_gt,
#                'eccentricity_(gt)_s'                      : metrics.eccentricity_gt_s,
#                'average_shortest_path_length'             : metrics.average_shortest_path_length,
                 'average_shortest_path_length_(gt)'        : metrics.average_shortest_path_length_gt,
#                'average_shortest_path_length_(gt)_s'      : metrics.average_shortest_path_length_gt_small_graphs,
                 'eigenvector_centrality_(gt)'              : metrics.eigenvector_centrality_gt,
#                'eigenvector_centrality'                   : metrics.eigenvector_centrality,
#                'deterioration'                            : metrics.deterioration
               }


# some metrics might require some corrections or post processing which relies on the value of other metrics or normalizations
# key is the metric name and value the method for correction

advanced_metrics = { 'corrected_clustering_coefficient'          : metrics.correct_clustering_coefficient,
                     'corrected_average_neighbor_degree'         : metrics.correct_average_neighbor_degree,
                     'corrected_iterated_average_neighbor_degree': metrics.correct_iterated_average_neighbor_degree
                   }


# for every metric, a normalization method has to be specified
# key is the name of the metric and value is the normalization method which also has to expose the required interface
# interface: normalization methods take the name of the (absolute) metric as the single argument, no return value is required
# the method itself shall access the data which is required for normalization from the redis instance
# and the corresponding keys/values for the specified metric
# it shall then loop over all nodes and calculate the normalized value for the node and the metric
# afterwards it should save the result to redis using "metric_name_normalized" as the key
# the result is stored inside the node's hash for metrics

# also needs to include corrected metrics with their respective names
normalization_methods = { 'clustering_coefficient'                    : normalizations.min_max,
                          'corrected_clustering_coefficient'          : normalizations.min_max,
                          'degree'                                    : normalizations.min_max,
                          'degree_(gt)'                               : normalizations.min_max,
                          'average_neighbor_degree'                   : normalizations.min_max,
                          'corrected_average_neighbor_degree'         : normalizations.min_max,
                          'iterated_average_neighbor_degree'          : normalizations.min_max,
                          'iterated_average_neighbor_degree_(gt)'     : normalizations.min_max,
                          'corrected_iterated_average_neighbor_degree': normalizations.min_max,
                          'betweenness_centrality'                    : normalizations.min_max,
                          'betweenness_centrality_(gt)'               : normalizations.min_max,
                          'eccentricity'                              : normalizations.max_min,
                          'eccentricity_(gt)'                         : normalizations.max_min,
                          'eccentricity_(gt)_s'                       : normalizations.max_min,
                          'average_shortest_path_length'              : normalizations.max_min,
                          'average_shortest_path_length_(gt)'         : normalizations.max_min,
                          'average_shortest_path_length_(gt)_s'       : normalizations.max_min,
                          'eigenvector_centrality_(gt)'               : normalizations.min_max,
                          'eigenvector_centrality'                    : normalizations.min_max,
                          'deterioration'                             : normalizations.min_max
                        }


# the easiest case for a score is a combination of normalized metric values with a weight which adds up to 1
# such scores can easily be defined here
# note: names are not methods but redis keys
scores = {'unified_risk_score': { 'degree': 0.25,
                                  'corrected_average_neighbor_degree': 0.15,
                                  'corrected_iterated_average_neighbor_degree': 0.1,
                                  'betweenness_centrality_(gt)': 0.25,
#                                 'eccentricity': 0.125,
                                  'average_shortest_path_length_(gt)': 0.25}
         }


# other scores might require a more sophisticated algorithm to be calculated
# such scores need to be added here and implemented like the example below
advanced_scores = {'advanced_unified_risk_score': advancedscores.adv_unified_risk_score}


# these are the different percolation modes with name as key and method as value
# advanced modes have sub-modes for e.g. each metric
percolation_modes = {'failure': percolation.failure,
                     'random_walk': percolation.random_walk,
                     'russian_shutoff': percolation.russian
                    }

advanced_percolation_modes = {'target_list': percolation.target_list,
                              'hybrid_mode': percolation.hybrid_mode
                             }

# layouts for graph visualization.
# note 1: ARF does not seem to work with most graphs (error message: non-invertible matrix)
# note 2: Fruchtermann-Rheingold layout (FRUCHT) takes up a high percentage of computation time
visualization_layouts = {#'SFDP': visualization.sfdp,
                         'Radial': visualization.radial,
                         #'Random': visualization.random,
                         #'ARF': visualization.arf,
                         #'Fruchterman_Reingold':visualization.frucht
                        }

# Redis connection settings (docker-compose service name and default port).
REDIS_PORT = 6379
# BUG FIX: was `REDIS_HOST = redis` — a bare, undefined name that raises
# NameError as soon as config.py is imported; the host must be a string.
REDIS_HOST = 'redis'
File metric_calculator.py changed (mode: 100644) (index 26c1f58..e7f0688) |
1 |
|
import networkx as nx
|
|
2 |
|
import graph_tool.all as gt
|
|
3 |
|
import redis as rd
|
|
4 |
|
import numpy as np
|
|
5 |
|
import indexing
|
|
6 |
|
import statistics
|
|
7 |
|
import normalizations
|
|
8 |
|
import config
|
|
9 |
|
import percolation
|
|
10 |
|
import visualization
|
|
11 |
|
import datetime as dt
|
|
12 |
|
|
|
13 |
|
|
|
14 |
|
class MetricCalculator(object):
|
|
15 |
|
def __init__ (self, graph, graph_gt):
|
|
16 |
|
#class constructor
|
|
17 |
|
#define required class variables such as the graph to work on, the redis connection and the nodes of the graph
|
|
18 |
|
|
|
19 |
|
print ('Starting metric_calculator!')
|
|
20 |
|
|
|
21 |
|
# for code evaluation
|
|
22 |
|
self.start_time = dt.datetime.now()
|
|
23 |
|
self.durations = {}
|
|
24 |
|
self.durations_in_seconds = {}
|
|
25 |
|
self.durations_in_percent = {}
|
|
26 |
|
|
|
27 |
|
self.graph = graph
|
|
28 |
|
self.graph_gt = graph_gt
|
|
29 |
|
|
|
30 |
|
# alternate name for graph tool graph
|
|
31 |
|
self.g = self.graph_gt['graph_gt']
|
|
32 |
|
# alternate name for graph tool labels
|
|
33 |
|
self.g.vp.label_map = self.graph_gt['graph_gt_labels']
|
|
34 |
|
self.label_map = self.g.vp.label_map
|
|
35 |
|
# vertex property map for percolation calculations
|
|
36 |
|
self.g.vp.exmap = self.g.new_vertex_property("bool") #internalizes map
|
|
37 |
|
self.exclusion_map = self.g.vp.exmap
|
|
38 |
|
self.exclusion_map.a = 1 #initialise filter map
|
|
39 |
|
#find largest component of graph tool graph for percolation calculations
|
|
40 |
|
# percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
|
|
41 |
|
self.glc = gt.GraphView(self.g, vfilt = gt.label_largest_component(self.g))
|
|
42 |
|
|
|
43 |
|
|
|
44 |
|
self.redis = rd.StrictRedis(host='localhost', port=6379, db=1)
|
|
45 |
|
self.nodes = nx.nodes(graph)
|
|
46 |
|
|
|
47 |
|
|
|
48 |
|
# configuration variables are read from the config file and are also saved to class variables for easy access
|
|
49 |
|
self.graph_index_key = config.graph_index_key
|
|
50 |
|
|
|
51 |
|
self.graph_name = ''
|
|
52 |
|
while (self.graph_name == ''):
|
|
53 |
|
self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")
|
|
54 |
|
|
|
55 |
|
self.info_index_key = self.graph_name+':'+config.info_index_key
|
|
56 |
|
self.node_index_key = self.graph_name+':'+config.node_index_key
|
|
57 |
|
self.metric_index_key = self.graph_name+':'+config.metric_index_key
|
|
58 |
|
self.score_index_key = self.graph_name+':'+config.score_index_key
|
|
59 |
|
self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
|
|
60 |
|
self.layout_index_key = self.graph_name+':'+config.layout_index_key
|
|
61 |
|
|
|
62 |
|
self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
|
|
63 |
|
self.node_prefix = self.graph_name+':'+config.node_prefix
|
|
64 |
|
self.metric_prefix = self.graph_name+':'+config.metric_prefix
|
|
65 |
|
self.score_prefix = self.graph_name+':'+config.score_prefix
|
|
66 |
|
self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
|
|
67 |
|
self.percolation_prefix = self.graph_name+':'+config.percolation_prefix
|
|
68 |
|
|
|
69 |
|
self.normalization_suffix = config.normalization_suffix
|
|
70 |
|
|
|
71 |
|
self.base_metrics = config.base_metrics
|
|
72 |
|
self.advanced_metrics = config.advanced_metrics
|
|
73 |
|
|
|
74 |
|
self.normalization_methods = config.normalization_methods
|
|
75 |
|
|
|
76 |
|
self.scores = config.scores
|
|
77 |
|
self.advanced_scores = config.advanced_scores
|
|
78 |
|
|
|
79 |
|
self.visualization_layouts = config.visualization_layouts
|
|
80 |
|
|
|
81 |
|
self.percolation_modes = config.percolation_modes
|
|
82 |
|
self.advanced_percolation_modes = config.advanced_percolation_modes
|
|
83 |
|
|
|
84 |
|
##############################################################################
|
|
85 |
|
###### start describes the entire calculation in a high level overview #######
|
|
86 |
|
##############################################################################
|
|
87 |
|
|
|
88 |
|
    def start(self):
        """Run the entire calculation pipeline, in order, as a high-level
        overview: preliminaries, indexing, metrics, normalization, scores,
        statistics, percolation, visualization, snapshot, evaluation."""
        start_time_calculation = dt.datetime.now()

        #preliminary calculations: optionally wipe redis, ask the user for
        #percolation percentages, store graph info, pre-compute the standard
        #layout and save the untouched graph to disk
        self.flush_database()
        self.obtain_percentages()
        self.create_info()
        self.create_standard_layout()
        self.save_graph_data('raw')

        #index creation (redis sets of graphs/nodes/neighbors/metrics/scores)
        self.create_indexes()

        #main calculations; order matters: advanced metrics depend on base
        #metrics, scores depend on normalized values
        self.calculate_metrics()
        self.calculate_advanced_metrics()
        self.normalize_metrics()
        self.calculate_scores()
        self.calculate_advanced_scores()

        #statistics (distributions and correlations)
        self.calculate_statistics()

        #dynamic metrics / percolation
        self.calculate_percolation()

        #visualization (one image per configured layout)
        self.visualize_graph()

        #save final graph including all computed property maps
        self.save_graph_data('full')

        #evaluation: total runtime, then per-step breakdown
        self.duration_total = dt.datetime.now() - start_time_calculation
        self.evaluate_durations()
|
|
123 |
|
|
|
124 |
|
|
|
125 |
|
###################
|
|
126 |
|
## PRELIMINARIES ##
|
|
127 |
|
###################
|
|
128 |
|
def flush_database(self):
|
|
129 |
|
# ask to clean all data in Redis
|
|
130 |
|
flush_flag = 'Flushing'
|
|
131 |
|
while (flush_flag != 'y' and flush_flag != 'n'):
|
|
132 |
|
flush_flag = raw_input("Would you like to flush the database before continuing? [y/n]")
|
|
133 |
|
if flush_flag == 'y':
|
|
134 |
|
self.redis.flushdb()
|
|
135 |
|
|
|
136 |
|
def obtain_percentages(self):
|
|
137 |
|
# obtain percentages for calculation of deterioration #
|
|
138 |
|
# and calculate number of nodes to remove from graph ##
|
|
139 |
|
percentages = '' # initialise
|
|
140 |
|
while (percentages == ''):
|
|
141 |
|
percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")
|
|
142 |
|
|
|
143 |
|
percentages = sorted([float(pct)for pct in percentages.split()])
|
|
144 |
|
numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
|
|
145 |
|
# create a dictionary of percentages and corresponding numbers of nodes
|
|
146 |
|
self.percentages = dict(zip(numbers,percentages))
|
|
147 |
|
# storing values in redis DB
|
|
148 |
|
self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
|
|
149 |
|
|
|
150 |
|
def create_info(self):
|
|
151 |
|
#store general info about graph
|
|
152 |
|
self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
|
|
153 |
|
self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())
|
|
154 |
|
|
|
155 |
|
    def create_standard_layout(self):
        """Compute (or reuse) the standard SFDP layout for visualization."""
        start_time = dt.datetime.now()
        print 'Creating standard layout for graph visualization.'
        # the layout is cached as an internal vertex property 'sfdp', so a
        # graph reloaded from disk skips this expensive computation
        if not hasattr(self.g.vp, 'sfdp'):
            self.sfdp = gt.sfdp_layout(self.g, C=0.5)
            self.g.vp['sfdp'] = self.sfdp
        else:
            self.sfdp = self.g.vp['sfdp']
        self.durations['SFDP_layout'] = dt.datetime.now() - start_time
|
|
165 |
|
|
|
166 |
|
def save_graph_data(self,name):
|
|
167 |
|
# save graph
|
|
168 |
|
start_time = dt.datetime.now()
|
|
169 |
|
print 'Saving raw graph data'
|
|
170 |
|
self.g.save(self.graph_name+'_'+name+'.gt.gz')
|
|
171 |
|
self.durations['saving_graph'+name] = dt.datetime.now() - start_time
|
|
172 |
|
|
|
173 |
|
##################
|
|
174 |
|
#### INDEXING ####
|
|
175 |
|
##################
|
|
176 |
|
def create_indexes(self):
|
|
177 |
|
start_time = dt.datetime.now()
|
|
178 |
|
#call methods defined in indexing.py
|
|
179 |
|
indexing.index_graph(self)
|
|
180 |
|
indexing.index_nodes(self)
|
|
181 |
|
indexing.index_neighbors(self)
|
|
182 |
|
indexing.index_metrics(self)
|
|
183 |
|
indexing.index_scores(self)
|
|
184 |
|
#indexing.index_percolation(self)
|
|
185 |
|
self.durations['indexing'] = dt.datetime.now() - start_time
|
|
186 |
|
|
|
187 |
|
###########################
|
|
188 |
|
#### CALCULATION LOOPS ####
|
|
189 |
|
###########################
|
|
190 |
|
|
|
191 |
|
def calculate_metrics(self):
|
|
192 |
|
start_time_total = dt.datetime.now()
|
|
193 |
|
# loop through all defined metrics and call specified calculation method for each node
|
|
194 |
|
print ('Starting calculate_metrics')
|
|
195 |
|
for metric_name in self.base_metrics:
|
|
196 |
|
start_time = dt.datetime.now()
|
|
197 |
|
metric_method = self.base_metrics[metric_name]
|
|
198 |
|
|
|
199 |
|
# loop through all nodes
|
|
200 |
|
for node in self.nodes:
|
|
201 |
|
# call calculation method of supplied metric for current node
|
|
202 |
|
node = int(node)
|
|
203 |
|
value = float(metric_method(self,node))
|
|
204 |
|
|
|
205 |
|
#store result in node values
|
|
206 |
|
self.redis.hset(self.node_prefix+str(node), metric_name, value)
|
|
207 |
|
|
|
208 |
|
#also store result to metric set
|
|
209 |
|
self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
|
|
210 |
|
self.durations['metrics:'+metric_name] = dt.datetime.now() - start_time
|
|
211 |
|
self.durations['metrics_total'] = dt.datetime.now() - start_time_total
|
|
212 |
|
|
|
213 |
|
|
|
214 |
|
def calculate_advanced_metrics(self):
|
|
215 |
|
start_time_total = dt.datetime.now()
|
|
216 |
|
# loop through all defined_advanced_metrics and call specified calculation method
|
|
217 |
|
print ('Starting calculate_advanced_metrics')
|
|
218 |
|
for advanced_metric_name in self.advanced_metrics:
|
|
219 |
|
start_time = dt.datetime.now()
|
|
220 |
|
metric_method = self.advanced_metrics[advanced_metric_name]
|
|
221 |
|
|
|
222 |
|
# loop through all nodes
|
|
223 |
|
for node in self.nodes:
|
|
224 |
|
node = int(node)
|
|
225 |
|
value = float(metric_method(self,node))
|
|
226 |
|
|
|
227 |
|
#store result in node values
|
|
228 |
|
self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)
|
|
229 |
|
|
|
230 |
|
#also store result to metric set
|
|
231 |
|
self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
|
|
232 |
|
self.durations['adv_metrics:'+advanced_metric_name] = dt.datetime.now() - start_time
|
|
233 |
|
self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total
|
|
234 |
|
|
|
235 |
|
|
|
236 |
|
# loop through all defined normalizations and call respective normalization method
|
|
237 |
|
# no default normalizations for metrics not listed in the "normalization_methods" hash
|
|
238 |
|
def normalize_metrics(self):
|
|
239 |
|
start_time = dt.datetime.now()
|
|
240 |
|
#fallback normalization: min-max
|
|
241 |
|
print ('Starting normalize_metrics')
|
|
242 |
|
all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())
|
|
243 |
|
|
|
244 |
|
for metric_name in all_metrics:
|
|
245 |
|
if self.normalization_methods.has_key(metric_name):
|
|
246 |
|
normalization_method = self.normalization_methods[metric_name]
|
|
247 |
|
else:
|
|
248 |
|
#fallback normalization is min-max
|
|
249 |
|
normalization_method = normalizations.min_max
|
|
250 |
|
normalization_method(self,metric_name)
|
|
251 |
|
|
|
252 |
|
self.durations['normalizing'] = dt.datetime.now() - start_time
|
|
253 |
|
|
|
254 |
|
|
|
255 |
|
def calculate_scores(self):
|
|
256 |
|
start_time = dt.datetime.now()
|
|
257 |
|
print ('Starting calculate_scores')
|
|
258 |
|
for score_name in self.scores:
|
|
259 |
|
metrics_with_weights = self.scores[score_name]
|
|
260 |
|
|
|
261 |
|
for node in self.nodes:
|
|
262 |
|
score_value = 0.0
|
|
263 |
|
|
|
264 |
|
# get normalized values
|
|
265 |
|
for metric in metrics_with_weights:
|
|
266 |
|
weight = self.scores[score_name][metric]
|
|
267 |
|
value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
|
|
268 |
|
score_value += weight * value
|
|
269 |
|
|
|
270 |
|
#redis_server.hset(key, value, number);
|
|
271 |
|
self.redis.hset(self.node_prefix+str(node),score_name, score_value)
|
|
272 |
|
|
|
273 |
|
self.redis.zadd(self.score_prefix+score_name, score_value, str(node))
|
|
274 |
|
|
|
275 |
|
self.durations['scores'] = dt.datetime.now() - start_time
|
|
276 |
|
|
|
277 |
|
def calculate_advanced_scores(self):
|
|
278 |
|
start_time = dt.datetime.now()
|
|
279 |
|
print ('Starting calculate_advanced_scores')
|
|
280 |
|
for advanced_score in self.advanced_scores:
|
|
281 |
|
self.advanced_scores[advanced_score](self)
|
|
282 |
|
|
|
283 |
|
self.durations['adv_scores'] = dt.datetime.now() - start_time
|
|
284 |
|
|
|
285 |
|
|
|
286 |
|
#############
|
|
287 |
|
# statistics
|
|
288 |
|
#############
|
|
289 |
|
|
|
290 |
|
def calculate_statistics(self):
|
|
291 |
|
start_time = dt.datetime.now()
|
|
292 |
|
print ('Starting calculate_statistics')
|
|
293 |
|
for metric in self.base_metrics:
|
|
294 |
|
#absolute and normalized
|
|
295 |
|
statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
|
|
296 |
|
statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)
|
|
297 |
|
|
|
298 |
|
for advanced_metric in self.advanced_metrics:
|
|
299 |
|
#absolute and normalized
|
|
300 |
|
statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
|
|
301 |
|
statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)
|
|
302 |
|
|
|
303 |
|
for score in self.scores:
|
|
304 |
|
statistics.calculate_statistics(self, score, self.score_prefix+score)
|
|
305 |
|
|
|
306 |
|
for advanced_score in self.advanced_scores:
|
|
307 |
|
statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
|
|
308 |
|
self.durations['statistics:stats'] = dt.datetime.now() - start_time
|
|
309 |
|
|
|
310 |
|
start_time = dt.datetime.now()
|
|
311 |
|
statistics.calculate_correlations(self)
|
|
312 |
|
self.durations['statistics:corr'] = dt.datetime.now() - start_time
|
|
313 |
|
|
|
314 |
|
###################
|
|
315 |
|
# dynamic metrics #
|
|
316 |
|
###################
|
|
317 |
|
|
|
318 |
|
    def calculate_percolation(self):
        """Run all configured percolation (deterioration) calculations.

        Relies on self.percentages ({nodes_to_remove: percentage}) prepared
        by obtain_percentages(). Results are written to Redis hashes under
        the percolation prefix and the modes are indexed under the
        percolation index key.
        """
        start_time_total = dt.datetime.now()
        print ('Starting percolation calculation')

        # shorten the name for percentages and corresponding numbers of nodes to remove
        n = self.percentages

        # BASIC PERCOLATION MODES
        # basic percolation modes take mode_name and n as input and return a
        # dictionary with percentage of nodes removed as key and percentage
        # of deterioration as value
        for mode_name in self.percolation_modes:
            start_time = dt.datetime.now()
            # re-initialise exclusion vertex property map so each mode starts
            # from the intact graph (1 = vertex kept)
            self.exclusion_map.a = 1
            # read method from config file
            mode_method = self.percolation_modes[mode_name]
            # execute method
            results = mode_method(self,mode_name,n)
            # index percolation mode
            self.redis.sadd(self.percolation_index_key, mode_name)
            # store values
            print 'Storing percolation percentages'
            for percentage in results:
                value = results[percentage]
                #store in hash set keyed by mode name
                self.redis.hset(self.percolation_prefix+mode_name, percentage, value)

            self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

        # ADVANCED PERCOLATION MODES
        # advanced percolation modes take mode_name and n as input and return a
        # dictionary with groups of percolation modes (e.g. metrics, countries)
        # as keys and dictionaries of percentages (removed: deteriorated) as values
        for mode_name in self.advanced_percolation_modes:
            start_time = dt.datetime.now()
            # re-initialise exclusion vertex property map (see above)
            self.exclusion_map.a = 1
            # read method from config file
            mode_method = self.advanced_percolation_modes[mode_name]
            # execute method
            results = mode_method(self,mode_name,n)

            # store values
            print 'Storing percolation percentages'
            for group in results:
                # index percolation modes: one entry per mode:group pair
                self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
                for percentage in results[group]:
                    value = results[group][percentage]
                    #store in hash set keyed by mode name and group
                    self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)

            self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

        self.durations['percolation_total'] = dt.datetime.now() - start_time_total
|
|
374 |
|
|
|
375 |
|
|
|
376 |
|
def visualize_graph(self):
|
|
377 |
|
|
|
378 |
|
for layout_name in self.visualization_layouts:
|
|
379 |
|
start_time = dt.datetime.now()
|
|
380 |
|
print 'Creating visualisation with '+layout_name+' layout'
|
|
381 |
|
|
|
382 |
|
layout_method = self.visualization_layouts[layout_name]
|
|
383 |
|
pos = layout_method(self)
|
|
384 |
|
gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")
|
|
385 |
|
|
|
386 |
|
self.redis.sadd(self.layout_index_key, layout_name)
|
|
387 |
|
self.durations['layout:'+layout_name] = dt.datetime.now() - start_time
|
|
388 |
|
|
|
389 |
|
def evaluate_durations(self):
|
|
390 |
|
#print out times taken
|
|
391 |
|
print 'times taken:'
|
|
392 |
|
output = open(str(self.graph_name)+"_duration_test.txt","w")
|
|
393 |
|
output.write("Graph Name:\t"+str(self.graph_name)+"\n")
|
|
394 |
|
output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
|
|
395 |
|
output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
|
|
396 |
|
output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str,self.percentages.values()))+"\n\n")
|
|
397 |
|
output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
|
|
398 |
|
for key in self.durations:
|
|
399 |
|
self.durations_in_seconds[key] = self.durations[key].total_seconds()
|
|
400 |
|
self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0
|
|
401 |
|
|
|
402 |
|
print str(key)+'\t'+str(self.durations_in_percent[key])
|
|
403 |
|
output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')
|
|
|
1 |
|
import networkx as nx |
|
2 |
|
import graph_tool.all as gt |
|
3 |
|
import redis as rd |
|
4 |
|
import numpy as np |
|
5 |
|
import indexing |
|
6 |
|
import statistics |
|
7 |
|
import normalizations |
|
8 |
|
import config |
|
9 |
|
import percolation |
|
10 |
|
import visualization |
|
11 |
|
import datetime as dt |
|
12 |
|
|
|
13 |
|
|
|
14 |
|
class MetricCalculator(object): |
|
15 |
|
def __init__ (self, graph, graph_gt):
    """Set up calculator state: graphs, Redis connection and config keys.

    graph    -- networkx graph (only used to enumerate nodes)
    graph_gt -- dict with 'graph_gt' (graph-tool graph) and
                'graph_gt_labels' (vertex label property map)

    Prompts interactively for a graph name; that name prefixes every
    Redis key written by this calculator.
    """
    print ('Starting metric_calculator!')

    # timing bookkeeping, filled in by the calculation methods
    self.start_time = dt.datetime.now()
    self.durations = {}
    self.durations_in_seconds = {}
    self.durations_in_percent = {}

    self.graph = graph
    self.graph_gt = graph_gt

    # alternate name for graph tool graph
    self.g = self.graph_gt['graph_gt']
    # alternate name for graph tool labels
    self.g.vp.label_map = self.graph_gt['graph_gt_labels']
    self.label_map = self.g.vp.label_map
    # vertex property map for percolation calculations
    self.g.vp.exmap = self.g.new_vertex_property("bool") #internalizes map
    self.exclusion_map = self.g.vp.exmap
    self.exclusion_map.a = 1 #initialise filter map (1 == vertex kept)
    #find largest component of graph tool graph for percolation calculations
    # percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
    self.glc = gt.GraphView(self.g, vfilt = gt.label_largest_component(self.g))

    self.redis = rd.StrictRedis(host=config.REDIS_HOST, port=config.REDIS_PORT, db=1)
    self.nodes = nx.nodes(graph)

    # configuration variables are read from the config file and are also saved to class variables for easy access
    self.graph_index_key = config.graph_index_key

    # graph name is entered interactively and prefixes all Redis keys below
    self.graph_name = ''
    while (self.graph_name == ''):
        self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")

    self.info_index_key = self.graph_name+':'+config.info_index_key
    self.node_index_key = self.graph_name+':'+config.node_index_key
    self.metric_index_key = self.graph_name+':'+config.metric_index_key
    self.score_index_key = self.graph_name+':'+config.score_index_key
    self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
    self.layout_index_key = self.graph_name+':'+config.layout_index_key

    self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
    self.node_prefix = self.graph_name+':'+config.node_prefix
    self.metric_prefix = self.graph_name+':'+config.metric_prefix
    self.score_prefix = self.graph_name+':'+config.score_prefix
    self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
    self.percolation_prefix = self.graph_name+':'+config.percolation_prefix

    self.normalization_suffix = config.normalization_suffix

    # metric/score/normalization registries: name -> callable
    self.base_metrics = config.base_metrics
    self.advanced_metrics = config.advanced_metrics

    self.normalization_methods = config.normalization_methods

    self.scores = config.scores
    self.advanced_scores = config.advanced_scores

    self.visualization_layouts = config.visualization_layouts

    self.percolation_modes = config.percolation_modes
    self.advanced_percolation_modes = config.advanced_percolation_modes
|
83 |
|
|
|
84 |
|
############################################################################## |
|
85 |
|
###### start describes the entire calculation in a high level overview ####### |
|
86 |
|
############################################################################## |
|
87 |
|
|
|
88 |
|
def start(self):
    """Run the full pipeline: preliminaries -> indexing -> metrics and
    scores -> statistics -> percolation -> visualization -> final save ->
    duration evaluation."""
    start_time_calculation = dt.datetime.now()

    #preliminary calculations
    self.flush_database()
    self.obtain_percentages()
    self.create_info()
    self.create_standard_layout()
    self.save_graph_data('raw')

    #index creation
    self.create_indexes()

    #main calculations
    self.calculate_metrics()
    self.calculate_advanced_metrics()
    self.normalize_metrics()
    self.calculate_scores()
    self.calculate_advanced_scores()

    #statistics
    self.calculate_statistics()

    #dynamic metrics / percolation
    self.calculate_percolation()

    #visualization
    self.visualize_graph()

    #save final graph
    self.save_graph_data('full')

    #evaluation: total must be set before evaluate_durations reads it
    self.duration_total = dt.datetime.now() - start_time_calculation
    self.evaluate_durations()
|
123 |
|
|
|
124 |
|
|
|
125 |
|
################### |
|
126 |
|
## PRELIMINARIES ## |
|
127 |
|
################### |
|
128 |
|
def flush_database(self):
    """Ask the user whether to wipe the Redis database before starting."""
    answer = 'Flushing'
    # keep asking until the reply is exactly 'y' or 'n'
    while answer not in ('y', 'n'):
        answer = raw_input("Would you like to flush the database before continuing? [y/n]")
    if answer == 'y':
        self.redis.flushdb()
|
135 |
|
|
|
136 |
|
def obtain_percentages(self):
    """Interactively read removal percentages for percolation and derive
    the corresponding absolute node counts on the largest component.

    Stores {node_count: percentage} in self.percentages and writes the
    percentage list to the Redis info hash.
    """
    percentages = '' # initialise
    while (percentages == ''):
        percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")

    percentages = sorted([float(pct)for pct in percentages.split()])
    # +0.5 rounds to the nearest whole number of nodes
    numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
    # create a dictionary of percentages and corresponding numbers of nodes
    self.percentages = dict(zip(numbers,percentages))
    # storing values in redis DB
    self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
|
149 |
|
|
|
150 |
|
def create_info(self):
    """Store basic graph facts (vertex and edge counts) in the per-graph
    Redis info hash."""
    self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
    self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())
|
154 |
|
|
|
155 |
|
def create_standard_layout(self):
    """Compute (or reuse) the SFDP layout for the graph and time it.

    The layout is internalized as vertex property 'sfdp' so a reloaded
    graph skips the expensive recomputation.
    """
    start_time = dt.datetime.now()
    print 'Creating standard layout for graph visualization.'
    if not hasattr(self.g.vp, 'sfdp'):
        self.sfdp = gt.sfdp_layout(self.g, C=0.5)
        self.g.vp['sfdp'] = self.sfdp
    else:
        # layout already stored with the graph -- reuse it
        self.sfdp = self.g.vp['sfdp']
    self.durations['SFDP_layout'] = dt.datetime.now() - start_time
|
165 |
|
|
|
166 |
|
def save_graph_data(self,name): |
|
167 |
|
# save graph |
|
168 |
|
start_time = dt.datetime.now() |
|
169 |
|
print 'Saving raw graph data' |
|
170 |
|
self.g.save(self.graph_name+'_'+name+'.gt.gz') |
|
171 |
|
self.durations['saving_graph'+name] = dt.datetime.now() - start_time |
|
172 |
|
|
|
173 |
|
################## |
|
174 |
|
#### INDEXING #### |
|
175 |
|
################## |
|
176 |
|
def create_indexes(self):
    """Build the Redis index sets (graphs, nodes, neighbors, metrics,
    scores) via the helpers in indexing.py."""
    start_time = dt.datetime.now()
    #call methods defined in indexing.py
    indexing.index_graph(self)
    indexing.index_nodes(self)
    indexing.index_neighbors(self)
    indexing.index_metrics(self)
    indexing.index_scores(self)
    # percolation modes are indexed inside calculate_percolation instead
    #indexing.index_percolation(self)
    self.durations['indexing'] = dt.datetime.now() - start_time
|
186 |
|
|
|
187 |
|
########################### |
|
188 |
|
#### CALCULATION LOOPS #### |
|
189 |
|
########################### |
|
190 |
|
|
|
191 |
|
def calculate_metrics(self):
    """Compute every configured base metric for every node.

    Each value lands twice in Redis: in the per-node hash and in the
    per-metric sorted set (for ranking nodes by metric value).
    """
    total_started = dt.datetime.now()
    print ('Starting calculate_metrics')
    for metric_name, metric_method in self.base_metrics.items():
        metric_started = dt.datetime.now()

        for raw_node in self.nodes:
            # metric implementations are called with an integer node id
            node_id = int(raw_node)
            metric_value = float(metric_method(self, node_id))

            # per-node hash entry ...
            self.redis.hset(self.node_prefix+str(node_id), metric_name, metric_value)
            # ... and the metric's sorted set
            self.redis.zadd(self.metric_prefix+metric_name, metric_value, str(node_id))

        self.durations['metrics:'+metric_name] = dt.datetime.now() - metric_started
    self.durations['metrics_total'] = dt.datetime.now() - total_started
|
212 |
|
|
|
213 |
|
|
|
214 |
|
def calculate_advanced_metrics(self):
    """Compute every configured advanced metric for every node.

    Mirrors calculate_metrics: values go into the per-node hash and the
    per-metric sorted set.
    """
    total_started = dt.datetime.now()
    print ('Starting calculate_advanced_metrics')
    for metric_name, metric_method in self.advanced_metrics.items():
        metric_started = dt.datetime.now()

        for raw_node in self.nodes:
            # metric implementations are called with an integer node id
            node_id = int(raw_node)
            metric_value = float(metric_method(self, node_id))

            # per-node hash entry ...
            self.redis.hset(self.node_prefix+str(node_id), metric_name, metric_value)
            # ... and the metric's sorted set
            self.redis.zadd(self.metric_prefix+metric_name, metric_value, str(node_id))

        self.durations['adv_metrics:'+metric_name] = dt.datetime.now() - metric_started
    self.durations['adv_metrics_total'] = dt.datetime.now() - total_started
|
234 |
|
|
|
235 |
|
|
|
236 |
|
# loop through all defined normalizations and call respective normalization method |
|
237 |
|
# no default normalizations for metrics not listed in the "normalization_methods" hash |
|
238 |
|
def normalize_metrics(self):
    """Normalize every base and advanced metric.

    Uses the method configured in normalization_methods for the metric,
    falling back to min-max normalization when none is configured.
    """
    start_time = dt.datetime.now()
    print ('Starting normalize_metrics')
    # merged view over both metric registries; copy+update works on both
    # Python 2 and 3, unlike dict(a.items() + b.items())
    all_metrics = dict(self.base_metrics)
    all_metrics.update(self.advanced_metrics)

    for metric_name in all_metrics:
        # 'in' replaces dict.has_key, which was removed in Python 3
        if metric_name in self.normalization_methods:
            normalization_method = self.normalization_methods[metric_name]
        else:
            #fallback normalization is min-max
            normalization_method = normalizations.min_max
        normalization_method(self,metric_name)

    self.durations['normalizing'] = dt.datetime.now() - start_time
|
253 |
|
|
|
254 |
|
|
|
255 |
|
def calculate_scores(self):
    """Combine normalized metric values into weighted per-node scores.

    Each score in self.scores maps metric names to weights; the score is
    the weighted sum of the node's normalized metric values. Results go
    into the per-node hash and a per-score sorted set.
    """
    start_time = dt.datetime.now()
    print ('Starting calculate_scores')
    for score_name in self.scores:
        metrics_with_weights = self.scores[score_name]

        for node in self.nodes:
            score_value = 0.0

            # weighted sum over the normalized metric values
            for metric in metrics_with_weights:
                weight = self.scores[score_name][metric]
                value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
                score_value += weight * value

            # per-node hash entry ...
            self.redis.hset(self.node_prefix+str(node),score_name, score_value)
            # ... and the score's sorted set for ranking
            self.redis.zadd(self.score_prefix+score_name, score_value, str(node))

    self.durations['scores'] = dt.datetime.now() - start_time
|
276 |
|
|
|
277 |
|
def calculate_advanced_scores(self):
    """Run every configured advanced-score routine.

    Each routine receives the calculator instance and stores its own
    results.
    """
    started = dt.datetime.now()
    print ('Starting calculate_advanced_scores')
    for _name, score_method in self.advanced_scores.items():
        score_method(self)

    self.durations['adv_scores'] = dt.datetime.now() - started
|
284 |
|
|
|
285 |
|
|
|
286 |
|
############# |
|
287 |
|
# statistics |
|
288 |
|
############# |
|
289 |
|
|
|
290 |
|
def calculate_statistics(self):
    """Compute distribution statistics for every metric and score
    (absolute and normalized) plus metric correlations, delegating to
    statistics.py."""
    start_time = dt.datetime.now()
    print ('Starting calculate_statistics')
    for metric in self.base_metrics:
        #absolute and normalized
        statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
        statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)

    for advanced_metric in self.advanced_metrics:
        #absolute and normalized
        statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
        statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)

    for score in self.scores:
        statistics.calculate_statistics(self, score, self.score_prefix+score)

    for advanced_score in self.advanced_scores:
        statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
    self.durations['statistics:stats'] = dt.datetime.now() - start_time

    # correlations are timed separately from the per-metric statistics
    start_time = dt.datetime.now()
    statistics.calculate_correlations(self)
    self.durations['statistics:corr'] = dt.datetime.now() - start_time
|
313 |
|
|
|
314 |
|
################### |
|
315 |
|
# dynamic metrics # |
|
316 |
|
################### |
|
317 |
|
|
|
318 |
|
def calculate_percolation(self):
    """Run every configured percolation mode and store the results.

    Basic modes return {percentage_removed: percentage_deteriorated};
    advanced modes return {group: {percentage_removed: deteriorated}}
    (e.g. one sub-result per metric or country). Results are written to
    per-mode Redis hashes and each mode name is indexed.
    """
    start_time_total = dt.datetime.now()
    print ('Starting percolation calculation')

    # shorten the name for percentages and corresponding numbers of nodes to remove
    n = self.percentages

    # BASIC PERCOLATION MODES
    # basic percolation modes take mode_name and n as input and return a
    # dictionary with percentage of nodes removed as key and percentage
    # of deterioration as value
    for mode_name in self.percolation_modes:
        start_time = dt.datetime.now()
        # reset exclusion vertex property map (1 == vertex kept)
        self.exclusion_map.a = 1
        # read method from config file
        mode_method = self.percolation_modes[mode_name]
        # execute method
        results = mode_method(self,mode_name,n)
        # index percolation mode
        self.redis.sadd(self.percolation_index_key, mode_name)
        # store values
        print 'Storing percolation percentages'
        for percentage in results:
            value = results[percentage]
            #store in hash set
            self.redis.hset(self.percolation_prefix+mode_name, percentage, value)

        self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

    # ADVANCED PERCOLATION MODES
    # advanced percolation modes take mode_name and n as input and return a
    # dictionary with groups of percolation modes (e.g. metrics, countries)
    # as keys and dictionaries of percentages (removed: deteriorated) as values
    for mode_name in self.advanced_percolation_modes:
        start_time = dt.datetime.now()
        # reset exclusion vertex property map (1 == vertex kept)
        self.exclusion_map.a = 1
        # read method from config file
        mode_method = self.advanced_percolation_modes[mode_name]
        # execute method
        results = mode_method(self,mode_name,n)

        # store values
        print 'Storing percolation percentages'
        for group in results:
            # index each mode:group combination separately
            self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
            for percentage in results[group]:
                value = results[group][percentage]
                #store in hash set
                self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)

        self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

    self.durations['percolation_total'] = dt.datetime.now() - start_time_total
|
374 |
|
|
|
375 |
|
|
|
376 |
|
def visualize_graph(self):
    """Render the largest component once per configured layout, index the
    layout name in Redis, and record the time taken."""
    for layout_name in self.visualization_layouts:
        start_time = dt.datetime.now()
        print 'Creating visualisation with '+layout_name+' layout'

        layout_method = self.visualization_layouts[layout_name]
        pos = layout_method(self)
        # output path is hardcoded to the frontend's pics directory
        gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")

        self.redis.sadd(self.layout_index_key, layout_name)
        self.durations['layout:'+layout_name] = dt.datetime.now() - start_time
|
388 |
|
|
|
389 |
|
def evaluate_durations(self): |
|
390 |
|
#print out times taken |
|
391 |
|
print 'times taken:' |
|
392 |
|
output = open(str(self.graph_name)+"_duration_test.txt","w") |
|
393 |
|
output.write("Graph Name:\t"+str(self.graph_name)+"\n") |
|
394 |
|
output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n") |
|
395 |
|
output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n") |
|
396 |
|
output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str,self.percentages.values()))+"\n\n") |
|
397 |
|
output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n") |
|
398 |
|
for key in self.durations: |
|
399 |
|
self.durations_in_seconds[key] = self.durations[key].total_seconds() |
|
400 |
|
self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0 |
|
401 |
|
|
|
402 |
|
print str(key)+'\t'+str(self.durations_in_percent[key]) |
|
403 |
|
output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n') |
File ru_metric_calculator.py changed (mode: 100644) (index 05d8f41..ee1f7cf) |
1 |
|
import networkx as nx
|
|
2 |
|
import graph_tool.all as gt
|
|
3 |
|
import redis as rd
|
|
4 |
|
import numpy as np
|
|
5 |
|
import indexing
|
|
6 |
|
import statistics
|
|
7 |
|
import normalizations
|
|
8 |
|
import config
|
|
9 |
|
import percolation
|
|
10 |
|
import visualization
|
|
11 |
|
import datetime as dt
|
|
12 |
|
|
|
13 |
|
|
|
14 |
|
class MetricCalculator(object):
|
|
15 |
|
def __init__ (self, graph, graph_gt):
    """Test-variant constructor (russian_shutoff percolation only).

    graph    -- networkx graph (only used to enumerate nodes)
    graph_gt -- dict with 'graph_gt' (graph-tool graph) and
                'graph_gt_labels' (vertex label property map)

    Unlike metric_calculator.py this variant reuses already-internalized
    property maps when present and restricts percolation to the
    'russian_shutoff' mode.
    """
    print ('Starting metric_calculator!')

    # timing bookkeeping, filled in by the calculation methods
    self.start_time = dt.datetime.now()
    self.durations = {}
    self.durations_in_seconds = {}
    self.durations_in_percent = {}

    self.graph = graph
    self.graph_gt = graph_gt

    # alternate name for graph tool graph
    self.g = self.graph_gt['graph_gt']
    # alternate name for graph tool labels (reuse if already internalized)
    if not hasattr(self.g.vp, 'label_map'):
        self.g.vp.label_map = self.graph_gt['graph_gt_labels']
    self.label_map = self.g.vp.label_map
    # vertex property map for percolation calculations
    if not hasattr(self.g.vp, 'exmap'):
        self.g.vp.exmap = self.g.new_vertex_property("bool") #internalizes map

    self.exclusion_map = self.g.vp.exmap
    self.exclusion_map.a = 1 #initialise filter map (1 == vertex kept)
    #find largest component of graph tool graph for percolation calculations
    # percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
    self.glc = gt.GraphView(self.g, vfilt = gt.label_largest_component(self.g))

    # precompute centralities unless already stored with the graph;
    # gt.betweenness returns (vertex, edge) maps -- only the edge map is
    # internalized here (NOTE(review): confirm the vertex map is unneeded)
    if not hasattr(self.g.vp, 'eigenvector'):
        eigenvalue, self.g.vp.eigenvector = gt.eigenvector(self.g)
    if not hasattr(self.g.ep, 'betweenness'):
        betweenness,self.g.ep.betweenness = gt.betweenness(self.g)

    # NOTE(review): connection details hardcoded here while
    # metric_calculator.py uses config.REDIS_HOST/config.REDIS_PORT --
    # confirm and align
    self.redis = rd.StrictRedis(host='localhost', port=6379, db=1)
    self.nodes = nx.nodes(graph)

    # configuration variables are read from the config file and are also saved to class variables for easy access
    self.graph_index_key = config.graph_index_key

    # graph name is entered interactively and prefixes all Redis keys below
    self.graph_name = ''
    while (self.graph_name == ''):
        self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")

    self.info_index_key = self.graph_name+':'+config.info_index_key
    self.node_index_key = self.graph_name+':'+config.node_index_key
    self.metric_index_key = self.graph_name+':'+config.metric_index_key
    self.score_index_key = self.graph_name+':'+config.score_index_key
    self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
    self.layout_index_key = self.graph_name+':'+config.layout_index_key

    self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
    self.node_prefix = self.graph_name+':'+config.node_prefix
    self.metric_prefix = self.graph_name+':'+config.metric_prefix
    self.score_prefix = self.graph_name+':'+config.score_prefix
    self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
    self.percolation_prefix = self.graph_name+':'+config.percolation_prefix

    self.normalization_suffix = config.normalization_suffix

    self.base_metrics = config.base_metrics
    self.advanced_metrics = config.advanced_metrics

    self.normalization_methods = config.normalization_methods

    self.scores = config.scores
    self.advanced_scores = config.advanced_scores

    self.visualization_layouts = config.visualization_layouts
    # this is commented out for testing purposes
    # self.percolation_modes = config.percolation_modes
    # self.advanced_percolation_modes = config.advanced_percolation_modes

    # test configuration: only the 'russian_shutoff' percolation mode runs
    self.percolation_modes = {'russian_shutoff':config.percolation_modes['russian_shutoff']}
    self.advanced_percolation_modes = {}
|
|
93 |
|
|
|
94 |
|
##############################################################################
|
|
95 |
|
###### start describes the entire calculation in a high level overview #######
|
|
96 |
|
##############################################################################
|
|
97 |
|
|
|
98 |
|
def start(self):
    """Trimmed test pipeline: only percentage input, percolation, the
    'russian' graph snapshot, and duration evaluation run; the remaining
    steps are deliberately disabled below."""
    start_time_calculation = dt.datetime.now()

    #preliminary calculations
    #self.flush_database()
    self.obtain_percentages()
    #self.create_info()
    #self.create_standard_layout()
    #self.save_graph_data('raw')

    #index creation
    #self.create_indexes()

    #main calculations
    #self.calculate_metrics()
    #self.calculate_advanced_metrics()
    #self.normalize_metrics()
    #self.calculate_scores()
    #self.calculate_advanced_scores()

    #statistics
    #self.calculate_statistics()

    #dynamic metrics / percolation
    self.calculate_percolation()

    #visualization
    #self.visualize_graph()

    #save final graph
    self.save_graph_data('russian')

    #evaluation: total must be set before evaluate_durations reads it
    self.duration_total = dt.datetime.now() - start_time_calculation
    self.evaluate_durations()
|
|
133 |
|
|
|
134 |
|
|
|
135 |
|
###################
|
|
136 |
|
## PRELIMINARIES ##
|
|
137 |
|
###################
|
|
138 |
|
def flush_database(self):
    """Ask the user whether to wipe the Redis database before starting."""
    answer = 'Flushing'
    # keep asking until the reply is exactly 'y' or 'n'
    while answer not in ('y', 'n'):
        answer = raw_input("Would you like to flush the database before continuing? [y/n]")
    if answer == 'y':
        self.redis.flushdb()
|
|
145 |
|
|
|
146 |
|
def obtain_percentages(self):
    """Interactively read removal percentages for percolation and derive
    the corresponding absolute node counts on the largest component.

    Stores {node_count: percentage} in self.percentages; in this test
    variant the Redis write is disabled.
    """
    percentages = '' # initialise
    while (percentages == ''):
        percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")

    percentages = sorted([float(pct)for pct in percentages.split()])
    # +0.5 rounds to the nearest whole number of nodes
    numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
    # create a dictionary of percentages and corresponding numbers of nodes
    self.percentages = dict(zip(numbers,percentages))
    # Redis storage disabled for testing
    #self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
|
|
159 |
|
|
|
160 |
|
def create_info(self):
    """Store basic graph facts (vertex and edge counts) in the per-graph
    Redis info hash."""
    self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
    self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())
|
|
164 |
|
|
|
165 |
|
def create_standard_layout(self):
|
|
166 |
|
# create a standard layout
|
|
167 |
|
start_time = dt.datetime.now()
|
|
168 |
|
print 'Creating standard layout for graph visualization.'
|
|
169 |
|
if not hasattr(self.g.vp,'sfdp'):
|
|
170 |
|
self.g.vp.sfdp = gt.sfdp_layout(self.g, C=0.5)
|
|
171 |
|
#self.durations['SFDP_layout'] = dt.datetime.now() - start_time
|
|
172 |
|
print self.durations['SFDP_layout']
|
|
173 |
|
|
|
174 |
|
def save_graph_data(self,name):
    """Persist the graph-tool graph to '<graph_name>_<name>.gt.gz'.

    name -- snapshot tag (e.g. 'russian' for the test pipeline)
    """
    start_time = dt.datetime.now()
    print 'Saving raw graph data'
    self.g.save(self.graph_name+'_'+name+'.gt.gz')
    self.durations['saving_graph'+name] = dt.datetime.now() - start_time
|
|
180 |
|
|
|
181 |
|
##################
|
|
182 |
|
#### INDEXING ####
|
|
183 |
|
##################
|
|
184 |
|
def create_indexes(self):
    """Build the Redis index sets via indexing.py helpers.

    NOTE(review): every indexing call is disabled in this test variant,
    so the recorded 'indexing' duration measures nothing.
    """
    start_time = dt.datetime.now()
    #call methods defined in indexing.py
    #indexing.index_graph(self)
    #indexing.index_nodes(self)
    #indexing.index_neighbors(self)
    #indexing.index_metrics(self)
    #indexing.index_scores(self)
    #indexing.index_percolation(self)
    self.durations['indexing'] = dt.datetime.now() - start_time
|
|
194 |
|
|
|
195 |
|
###########################
|
|
196 |
|
#### CALCULATION LOOPS ####
|
|
197 |
|
###########################
|
|
198 |
|
|
|
199 |
|
def calculate_metrics(self):
    """Compute every configured base metric for every node; each value is
    stored in the per-node hash and the per-metric sorted set."""
    start_time_total = dt.datetime.now()
    print ('Starting calculate_metrics')
    for metric_name in self.base_metrics:
        start_time = dt.datetime.now()
        metric_method = self.base_metrics[metric_name]

        # loop through all nodes
        for node in self.nodes:
            # metric implementations are called with an integer node id
            node = int(node)
            value = float(metric_method(self,node))

            #store result in node values
            self.redis.hset(self.node_prefix+str(node), metric_name, value)

            #also store result to metric set
            self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
        self.durations['metrics:'+metric_name] = dt.datetime.now() - start_time
    self.durations['metrics_total'] = dt.datetime.now() - start_time_total
|
|
220 |
|
|
|
221 |
|
|
|
222 |
|
def calculate_advanced_metrics(self):
    """Compute every configured advanced metric for every node; mirrors
    calculate_metrics with per-node hash and per-metric sorted set."""
    start_time_total = dt.datetime.now()
    print ('Starting calculate_advanced_metrics')
    for advanced_metric_name in self.advanced_metrics:
        start_time = dt.datetime.now()
        metric_method = self.advanced_metrics[advanced_metric_name]

        # loop through all nodes
        for node in self.nodes:
            # metric implementations are called with an integer node id
            node = int(node)
            value = float(metric_method(self,node))

            #store result in node values
            self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)

            #also store result to metric set
            self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
        self.durations['adv_metrics:'+advanced_metric_name] = dt.datetime.now() - start_time
    self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total
|
|
242 |
|
|
|
243 |
|
|
|
244 |
|
# loop through all defined normalizations and call respective normalization method
|
|
245 |
|
# no default normalizations for metrics not listed in the "normalization_methods" hash
|
|
246 |
|
def normalize_metrics(self):
|
|
247 |
|
start_time = dt.datetime.now()
|
|
248 |
|
#fallback normalization: min-max
|
|
249 |
|
print ('Starting normalize_metrics')
|
|
250 |
|
all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())
|
|
251 |
|
|
|
252 |
|
for metric_name in all_metrics:
|
|
253 |
|
if self.normalization_methods.has_key(metric_name):
|
|
254 |
|
normalization_method = self.normalization_methods[metric_name]
|
|
255 |
|
else:
|
|
256 |
|
#fallback normalization is min-max
|
|
257 |
|
normalization_method = normalizations.min_max
|
|
258 |
|
normalization_method(self,metric_name)
|
|
259 |
|
|
|
260 |
|
self.durations['normalizing'] = dt.datetime.now() - start_time
|
|
261 |
|
|
|
262 |
|
|
|
263 |
|
def calculate_scores(self):
|
|
264 |
|
start_time = dt.datetime.now()
|
|
265 |
|
print ('Starting calculate_scores')
|
|
266 |
|
for score_name in self.scores:
|
|
267 |
|
metrics_with_weights = self.scores[score_name]
|
|
268 |
|
|
|
269 |
|
for node in self.nodes:
|
|
270 |
|
score_value = 0.0
|
|
271 |
|
|
|
272 |
|
# get normalized values
|
|
273 |
|
for metric in metrics_with_weights:
|
|
274 |
|
weight = self.scores[score_name][metric]
|
|
275 |
|
value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
|
|
276 |
|
score_value += weight * value
|
|
277 |
|
|
|
278 |
|
#redis_server.hset(key, value, number);
|
|
279 |
|
self.redis.hset(self.node_prefix+str(node),score_name, score_value)
|
|
280 |
|
|
|
281 |
|
self.redis.zadd(self.score_prefix+score_name, score_value, str(node))
|
|
282 |
|
|
|
283 |
|
self.durations['scores'] = dt.datetime.now() - start_time
|
|
284 |
|
|
|
285 |
|
def calculate_advanced_scores(self):
|
|
286 |
|
start_time = dt.datetime.now()
|
|
287 |
|
print ('Starting calculate_advanced_scores')
|
|
288 |
|
for advanced_score in self.advanced_scores:
|
|
289 |
|
self.advanced_scores[advanced_score](self)
|
|
290 |
|
|
|
291 |
|
self.durations['adv_scores'] = dt.datetime.now() - start_time
|
|
292 |
|
|
|
293 |
|
|
|
294 |
|
#############
|
|
295 |
|
# statistics
|
|
296 |
|
#############
|
|
297 |
|
|
|
298 |
|
def calculate_statistics(self):
|
|
299 |
|
start_time = dt.datetime.now()
|
|
300 |
|
print ('Starting calculate_statistics')
|
|
301 |
|
for metric in self.base_metrics:
|
|
302 |
|
#absolute and normalized
|
|
303 |
|
statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
|
|
304 |
|
statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)
|
|
305 |
|
|
|
306 |
|
for advanced_metric in self.advanced_metrics:
|
|
307 |
|
#absolute and normalized
|
|
308 |
|
statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
|
|
309 |
|
statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)
|
|
310 |
|
|
|
311 |
|
for score in self.scores:
|
|
312 |
|
statistics.calculate_statistics(self, score, self.score_prefix+score)
|
|
313 |
|
|
|
314 |
|
for advanced_score in self.advanced_scores:
|
|
315 |
|
statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
|
|
316 |
|
self.durations['statistics:stats'] = dt.datetime.now() - start_time
|
|
317 |
|
|
|
318 |
|
start_time = dt.datetime.now()
|
|
319 |
|
statistics.calculate_correlations(self)
|
|
320 |
|
self.durations['statistics:corr'] = dt.datetime.now() - start_time
|
|
321 |
|
|
|
322 |
|
###################
|
|
323 |
|
# dynamic metrics #
|
|
324 |
|
###################
|
|
325 |
|
|
|
326 |
|
def calculate_percolation(self):
|
|
327 |
|
start_time_total = dt.datetime.now()
|
|
328 |
|
print ('Starting percolation calculation')
|
|
329 |
|
|
|
330 |
|
# shorten the name for percentages and corresponding numbers of nodes to remove
|
|
331 |
|
n = self.percentages
|
|
332 |
|
|
|
333 |
|
# BASIC PERCOLATION MODES
|
|
334 |
|
# basic percolation modes take mode_name and n as input and return a #
|
|
335 |
|
# dictionary with percentage of nodes removed as key and percentage ##
|
|
336 |
|
# of deterioration as value
|
|
337 |
|
for mode_name in self.percolation_modes:
|
|
338 |
|
start_time = dt.datetime.now()
|
|
339 |
|
# initialise exlusion vertex property map
|
|
340 |
|
self.exclusion_map.a = 1
|
|
341 |
|
# read method from config file
|
|
342 |
|
mode_method = self.percolation_modes[mode_name]
|
|
343 |
|
# execute method
|
|
344 |
|
#results = mode_method(self,mode_name,n)
|
|
345 |
|
mode_method(self,mode_name,n)
|
|
346 |
|
# index percolation mode
|
|
347 |
|
#self.redis.sadd(self.percolation_index_key, mode_name)
|
|
348 |
|
# store values
|
|
349 |
|
#print 'Storing percolation percentages'
|
|
350 |
|
#for percentage in results:
|
|
351 |
|
# value = results[percentage]
|
|
352 |
|
#store in hash set
|
|
353 |
|
#self.redis.hset(self.percolation_prefix+mode_name, percentage, value)
|
|
354 |
|
|
|
355 |
|
self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time
|
|
356 |
|
|
|
357 |
|
# ADVANCED PERCOLATION MODES
|
|
358 |
|
# advanced percolation modes take mode_name and n as input and return a ###
|
|
359 |
|
# dictionary with groups of percolation modes (e.g. metrics, countries) ###
|
|
360 |
|
# as keys and dictionaries of percentages (removed: deteriorated) as values
|
|
361 |
|
for mode_name in self.advanced_percolation_modes:
|
|
362 |
|
start_time = dt.datetime.now()
|
|
363 |
|
# initialise exlusion vertex property map
|
|
364 |
|
self.exclusion_map.a = 1
|
|
365 |
|
# read method from config file
|
|
366 |
|
mode_method = self.advanced_percolation_modes[mode_name]
|
|
367 |
|
# execute method
|
|
368 |
|
results = mode_method(self,mode_name,n)
|
|
369 |
|
|
|
370 |
|
# store values
|
|
371 |
|
#print 'Storing percolation percentages'
|
|
372 |
|
#for group in results:
|
|
373 |
|
# index percolation modes
|
|
374 |
|
# self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
|
|
375 |
|
#for percentage in results[group]:
|
|
376 |
|
# value = results[group][percentage]
|
|
377 |
|
#store in hash set
|
|
378 |
|
#self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)
|
|
379 |
|
|
|
380 |
|
self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time
|
|
381 |
|
|
|
382 |
|
self.durations['percolation_total'] = dt.datetime.now() - start_time_total
|
|
383 |
|
|
|
384 |
|
|
|
385 |
|
def visualize_graph(self):
|
|
386 |
|
|
|
387 |
|
for layout_name in self.visualization_layouts:
|
|
388 |
|
start_time = dt.datetime.now()
|
|
389 |
|
print 'Creating visualisation with '+layout_name+' layout'
|
|
390 |
|
|
|
391 |
|
layout_method = self.visualization_layouts[layout_name]
|
|
392 |
|
self.g.vp[layout_name] = layout_method(self)
|
|
393 |
|
gt.graph_draw(self.glc, pos=self.g.vp[layout_name], output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")
|
|
394 |
|
|
|
395 |
|
self.redis.sadd(self.layout_index_key, layout_name)
|
|
396 |
|
self.durations['layout:'+layout_name] = dt.datetime.now() - start_time
|
|
397 |
|
print self.durations['layout:'+layout_name]
|
|
398 |
|
|
|
399 |
|
def evaluate_durations(self):
|
|
400 |
|
#print out times taken
|
|
401 |
|
print 'times taken:'
|
|
402 |
|
output = open(str(self.graph_name)+"_duration_test_2.txt","w")
|
|
403 |
|
output.write("Graph Name:\t"+str(self.graph_name)+"\n")
|
|
404 |
|
output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
|
|
405 |
|
output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
|
|
406 |
|
output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str,self.percentages.values()))+"\n\n")
|
|
407 |
|
output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
|
|
408 |
|
for key in self.durations:
|
|
409 |
|
self.durations_in_seconds[key] = self.durations[key].total_seconds()
|
|
410 |
|
self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0
|
|
411 |
|
|
|
412 |
|
print str(key)+'\t'+str(self.durations_in_percent[key])
|
|
413 |
|
output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')
|
|
|
1 |
|
import networkx as nx |
|
2 |
|
import graph_tool.all as gt |
|
3 |
|
import redis as rd |
|
4 |
|
import numpy as np |
|
5 |
|
import indexing |
|
6 |
|
import statistics |
|
7 |
|
import normalizations |
|
8 |
|
import config |
|
9 |
|
import percolation |
|
10 |
|
import visualization |
|
11 |
|
import datetime as dt |
|
12 |
|
|
|
13 |
|
|
|
14 |
|
class MetricCalculator(object): |
|
15 |
|
def __init__ (self, graph, graph_gt):
    #class constructor
    #define required class variables such as the graph to work on, the redis connection and the nodes of the graph
    # graph    -- networkx graph (used for the node list)
    # graph_gt -- dict with keys 'graph_gt' (graph_tool graph) and
    #             'graph_gt_labels' (vertex label property map)

    print ('Starting metric_calculator!')

    # for code evaluation: per-step timings collected throughout the run
    self.start_time = dt.datetime.now()
    self.durations = {}
    self.durations_in_seconds = {}
    self.durations_in_percent = {}

    self.graph = graph
    self.graph_gt = graph_gt

    # alternate name for graph tool graph
    self.g = self.graph_gt['graph_gt']
    # alternate name for graph tool labels
    # (internalize the label map on first use so it is saved with the graph)
    if not hasattr(self.g.vp, 'label_map'):
        self.g.vp.label_map = self.graph_gt['graph_gt_labels']
    self.label_map = self.g.vp.label_map
    # vertex property map for percolation calculations
    if not hasattr(self.g.vp, 'exmap'):
        self.g.vp.exmap = self.g.new_vertex_property("bool") #internalizes map

    self.exclusion_map = self.g.vp.exmap
    self.exclusion_map.a = 1 #initialise filter map
    #find largest component of graph tool graph for percolation calculations
    # percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
    self.glc = gt.GraphView(self.g, vfilt = gt.label_largest_component(self.g))

    # precompute eigenvector / betweenness property maps once per graph
    if not hasattr(self.g.vp, 'eigenvector'):
        eigenvalue, self.g.vp.eigenvector = gt.eigenvector(self.g)
    if not hasattr(self.g.ep, 'betweenness'):
        betweenness,self.g.ep.betweenness = gt.betweenness(self.g)

    self.redis = rd.StrictRedis(host=config.REDIS_HOST, port=config.REDIS_PORT, db=1)
    self.nodes = nx.nodes(graph)


    # configuration variables are read from the config file and are also saved to class variables for easy access
    self.graph_index_key = config.graph_index_key

    # NOTE(review): raw_input is Python-2-only; blocks until a non-empty name is given
    self.graph_name = ''
    while (self.graph_name == ''):
        self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")

    # all redis keys are namespaced with the graph name entered above
    self.info_index_key = self.graph_name+':'+config.info_index_key
    self.node_index_key = self.graph_name+':'+config.node_index_key
    self.metric_index_key = self.graph_name+':'+config.metric_index_key
    self.score_index_key = self.graph_name+':'+config.score_index_key
    self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
    self.layout_index_key = self.graph_name+':'+config.layout_index_key

    self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
    self.node_prefix = self.graph_name+':'+config.node_prefix
    self.metric_prefix = self.graph_name+':'+config.metric_prefix
    self.score_prefix = self.graph_name+':'+config.score_prefix
    self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
    self.percolation_prefix = self.graph_name+':'+config.percolation_prefix

    self.normalization_suffix = config.normalization_suffix

    self.base_metrics = config.base_metrics
    self.advanced_metrics = config.advanced_metrics

    self.normalization_methods = config.normalization_methods

    self.scores = config.scores
    self.advanced_scores = config.advanced_scores

    self.visualization_layouts = config.visualization_layouts
    # this is commented out for testing purposes
    # self.percolation_modes = config.percolation_modes
    # self.advanced_percolation_modes = config.advanced_percolation_modes

    # test setup: only the 'russian_shutoff' percolation mode is enabled
    self.percolation_modes = {'russian_shutoff':config.percolation_modes['russian_shutoff']}
    self.advanced_percolation_modes = {}
|
93 |
|
|
|
94 |
|
############################################################################## |
|
95 |
|
###### start describes the entire calculation in a high level overview ####### |
|
96 |
|
############################################################################## |
|
97 |
|
|
|
98 |
|
def start(self):
    """Top-level driver: run the currently enabled calculation pipeline.

    Most stages are commented out for a percolation-only test run.
    """
    pipeline_started = dt.datetime.now()

    # preliminary calculations (flush / info / layout / raw save disabled)
    #self.flush_database()
    self.obtain_percentages()
    #self.create_info()
    #self.create_standard_layout()
    #self.save_graph_data('raw')

    # index creation (disabled)
    #self.create_indexes()

    # main calculations (disabled)
    #self.calculate_metrics()
    #self.calculate_advanced_metrics()
    #self.normalize_metrics()
    #self.calculate_scores()
    #self.calculate_advanced_scores()

    # statistics (disabled)
    #self.calculate_statistics()

    # dynamic metrics / percolation
    self.calculate_percolation()

    # visualization (disabled)
    #self.visualize_graph()

    # save the final graph state under the 'russian' suffix
    self.save_graph_data('russian')

    # evaluation of the collected timings
    self.duration_total = dt.datetime.now() - pipeline_started
    self.evaluate_durations()
|
133 |
|
|
|
134 |
|
|
|
135 |
|
################### |
|
136 |
|
## PRELIMINARIES ## |
|
137 |
|
################### |
|
138 |
|
def flush_database(self):
    """Interactively offer to wipe the redis database before a run."""
    answer = 'Flushing'  # sentinel: forces at least one prompt
    while answer not in ('y', 'n'):
        answer = raw_input("Would you like to flush the database before continuing? [y/n]")
    if answer == 'y':
        self.redis.flushdb()
|
145 |
|
|
|
146 |
|
def obtain_percentages(self):
    """Prompt for percolation percentages and precompute removal counts.

    Populates self.percentages as {nodes_to_remove: percentage}, sized
    against the largest connected component (self.glc).
    """
    user_input = ''  # initialise; loop until something was entered
    while user_input == '':
        user_input = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")

    pcts = sorted(float(token) for token in user_input.split())
    # round-half-up conversion of each percentage into a node count
    counts = [int(float(self.glc.num_vertices()) * (pct / 100.0) + 0.5) for pct in pcts]
    # dictionary of node counts and their corresponding percentages
    self.percentages = dict(zip(counts, pcts))
    # storing the values in the redis DB is currently disabled:
    #self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
|
159 |
|
|
|
160 |
|
def create_info(self):
    """Persist basic graph-wide counts to the info hash in redis."""
    for field, count in (('number_of_nodes', self.g.num_vertices()),
                         ('number_of_edges', self.g.num_edges())):
        self.redis.hset(self.info_index_key, field, count)
|
164 |
|
|
|
165 |
|
def create_standard_layout(self):
    """Compute (once) and time the default SFDP layout used by the frontend.

    The layout is internalized as vertex property 'sfdp' so it is persisted
    with the graph and not recomputed on later runs.
    """
    start_time = dt.datetime.now()
    print ('Creating standard layout for graph visualization.')
    if not hasattr(self.g.vp, 'sfdp'):
        self.g.vp.sfdp = gt.sfdp_layout(self.g, C=0.5)
    # bug fix: the duration was printed below but the assignment was
    # commented out, which made the print raise a KeyError
    self.durations['SFDP_layout'] = dt.datetime.now() - start_time
    print (self.durations['SFDP_layout'])
|
173 |
|
|
|
174 |
|
def save_graph_data(self,name):
    """Serialize the graph-tool graph to '<graph_name>_<name>.gt.gz'.

    name -- suffix identifying the pipeline stage (e.g. 'raw', 'russian')
    """
    start_time = dt.datetime.now()
    # fix: parenthesized print works identically on Python 2 and 3;
    # the bare print statement was Python-2-only syntax
    print ('Saving raw graph data')
    self.g.save(self.graph_name + '_' + name + '.gt.gz')
    self.durations['saving_graph' + name] = dt.datetime.now() - start_time
|
180 |
|
|
|
181 |
|
################## |
|
182 |
|
#### INDEXING #### |
|
183 |
|
################## |
|
184 |
|
def create_indexes(self):
    """Build redis indexes via indexing.py (all steps currently disabled).

    Only the timing entry is recorded while the individual indexing calls
    are switched off for this test setup.
    """
    started = dt.datetime.now()
    #indexing.index_graph(self)
    #indexing.index_nodes(self)
    #indexing.index_neighbors(self)
    #indexing.index_metrics(self)
    #indexing.index_scores(self)
    #indexing.index_percolation(self)
    self.durations['indexing'] = dt.datetime.now() - started
|
194 |
|
|
|
195 |
|
########################### |
|
196 |
|
#### CALCULATION LOOPS #### |
|
197 |
|
########################### |
|
198 |
|
|
|
199 |
|
def calculate_metrics(self):
    """Compute every configured base metric for every node and store it.

    Each metric method takes (calculator, node) and returns a float; the
    value is written both to the node's hash and to the metric's sorted set.
    """
    start_time_total = dt.datetime.now()
    print ('Starting calculate_metrics')
    for metric_name, metric_method in self.base_metrics.items():
        start_time = dt.datetime.now()

        for raw_node in self.nodes:
            node_id = int(raw_node)
            result = float(metric_method(self, node_id))

            # per-node hash entry ...
            self.redis.hset(self.node_prefix + str(node_id), metric_name, result)
            # ... plus a sorted set over all nodes for this metric
            self.redis.zadd(self.metric_prefix + metric_name, result, str(node_id))

        self.durations['metrics:' + metric_name] = dt.datetime.now() - start_time
    self.durations['metrics_total'] = dt.datetime.now() - start_time_total
|
220 |
|
|
|
221 |
|
|
|
222 |
|
def calculate_advanced_metrics(self):
    """Compute every configured advanced metric per node and store it.

    Mirrors calculate_metrics but iterates the advanced-metric table and
    records timings under the 'adv_metrics:' keys.
    """
    start_time_total = dt.datetime.now()
    print ('Starting calculate_advanced_metrics')
    for metric_name, metric_method in self.advanced_metrics.items():
        start_time = dt.datetime.now()

        for raw_node in self.nodes:
            node_id = int(raw_node)
            result = float(metric_method(self, node_id))

            # per-node hash entry ...
            self.redis.hset(self.node_prefix + str(node_id), metric_name, result)
            # ... plus a sorted set over all nodes for this metric
            self.redis.zadd(self.metric_prefix + metric_name, result, str(node_id))

        self.durations['adv_metrics:' + metric_name] = dt.datetime.now() - start_time
    self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total
|
242 |
|
|
|
243 |
|
|
|
244 |
|
# loop through all defined normalizations and call respective normalization method |
|
245 |
|
# no default normalizations for metrics not listed in the "normalization_methods" hash |
|
246 |
|
# loop through all defined normalizations and call respective normalization method
# no default normalizations for metrics not listed in the "normalization_methods" hash
def normalize_metrics(self):
    """Normalize every base and advanced metric; fall back to min-max.

    Metrics without an entry in self.normalization_methods are normalized
    with normalizations.min_max.
    """
    start_time = dt.datetime.now()
    print ('Starting normalize_metrics')
    # fix: dict(a.items() + b.items()) fails on Python 3 (items() views
    # cannot be concatenated); copy-then-update behaves identically on 2 and 3
    all_metrics = dict(self.base_metrics)
    all_metrics.update(self.advanced_metrics)

    for metric_name in all_metrics:
        # fix: 'in' replaces dict.has_key(), which was removed in Python 3
        if metric_name in self.normalization_methods:
            normalization_method = self.normalization_methods[metric_name]
        else:
            # fallback normalization is min-max
            normalization_method = normalizations.min_max
        normalization_method(self, metric_name)

    self.durations['normalizing'] = dt.datetime.now() - start_time
|
261 |
|
|
|
262 |
|
|
|
263 |
|
def calculate_scores(self):
    """Combine normalized metric values into weighted scores per node.

    Each score is a weighted sum of the nodes' normalized metric values
    (read back from redis); results go to the node hash and a sorted set.
    """
    start_time = dt.datetime.now()
    print ('Starting calculate_scores')
    for score_name, weight_map in self.scores.items():
        for node in self.nodes:
            total = 0.0

            # weighted sum over the normalized value of each contributing metric
            for metric_name, weight in weight_map.items():
                normalized = float(self.redis.hget(self.node_prefix + str(node), metric_name + self.normalization_suffix))
                total += weight * normalized

            self.redis.hset(self.node_prefix + str(node), score_name, total)
            self.redis.zadd(self.score_prefix + score_name, total, str(node))

    self.durations['scores'] = dt.datetime.now() - start_time
|
284 |
|
|
|
285 |
|
def calculate_advanced_scores(self):
    """Run every configured advanced-score routine over the whole graph.

    Each routine receives the calculator and manages its own storage.
    """
    started = dt.datetime.now()
    print ('Starting calculate_advanced_scores')
    for score_name, score_method in self.advanced_scores.items():
        score_method(self)

    self.durations['adv_scores'] = dt.datetime.now() - started
|
292 |
|
|
|
293 |
|
|
|
294 |
|
############# |
|
295 |
|
# statistics |
|
296 |
|
############# |
|
297 |
|
|
|
298 |
|
def calculate_statistics(self):
    """Derive distribution statistics and correlations for all results.

    Delegates to the project's statistics module: absolute and normalized
    variants for every metric, absolute values for every score, then the
    metric/score correlation pass.
    """
    start_time = dt.datetime.now()
    print ('Starting calculate_statistics')
    # absolute and normalized variants, base metrics first, then advanced
    for name in list(self.base_metrics) + list(self.advanced_metrics):
        statistics.calculate_statistics(self, name, self.metric_prefix + name)
        statistics.calculate_statistics(self, name + self.normalization_suffix, self.metric_prefix + name + self.normalization_suffix)

    # scores and advanced scores exist only in absolute form
    for name in list(self.scores) + list(self.advanced_scores):
        statistics.calculate_statistics(self, name, self.score_prefix + name)
    self.durations['statistics:stats'] = dt.datetime.now() - start_time

    start_time = dt.datetime.now()
    statistics.calculate_correlations(self)
    self.durations['statistics:corr'] = dt.datetime.now() - start_time
|
321 |
|
|
|
322 |
|
################### |
|
323 |
|
# dynamic metrics # |
|
324 |
|
################### |
|
325 |
|
|
|
326 |
|
def calculate_percolation(self):
    """Run all configured percolation modes against the graph.

    Basic modes return {removed%: deterioration%}; advanced modes return
    {group: {removed%: deterioration%}}. Redis storage of the returned
    mappings is currently disabled — the modes persist their own results.
    """
    start_time_total = dt.datetime.now()
    print ('Starting percolation calculation')

    # shorthand: percentages mapped to corresponding node-removal counts
    removal_spec = self.percentages

    # BASIC PERCOLATION MODES
    for mode_name, mode_method in self.percolation_modes.items():
        start_time = dt.datetime.now()
        # reset the exclusion vertex property map: every vertex active
        self.exclusion_map.a = 1
        mode_method(self, mode_name, removal_spec)
        self.durations['percolation:' + mode_name] = dt.datetime.now() - start_time

    # ADVANCED PERCOLATION MODES
    for mode_name, mode_method in self.advanced_percolation_modes.items():
        start_time = dt.datetime.now()
        self.exclusion_map.a = 1
        # return value kept for the (disabled) redis storage step
        results = mode_method(self, mode_name, removal_spec)
        self.durations['percolation:' + mode_name] = dt.datetime.now() - start_time

    self.durations['percolation_total'] = dt.datetime.now() - start_time_total
|
383 |
|
|
|
384 |
|
|
|
385 |
|
def visualize_graph(self):
    """Draw the largest component once per configured layout and index it.

    Layout coordinates are stored as internal vertex properties; each PNG
    is written to the frontend's pics directory and the layout name is
    registered in the redis layout index.
    """
    for layout_name in self.visualization_layouts:
        start_time = dt.datetime.now()
        # fix: parenthesized print works identically on Python 2 and 3;
        # the bare print statements here were Python-2-only syntax
        print ('Creating visualisation with '+layout_name+' layout')

        layout_method = self.visualization_layouts[layout_name]
        # keep the computed positions as an internal vertex property
        self.g.vp[layout_name] = layout_method(self)
        gt.graph_draw(self.glc, pos=self.g.vp[layout_name], output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")

        self.redis.sadd(self.layout_index_key, layout_name)
        self.durations['layout:'+layout_name] = dt.datetime.now() - start_time
        print (self.durations['layout:'+layout_name])
|
398 |
|
|
|
399 |
|
def evaluate_durations(self):
    """Write a per-step timing report (absolute / seconds / percent).

    Fills self.durations_in_seconds and self.durations_in_percent from
    self.durations and self.duration_total, prints the percentages and
    writes a tab-separated report file next to the working directory.
    """
    # fix: parenthesized print works identically on Python 2 and 3
    print ('times taken:')
    # fix: the report file handle was never closed; 'with' guarantees it
    with open(str(self.graph_name)+"_duration_test_2.txt","w") as output:
        output.write("Graph Name:\t"+str(self.graph_name)+"\n")
        output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
        output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
        output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str,self.percentages.values()))+"\n\n")
        output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
        for key in self.durations:
            self.durations_in_seconds[key] = self.durations[key].total_seconds()
            self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0

            print (str(key)+'\t'+str(self.durations_in_percent[key]))
            output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')