List of commits:
Subject Hash Author Date (UTC)
added the backend code 7cd35621d7adef15ecc88a7ad61d2709530f8a52 lizzzi111 2016-05-12 21:29:37
Commit 7cd35621d7adef15ecc88a7ad61d2709530f8a52 - added the backend code
Author: lizzzi111
Author date (UTC): 2016-05-12 21:29
Committer name: lizzzi111
Committer date (UTC): 2016-05-12 21:29
Parent(s):
Signer:
Signing key:
Signing status: N
Tree: 56901bdaf52bd3c5df85936e4e14abd8dfa92857
File Lines added Lines deleted
README.md 6 0
RU_ASN.txt 5570 0
RedisHelpers/del_keys_in_db.py 24 0
RedisHelpers/flush_db.py 19 0
RedisHelpers/move_keys_from_db_to_db.py 36 0
RedisHelpers/redis_test.py 23 0
RedisHelpers/search_db.py 20 0
advancedscores.py 33 0
advancedscores.pyc 0 0
change_output_file.py 52 0
config.py 134 0
config.pyc 0 0
file_importer.py 115 0
file_importer.pyc 0 0
graph_test.py 22 0
gt_file_importer.py 25 0
gt_file_importer.pyc 0 0
gt_start.py 55 0
indexing.py 27 0
indexing.pyc 0 0
log 644344 0
metric_calculator.py 403 0
metric_calculator.pyc 0 0
metrics.py 361 0
metrics.pyc 0 0
nohup.out 1252439 0
normalizations.py 37 0
normalizations.pyc 0 0
percolation.py 265 0
percolation.pyc 0 0
ru_file_importer.py 38 0
ru_file_importer.pyc 0 0
ru_metric_calculator.py 413 0
ru_metric_calculator.pyc 0 0
ru_start.py 56 0
start.py 56 0
statistics.py 58 0
statistics.pyc 0 0
test.py 31 0
visualization.py 160 0
visualization.pyc 0 0
File README.md added (mode: 100644) (index 0000000..cb89d83)
1 coria-backend
2 =============
3
4 Connectivity Risk Analysis Python Backend CORIA 2.0
5 usage: start.py [-h] filename
6
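The usage line above comes from argparse in start.py: a typical call would be python start.py edges.tsv, where edges.tsv stands for a tab-separated file of node-id pairs (one edge per line), the format parsed by file_importer.py; the file name here is only an example.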
The diff for file RU_ASN.txt is too big (5570 changes) and cannot be shown.
File RedisHelpers/del_keys_in_db.py added (mode: 100644) (index 0000000..1007f72)
1 import argparse
2 import redis as rd
3
4 parser = argparse.ArgumentParser(description='Delete keys from the Redis DB matching a certain pattern')
5 parser.add_argument('db_index',metavar='db_index',type=int,help='Database Index')
6 parser.add_argument('pattern',metavar='pattern',type=str,help='Pattern for Key Search')
7 args = parser.parse_args()
8 # reading arguments from parser
9 db_index = args.db_index
10 pattern = args.pattern
11
12 # defining redis
13 print 'Redis database:',db_index
14 redis = rd.StrictRedis(host='localhost', port=6379, db=db_index)
15 # finding keys matching pattern
16 print 'finding keys matching pattern', pattern
17 keys = redis.keys(pattern=pattern)
18 print len(keys),'keys matching pattern',pattern,'found'
19 # deleting keys
20 print 'deleting keys matching pattern', pattern
21 for key in keys:
22 redis.delete(key)
23 print len(keys),'keys matching pattern',pattern,'deleted'
24
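The loop above issues one DELETE round trip per key. A minimal sketch (not part of this commit) of the same clean-up batched through redis-py's pipeline; the helper name is made up:

import redis as rd

def delete_matching(db_index, pattern):
    # illustrative only: mirrors del_keys_in_db.py but batches the deletes
    r = rd.StrictRedis(host='localhost', port=6379, db=db_index)
    keys = r.keys(pattern=pattern)
    pipe = r.pipeline()
    for key in keys:
        pipe.delete(key)
    pipe.execute()  # all deletes are sent in a single batch
    return len(keys)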
File RedisHelpers/flush_db.py added (mode: 100644) (index 0000000..776409f)
1 import argparse
2 import redis as rd
3
4 parser = argparse.ArgumentParser(description='Flush the Redis DB.')
5 parser.add_argument('db_index',metavar='db_index',type=int,help='Database Index')
6 args = parser.parse_args()
7 db_index = args.db_index
8
9 redis = rd.StrictRedis(host='localhost', port=6379, db=db_index)
10
11 # ask to clean all data in Redis
12 flush_flag = 'Flushing'
13 while (flush_flag != 'y' and flush_flag != 'n'):
14 flush_flag = raw_input("Are you sure you want to flush database "+str(db_index)+"? [y/n]")
15 if flush_flag == 'y':
16 print 'Flushing database',str(db_index)
17 redis.flushdb()
18 else:
19 print 'Aborting database flush'
File RedisHelpers/move_keys_from_db_to_db.py added (mode: 100644) (index 0000000..a5cc125)
1 import argparse
2 import redis as rd
3
4 parser = argparse.ArgumentParser(description='Migrate keys matching a pattern from one DB to another.')
5 parser.add_argument('db_source',metavar='db_source',type=int,help='Source Database Index')
6 parser.add_argument('db_target',metavar='db_target',type=int,help='Target Database Index')
7 parser.add_argument('pattern',metavar='pattern',type=str,help='Pattern for Key Search')
8 args = parser.parse_args()
9 # reading arguments from parser
10 db_source = args.db_source
11 db_target = args.db_target
12 pattern = args.pattern
13
14 redis_source = rd.StrictRedis(host='localhost', port=6379, db=db_source)
15 print "Redis source database is",str(db_source)
16 redis_target = rd.StrictRedis(host='localhost', port=6379, db=db_target)
17 print "Redis target database is",str(db_target)
18 print "Redis 2 defined"
19
20 # clean all data in Redis
21 flush_flag = 'Flushing'
22 while (flush_flag != 'y' and flush_flag != 'n'):
23 flush_flag = raw_input("Would you like to flush the target database before continuing? [y/n]")
24 if flush_flag == 'y':
25 redis_target.flushdb()
26 print "Redis target database flushed"
27
28 keys = redis_source.keys(pattern)
29 print len(keys),'keys matching pattern',pattern,'found'
30
31 for key in keys:
32 print key
33 redis_source.move(str(key), db_target)
34 #redis_source.migrate(host='localhost', port=6379, key=str(k), destination-db=db_target, timeout=3000,copy=1)
35
36 print len(keys),"keys from Redis source migrated to Redis target"
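MOVE, as used above, only transfers keys between logical databases of the same Redis instance, which is exactly what this script needs; the commented-out MIGRATE call would be the cross-instance equivalent. A short sketch (not part of this commit) of copying a key between two separate connections via DUMP/RESTORE, with a made-up helper name:

def copy_key(source, target, key):
    # illustrative only: DUMP serialises the value, RESTORE recreates it elsewhere
    payload = source.dump(key)
    if payload is not None:
        target.restore(key, 0, payload)  # ttl=0 keeps the key without expiry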
File RedisHelpers/redis_test.py added (mode: 100644) (index 0000000..51ff08d)
1 import argparse
2 import redis as rd
3
4 parser = argparse.ArgumentParser(description='Find keys matching a pattern in DB.')
5 #parser.add_argument('db_index',metavar='db_index',type=int,help='Database Index')
6 parser.add_argument('pattern',metavar='pattern',type=str,help='Pattern for Key Search')
7 args = parser.parse_args()
8 # reading arguments from parser
9 pattern = args.pattern
10 keys = []
11 for i in range(0,10):
12 db_index = i
13
14 redis = rd.StrictRedis(host='localhost', port=6379, db=db_index)
15
16 temp_keys = redis.keys(pattern)
17 print len(temp_keys),'\tkeys matching pattern',pattern,'found in database',db_index
18 for key in temp_keys:
19 keys.append(key)
20
21 #for key in keys:
22 #print key
23 print len(keys),'\tkeys matching pattern',pattern,'found in total'
File RedisHelpers/search_db.py added (mode: 100644) (index 0000000..3ca1070)
1 import argparse
2 import redis as rd
3
4 parser = argparse.ArgumentParser(description='Find keys matching a pattern in DB.')
5 parser.add_argument('db_index',metavar='db_index',type=int,help='Database Index')
6 parser.add_argument('pattern',metavar='pattern',type=str,help='Pattern for Key Search')
7 args = parser.parse_args()
8 # reading arguments from parser
9 db_index = args.db_index
10 pattern = args.pattern
11
12 redis = rd.StrictRedis(host='localhost', port=6379, db=db_index)
13 print "Redis database index:",str(db_index)
14
15 keys = redis.keys(pattern)
16 print len(keys),'keys matching pattern',pattern,'found'
17
18 for key in keys:
19 print key
20 print "\n",len(keys),'keys matching pattern',pattern,'found'
File advancedscores.py added (mode: 100644) (index 0000000..489636a)
1 # advancedscores.py
2 import numpy as np
3
4 ################
5 #advanced scores
6 ################
7
8 def adv_unified_risk_score(self):
9
10 #caching of all values in dictionaries
11 all_ccs_normalized = dict(self.redis.zrange(self.metric_prefix+'corrected_clustering_coefficient'+self.normalization_suffix, 0, -1, withscores=True, score_cast_func=float))
12 all_urs = dict(self.redis.zrange(self.score_prefix+'unified_risk_score', 0, -1, withscores=True, score_cast_func=float))
13
14 urs_percentile_10 = np.percentile(all_urs.values(), 10)
15 urs_percentile_90 = np.percentile(all_urs.values(), 90)
16
17 for node in self.nodes:
18 cc_normalized = all_ccs_normalized[str(node)]
19 urs = all_urs[str(node)]
20
21
22 if (urs >= urs_percentile_90 or urs <= urs_percentile_10):
23 if (cc_normalized >= 0.25):
24 advanced_unified_risk_score = ((urs * 3.0) + cc_normalized) / 4.0
25 else:
26 advanced_unified_risk_score = urs
27 else:
28 advanced_unified_risk_score = urs
29
30 #save for node
31 self.redis.hset(self.node_prefix+str(node), 'advanced_unified_risk_score', advanced_unified_risk_score)
32 #save for score
33 self.redis.zadd(self.score_prefix+'advanced_unified_risk_score', advanced_unified_risk_score, str(node))
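A worked example of the rule above, with made-up numbers: a node in the top decile of the unified risk score with urs = 0.92 and a normalized corrected clustering coefficient of 0.6 (above the 0.25 threshold) gets the weighted blend; with cc_normalized below 0.25, or outside the top/bottom deciles, it would keep urs unchanged.

urs, cc_normalized = 0.92, 0.6              # made-up values
aurs = ((urs * 3.0) + cc_normalized) / 4.0  # same weighting as in the code above
print(aurs)                                 # 0.84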
File advancedscores.pyc added (mode: 100644) (index 0000000..51d9396)
File change_output_file.py added (mode: 100644) (index 0000000..985dee5)
1 import argparse
2 #import graph_tool.all as gt
3 #import redis as rd
4 #import numpy as np
5
6 parser = argparse.ArgumentParser(description='Change duration output file.')
7 parser.add_argument('file_name',metavar='file_name',type=str,help='File Name')
8 args = parser.parse_args()
9 file_name = args.file_name
10
11 print 'Opening file',file_name
12 input_file = open(file_name)
13
14
15 counter = -6
16 info = []
17 body = []
18 for line in input_file:
19 if counter < -1:
20 print 'Reading info'
21 info.append([field for field in line.strip().split("\t")])
22 counter += 1
23 elif counter == -1:
24 print 'Reading headers'
25 headers = line.strip().split("\t")
26 counter += 1
27 else:
28 body.append([field for field in line.strip().split("\t")])
29 counter += 1
30 print 'Reading row',counter,'of body'
31 print 'Closing input file'
32 input_file.close()
33
34 print 'Sorting table'
35 body.sort(key=lambda row: row[3], reverse=True)
36 print 'Germanising: Replacing \".\" with \",\"'
37 body = [map(lambda item: str(item).replace(".",","),row) for row in body]
38
39 print 'Writing in new file'
40 output_file = open("_changed.".join(file_name.split(".")),"w")
41 for row in info:
42 print 'Writing row:',"\t".join(row)
43 output_file.write("\t".join(row)+"\n")
44 output_file.write("\t".join(headers)+"\n")
45 for row in body:
46 #for item in row:
47 # str(item).replace(".",",")
48 print 'Writing row:',"\t".join(row)
49 output_file.write("\t".join(row)+"\n")
50
51 output_file.close()
52
File config.py added (mode: 100644) (index 0000000..59cc2dd)
1 #config.py
2 import metrics
3 import normalizations
4 import advancedscores
5 import percolation
6 import visualization
7
8 #redis keys for indexes and values
9 graph_index_key = 'all_graphs'
10
11 info_index_key = 'general_info'
12 node_index_key = 'all_nodes'
13 metric_index_key = 'all_metrics'
14 score_index_key = 'all_scores'
15 percolation_index_key = 'all_percolation_modes'
16 layout_index_key = 'all_layouts'
17
18 node_neighbors_prefix = 'node_neighbors:'
19 node_prefix = 'node_metrics:'
20 metric_prefix = 'metric:'
21 score_prefix = 'score:'
22 statistics_prefix = 'statistics:'
23 percolation_prefix = 'percolation:'
24
25 normalization_suffix = '_normalized'
26
27
28 # definition of all base metrics for which absolute values will be calculated for each node in the first step
29 # key is the name of the metric and value is the implemented method which exposes the required interface
30 # interface: each method takes the node as the single parameter, performs the necessary calculation and
31 # returns a float containing the value for the specified node
32
33 base_metrics = { 'clustering_coefficient' : metrics.clustering_coefficient,
34 'degree' : metrics.degree,
35 # 'degree_(gt)' : metrics.degree_gt,
36 'average_neighbor_degree' : metrics.average_neighbor_degree,
37 'iterated_average_neighbor_degree' : metrics.iterated_average_neighbor_degree,
38 # 'iterated_average_neighbor_degree_(gt)': metrics.iterated_average_neighbor_degree,
39 # 'betweenness_centrality' : metrics.betweenness_centrality,
40 'betweenness_centrality_(gt)' : metrics.betweenness_centrality_gt,
41 # 'eccentricity' : metrics.eccentricity,
42 'eccentricity_(gt)' : metrics.eccentricity_gt,
43 # 'eccentricity_(gt)_s' : metrics.eccentricity_gt_s,
44 # 'average_shortest_path_length' : metrics.average_shortest_path_length,
45 'average_shortest_path_length_(gt)' : metrics.average_shortest_path_length_gt,
46 # 'average_shortest_path_length_(gt)_s' : metrics.average_shortest_path_length_gt_small_graphs,
47 'eigenvector_centrality_(gt)' : metrics.eigenvector_centrality_gt,
48 # 'eigenvector_centrality' : metrics.eigenvector_centrality,
49 # 'deterioration' : metrics.deterioration
50 }
51
52
53 # some metrics might require some corrections or post processing which relies on the value of other metrics or normalizations
54 # key is the metric name and value the method for correction
55
56 advanced_metrics = {'corrected_clustering_coefficient' : metrics.correct_clustering_coefficient,
57 'corrected_average_neighbor_degree' : metrics.correct_average_neighbor_degree,
58 'corrected_iterated_average_neighbor_degree': metrics.correct_iterated_average_neighbor_degree}
59
60
61 # for every metric, a normalization method has to be specified
62 # key is the name of the metric and value is the normalization method which also has to expose the required interface
63 # interface: normalization methods take the name of the (absolute) metric as their single argument; no return value is required
64 # the method itself shall access the data which is required for normalization from the redis instance
65 # and the corresponding keys/values for the specified metric
66 # it shall then loop over all nodes and calculate the normalized value for the node and the metric
67 # afterwards it should save the result to redis using "metric_name_normalized" as the key
68 # the result is stored inside the node's hash for metrics
69
70 # also needs to include corrected metrics with their respective names
71 #
72 normalization_methods = { 'clustering_coefficient' : normalizations.min_max,
73 'corrected_clustering_coefficient' : normalizations.min_max,
74 'degree' : normalizations.min_max,
75 'degree_(gt)' : normalizations.min_max,
76 'average_neighbor_degree' : normalizations.min_max,
77 'corrected_average_neighbor_degree' : normalizations.min_max,
78 'iterated_average_neighbor_degree' : normalizations.min_max,
79 'iterated_average_neighbor_degree_(gt)' : normalizations.min_max,
80 'corrected_iterated_average_neighbor_degree': normalizations.min_max,
81 'betweenness_centrality' : normalizations.min_max,
82 'betweenness_centrality_(gt)' : normalizations.min_max,
83 'eccentricity' : normalizations.max_min,
84 'eccentricity_(gt)' : normalizations.max_min,
85 'eccentricity_(gt)_s' : normalizations.max_min,
86 'average_shortest_path_length' : normalizations.max_min,
87 'average_shortest_path_length_(gt)' : normalizations.max_min,
88 'average_shortest_path_length_(gt)_s' : normalizations.max_min,
89 'eigenvector_centrality_(gt)' : normalizations.min_max,
90 'eigenvector_centrality' : normalizations.min_max,
91 'deterioration' : normalizations.min_max
92 }
93
94
95 # the easiest case for a score is a combination of normalized metric values with weights that add up to 1
96 # such scores can easily be defined here
97 # note: names are not methods but redis keys
98
99 scores = {'unified_risk_score': { 'degree': 0.25,
100 'corrected_average_neighbor_degree': 0.15,
101 'corrected_iterated_average_neighbor_degree': 0.1,
102 'betweenness_centrality_(gt)': 0.25,
103 # 'eccentricity': 0.125,
104 'average_shortest_path_length_(gt)': 0.25}
105 }
106
107
108 # other scores might require a more sophisticated algorithm to be calculated
109 # such scores need to be added here and implemented like the example below
110
111 advanced_scores = {'advanced_unified_risk_score': advancedscores.adv_unified_risk_score}
112
113
114 # these are the different percolation modes with name as key and method as value
115 # advanced modes have sub-modes for e.g. each metric
116
117 percolation_modes = {'failure': percolation.failure,
118 'random_walk': percolation.random_walk,
119 'russian_shutoff': percolation.russian
120 }
121
122 advanced_percolation_modes = {'target_list': percolation.target_list,
123 'hybrid_mode': percolation.hybrid_mode
124 }
125
126 # layouts for graph visualization.
127 # note 1: ARF does not seem to work with most graphs (error message: non-invertible matrix)
128 # note 2: Fruchterman-Reingold layout (FRUCHT) takes up a high percentage of computation time
129 visualization_layouts = {#'SFDP': visualization.sfdp,
130 'Radial': visualization.radial,
131 #'Random': visualization.random,
132 #'ARF': visualization.arf,
133 #'Fruchterman_Reingold':visualization.frucht
134 }
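To illustrate the interfaces described in the comments above (not part of this commit): adding a further base metric means providing a method that takes the node and returns a float, and registering it in base_metrics plus normalization_methods. The metric chosen here, closeness centrality via networkx, is only an example.

# in metrics.py (illustrative), following the caching pattern used there
def closeness_centrality(self, node):
    if not hasattr(self, 'all_closeness_centralities'):
        self.all_closeness_centralities = nx.closeness_centrality(self.graph)
    return self.all_closeness_centralities[node]

# in config.py (illustrative)
base_metrics['closeness_centrality'] = metrics.closeness_centrality
normalization_methods['closeness_centrality'] = normalizations.min_max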
File config.pyc added (mode: 100644) (index 0000000..04b1176)
File file_importer.py added (mode: 100644) (index 0000000..88c66d8)
1 import networkx as nx
2 import graph_tool.all as gt
3
4 class FileImporter(object):
5 def __init__(self,filename):
6 # initialize data file to parse and new empty graph
7 print 'Starting file importer!'
8 self.data_file = open(filename)
9 self.graph = nx.Graph()
10 self.graph_gt = gt.Graph(directed=False)
11 self.graph_gt_labels = self.graph_gt.new_vertex_property("double")
12
13 def read(self):
14 for line in self.data_file:
15 print "Parsing line",line
16 self.parse_line(line)
17 return self.graph
18
19 def read_gt(self):
20 return {'graph_gt':self.graph_gt, 'graph_gt_labels':self.graph_gt_labels}
21
22 def parse_line(self, line):
23 # split each line on tabstop
24 # first field specifies the source node
25 # second field specifies the target node
26
27 fields = line.strip().split("\t")
28 from_node = int(fields[0])
29 to_node = int(fields[1])
30
31 # print('\n')
32 # print('From node is',from_node)
33 # print('To node is',to_node)
34 # add edge to the networkx graph
35 if (from_node <> to_node):
36 self.graph.add_edge(from_node, to_node)
37 # print('Network X graph has the following number of nodes',self.graph.number_of_nodes())
38 # print('Network X graph has the following number of edges',self.graph.number_of_edges())
39
40
41
42 #add edge to the graph_tool graph and create a property map of labels
43 #check if nodes are already present and create new ones if not
44 #temp = gt.Graph(directed=False)
45 #temp_name = temp.new_vertex_property("string")
46 temp = self.graph_gt
47 temp_name = self.graph_gt_labels
48
49 check = None
50 if (from_node <> to_node): #check if from_node is the same as to_node
51 index_from = gt.find_vertex(temp,temp_name,from_node)
52 # print('Index from is',index_from)
53 index_to = gt.find_vertex(temp,temp_name,to_node)
54 # print('Index to is',index_to)
55 if (index_from == [] and index_to == []):
56 # print('No indices are found')
57 c1 = temp.add_vertex()
58 temp_name[temp.vertex(c1)] = from_node
59 # print('Temp_name is now',temp_name[temp.vertex(c1)])
60 c2 = temp.add_vertex()
61 temp_name[temp.vertex(c2)] = to_node
62 # print('Temp_name is now',temp_name[temp.vertex(c2)])
63 if (index_from <> [] and index_to == []) :
64 # print('Index from is')
65 # print(index_from[0])
66 c1 = index_from[0]
67 #print('C1 is',c1)
68 c2 = temp.add_vertex()
69 #print('C2 is'),
70 #print(c2)
71 temp_name[temp.vertex(c2)] = to_node
72 # print('Temp_name is now',temp_name[temp.vertex(c2)])
73 if (index_to <> [] and index_from ==[]) :
74 # print('Index to is')
75 # print(index_to[0])
76 c1 = temp.add_vertex()
77 c2 = index_to[0]
78 temp_name[temp.vertex(c1)] = from_node
79 # print('Temp_name is now',temp_name[temp.vertex(c1)])
80 if (index_from <> [] and index_to <> []) :
81 # print('Both vertices found')
82 c1 = index_to[0]
83 c2 = index_from[0]
84 check = temp.edge(c1,c2) #check if the edge is already present
85 # print('Check is',check)
86 if (check == None):
87 # print("Adding edge between",c1,"and",c2)
88 temp.add_edge(c1, c2)
89
90 #print(temp_name)
91 self.graph_gt = temp
92 self.graph_gt_labels = temp_name
93
94 # Check whether GT and NetworkX graphs have the same number of nodes and edges
95 # if (self.graph_gt.num_vertices() <> self.graph.number_of_nodes()):
96 # print('Unequal number of vertices detected at from node',from_node,'to node',to_node)
97 # print('Number of vertices in Gt Graph is',self.graph_gt.num_vertices())
98 # print('Number of vertices in NetworkX is',self.graph.number_of_nodes())
99 # else:
100 # print('Equal number of vertices in both graphs')
101
102 # if (self.graph_gt.num_edges() <> self.graph.number_of_edges()):
103 # print('Unequal number of edges detected at from node',from_node,'to node',to_node)
104 # print('Number of vertices in Gt Graph is',self.graph_gt.num_edges())
105 # print('Number of vertices in NetworkX is',self.graph.number_of_edges())
106 # else:
107 # print('Equal number of edges in both graphs')
108
109 # if (self.graph.number_of_nodes() <> self.graph_gt.
110 # print('Graph tool graph is',self.graph_gt)
111 # print('Graph tool labels map is',self.graph_gt_labels)
112
113
114
115
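parse_line above resolves both endpoints with gt.find_vertex, which scans every vertex on each call, so building the graph-tool graph grows roughly quadratically with the number of nodes. A sketch (not part of this commit) of the usual alternative, a plain dict from node label to vertex kept next to the property map; attribute and method names are made up:

def get_or_add_vertex(self, label):
    # illustrative only: self.vertex_by_label would start as {} in __init__
    v = self.vertex_by_label.get(label)
    if v is None:
        v = self.graph_gt.add_vertex()
        self.graph_gt_labels[v] = label
        self.vertex_by_label[label] = v
    return v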
File file_importer.pyc added (mode: 100644) (index 0000000..9b19358)
File graph_test.py added (mode: 100644) (index 0000000..df288a7)
1 import argparse
2 import graph_tool.all as gt
3 import datetime as dt
4 #import redis as rd
5 #import numpy as np
6
7 parser = argparse.ArgumentParser(description='Writing summary of graph-tool gt-file')
8 parser.add_argument('file_name',metavar='file_name',type=str,help='File Name')
9 args = parser.parse_args()
10 file_name = args.file_name
11 #start_time = dt.datetime.now()
12
13 print 'Opening file',file_name
14
15 g = gt.load_graph(file_name)
16 #delta = dt.datetime.now() - start_time
17 #delta_s = delta.total_seconds()
18
19 print "# properties:",g.list_properties()
20 print "# vertices:",g.num_vertices()
21 print "# edges:",g.num_edges()
22 #print "time taken:",delta_s
File gt_file_importer.py added (mode: 100644) (index 0000000..ae318be)
1 import networkx as nx
2 import graph_tool.all as gt
3
4 class FileImporter(object):
5 def __init__(self,filename):
6 # read graph from *.gt file (graph-tool file) and initialize empty networkx graph
7 print 'Starting file importer!'
8 self.graph_gt = gt.load_graph(filename)
9 self.graph = nx.Graph()
10 self.graph_gt_labels = self.graph_gt.vp.label_map
11
12 def read(self):
13 # reconstruct networkx graph from graph-tool graph
14 for edge in self.graph_gt.edges():
15 from_node = int(self.graph_gt_labels[edge.source()])
16 to_node = int(self.graph_gt_labels[edge.target()])
17 print "Creating edge from node",from_node,"to node",to_node
18 if (from_node != to_node):
19 self.graph.add_edge(from_node, to_node)
20 return self.graph
21
22 def read_gt(self):
23 return {'graph_gt':self.graph_gt, 'graph_gt_labels':self.graph_gt_labels}
24
25
File gt_file_importer.pyc added (mode: 100644) (index 0000000..12f8577)
File gt_start.py added (mode: 100644) (index 0000000..728489f)
1 #!/usr/bin/env python
2 import datetime
3 import argparse
4 import cProfile, pstats, StringIO
5 from gt_file_importer import FileImporter
6 from metric_calculator import MetricCalculator
7 import datetime as dt
8
9 print 'Starting metric calculation',dt.datetime.now()
10 ## added this line of code ###
11 start_time = dt.datetime.now()
12 ##############################
13 parser = argparse.ArgumentParser(description='Read a graph-tool .gt Graph File and start Calculation of Metrics and Statistics as configured in config.py')
14
15 parser.add_argument('filename', metavar='filename', type=str,
16 help='the name of the graph-tool .gt file to load')
17
18 parser.add_argument('--profiling',dest='profiling',action='store_true', help='enable runtime profiling into profiling.txt file')
19
20 args = parser.parse_args()
21
22 if args.profiling:
23 pr = cProfile.Profile()
24 s = StringIO.StringIO()
25 timestamp = str(datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
26 outfile = open('profiling_output_'+timestamp+'.txt', 'w')
27 pr.enable()
28
29 fi = FileImporter(args.filename)
30 graph = fi.read()
31 #print('This should be a Network X graph',graph)
32 print 'Network X graph has the following number of nodes',graph.number_of_nodes()
33 print 'Network X graph has the following number of edges',graph.number_of_edges()
34 graph_gt = fi.read_gt()
35 print 'Graph tool graph has the following number of nodes',graph_gt['graph_gt'].num_vertices()
36 print 'Graph tool graph has the following number of edges',graph_gt['graph_gt'].num_edges()
37 #print('Gt graph has the following properties')
38
39 time_delta = dt.datetime.now() - start_time
40 print 'Time taken to create graphs:',time_delta
41
42 mc = MetricCalculator(graph,graph_gt)
43 mc.start()
44
45 if args.profiling:
46 ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
47 ps.print_stats()
48 outfile.write(s.getvalue())
49
50 ## added this line of code ###
51 time_delta = dt.datetime.now() - start_time
52 ##############################
53 print 'Ending metric calculation',dt.datetime.now()
54 ## and this line below #######
55 print 'Time taken to calculate:',time_delta
File indexing.py added (mode: 100644) (index 0000000..28f726f)
1 #indexing
2 def index_graph(self):
3 self.redis.sadd(self.graph_index_key, self.graph_name)
4
5 def index_nodes(self):
6 for node in self.nodes:
7 self.redis.sadd(self.node_index_key, node)
8
9 def index_neighbors(self):
10 for node in self.nodes:
11 node_neighbors = self.graph.neighbors(int(node))
12 for neighbor in node_neighbors:
13 self.redis.sadd(self.node_neighbors_prefix+str(node), neighbor)
14
15 def index_metrics(self):
16 for metric in self.base_metrics:
17 self.redis.sadd(self.metric_index_key, metric)
18
19 for advanced_metric in self.advanced_metrics:
20 self.redis.sadd(self.metric_index_key, advanced_metric)
21
22 def index_scores(self):
23 for score in self.scores:
24 self.redis.sadd(self.score_index_key, score)
25
26 for advanced_score in self.advanced_scores:
27 self.redis.sadd(self.score_index_key, advanced_score)
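The indexes above are ordinary Redis sets and hashes, so results can be read back from any other process. A small sketch (not part of this commit), assuming a graph stored under the name my_graph in database 1, the database metric_calculator.py connects to; graph name and node id are made up:

import redis as rd

r = rd.StrictRedis(host='localhost', port=6379, db=1)
print(r.smembers('all_graphs'))                         # names of stored graphs
print(r.smembers('my_graph:all_metrics'))               # indexed metric names
print(r.hget('my_graph:node_metrics:12345', 'degree'))  # one metric of one node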
File indexing.pyc added (mode: 100644) (index 0000000..68587bb)
The diff for file log is too big (644344 changes) and cannot be shown.
File metric_calculator.py added (mode: 100644) (index 0000000..26c1f58)
1 import networkx as nx
2 import graph_tool.all as gt
3 import redis as rd
4 import numpy as np
5 import indexing
6 import statistics
7 import normalizations
8 import config
9 import percolation
10 import visualization
11 import datetime as dt
12
13
14 class MetricCalculator(object):
15 def __init__ (self, graph, graph_gt):
16 #class constructor
17 #define required class variables such as the graph to work on, the redis connection and the nodes of the graph
18
19 print ('Starting metric_calculator!')
20
21 # for code evaluation
22 self.start_time = dt.datetime.now()
23 self.durations = {}
24 self.durations_in_seconds = {}
25 self.durations_in_percent = {}
26
27 self.graph = graph
28 self.graph_gt = graph_gt
29
30 # alternate name for graph tool graph
31 self.g = self.graph_gt['graph_gt']
32 # alternate name for graph tool labels
33 self.g.vp.label_map = self.graph_gt['graph_gt_labels']
34 self.label_map = self.g.vp.label_map
35 # vertex property map for percolation calculations
36 self.g.vp.exmap = self.g.new_vertex_property("bool") #internalizes map
37 self.exclusion_map = self.g.vp.exmap
38 self.exclusion_map.a = 1 #initialise filter map
39 #find largest component of graph tool graph for percolation calculations
40 # percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
41 self.glc = gt.GraphView(self.g, vfilt = gt.label_largest_component(self.g))
42
43
44 self.redis = rd.StrictRedis(host='localhost', port=6379, db=1)
45 self.nodes = nx.nodes(graph)
46
47
48 # configuration variables are read from the config file and are also saved to class variables for easy access
49 self.graph_index_key = config.graph_index_key
50
51 self.graph_name = ''
52 while (self.graph_name == ''):
53 self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")
54
55 self.info_index_key = self.graph_name+':'+config.info_index_key
56 self.node_index_key = self.graph_name+':'+config.node_index_key
57 self.metric_index_key = self.graph_name+':'+config.metric_index_key
58 self.score_index_key = self.graph_name+':'+config.score_index_key
59 self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
60 self.layout_index_key = self.graph_name+':'+config.layout_index_key
61
62 self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
63 self.node_prefix = self.graph_name+':'+config.node_prefix
64 self.metric_prefix = self.graph_name+':'+config.metric_prefix
65 self.score_prefix = self.graph_name+':'+config.score_prefix
66 self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
67 self.percolation_prefix = self.graph_name+':'+config.percolation_prefix
68
69 self.normalization_suffix = config.normalization_suffix
70
71 self.base_metrics = config.base_metrics
72 self.advanced_metrics = config.advanced_metrics
73
74 self.normalization_methods = config.normalization_methods
75
76 self.scores = config.scores
77 self.advanced_scores = config.advanced_scores
78
79 self.visualization_layouts = config.visualization_layouts
80
81 self.percolation_modes = config.percolation_modes
82 self.advanced_percolation_modes = config.advanced_percolation_modes
83
84 ##############################################################################
85 ###### start describes the entire calculation in a high level overview #######
86 ##############################################################################
87
88 def start(self):
89 start_time_calculation = dt.datetime.now()
90
91 #preliminary calculations
92 self.flush_database()
93 self.obtain_percentages()
94 self.create_info()
95 self.create_standard_layout()
96 self.save_graph_data('raw')
97
98 #index creation
99 self.create_indexes()
100
101 #main calculations
102 self.calculate_metrics()
103 self.calculate_advanced_metrics()
104 self.normalize_metrics()
105 self.calculate_scores()
106 self.calculate_advanced_scores()
107
108 #statistics
109 self.calculate_statistics()
110
111 #dynamic metrics / percolation
112 self.calculate_percolation()
113
114 #visualization
115 self.visualize_graph()
116
117 #save final graph
118 self.save_graph_data('full')
119
120 #evaluation
121 self.duration_total = dt.datetime.now() - start_time_calculation
122 self.evaluate_durations()
123
124
125 ###################
126 ## PRELIMINARIES ##
127 ###################
128 def flush_database(self):
129 # ask to clean all data in Redis
130 flush_flag = 'Flushing'
131 while (flush_flag != 'y' and flush_flag != 'n'):
132 flush_flag = raw_input("Would you like to flush the database before continuing? [y/n]")
133 if flush_flag == 'y':
134 self.redis.flushdb()
135
136 def obtain_percentages(self):
137 # obtain percentages for calculation of deterioration #
138 # and calculate number of nodes to remove from graph ##
139 percentages = '' # initialise
140 while (percentages == ''):
141 percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")
142
143 percentages = sorted([float(pct)for pct in percentages.split()])
144 numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
145 # create a dictionary of percentages and corresponding numbers of nodes
146 self.percentages = dict(zip(numbers,percentages))
147 # storing values in redis DB
148 self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
149
150 def create_info(self):
151 #store general info about graph
152 self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
153 self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())
154
155 def create_standard_layout(self):
156 # create a standard layout
157 start_time = dt.datetime.now()
158 print 'Creating standard layout for graph visualization.'
159 if not hasattr(self.g.vp, 'sfdp'):
160 self.sfdp = gt.sfdp_layout(self.g, C=0.5)
161 self.g.vp['sfdp'] = self.sfdp
162 else:
163 self.sfdp = self.g.vp['sfdp']
164 self.durations['SFDP_layout'] = dt.datetime.now() - start_time
165
166 def save_graph_data(self,name):
167 # save graph
168 start_time = dt.datetime.now()
169 print 'Saving raw graph data'
170 self.g.save(self.graph_name+'_'+name+'.gt.gz')
171 self.durations['saving_graph'+name] = dt.datetime.now() - start_time
172
173 ##################
174 #### INDEXING ####
175 ##################
176 def create_indexes(self):
177 start_time = dt.datetime.now()
178 #call methods defined in indexing.py
179 indexing.index_graph(self)
180 indexing.index_nodes(self)
181 indexing.index_neighbors(self)
182 indexing.index_metrics(self)
183 indexing.index_scores(self)
184 #indexing.index_percolation(self)
185 self.durations['indexing'] = dt.datetime.now() - start_time
186
187 ###########################
188 #### CALCULATION LOOPS ####
189 ###########################
190
191 def calculate_metrics(self):
192 start_time_total = dt.datetime.now()
193 # loop through all defined metrics and call specified calculation method for each node
194 print ('Starting calculate_metrics')
195 for metric_name in self.base_metrics:
196 start_time = dt.datetime.now()
197 metric_method = self.base_metrics[metric_name]
198
199 # loop through all nodes
200 for node in self.nodes:
201 # call calculation method of supplied metric for current node
202 node = int(node)
203 value = float(metric_method(self,node))
204
205 #store result in node values
206 self.redis.hset(self.node_prefix+str(node), metric_name, value)
207
208 #also store result to metric set
209 self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
210 self.durations['metrics:'+metric_name] = dt.datetime.now() - start_time
211 self.durations['metrics_total'] = dt.datetime.now() - start_time_total
212
213
214 def calculate_advanced_metrics(self):
215 start_time_total = dt.datetime.now()
216 # loop through all defined_advanced_metrics and call specified calculation method
217 print ('Starting calculate_advanced_metrics')
218 for advanced_metric_name in self.advanced_metrics:
219 start_time = dt.datetime.now()
220 metric_method = self.advanced_metrics[advanced_metric_name]
221
222 # loop through all nodes
223 for node in self.nodes:
224 node = int(node)
225 value = float(metric_method(self,node))
226
227 #store result in node values
228 self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)
229
230 #also store result to metric set
231 self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
232 self.durations['adv_metrics:'+advanced_metric_name] = dt.datetime.now() - start_time
233 self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total
234
235
236 # loop through all defined normalizations and call respective normalization method
237 # metrics not listed in the "normalization_methods" hash fall back to min-max normalization
238 def normalize_metrics(self):
239 start_time = dt.datetime.now()
240 #fallback normalization: min-max
241 print ('Starting normalize_metrics')
242 all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())
243
244 for metric_name in all_metrics:
245 if self.normalization_methods.has_key(metric_name):
246 normalization_method = self.normalization_methods[metric_name]
247 else:
248 #fallback normalization is min-max
249 normalization_method = normalizations.min_max
250 normalization_method(self,metric_name)
251
252 self.durations['normalizing'] = dt.datetime.now() - start_time
253
254
255 def calculate_scores(self):
256 start_time = dt.datetime.now()
257 print ('Starting calculate_scores')
258 for score_name in self.scores:
259 metrics_with_weights = self.scores[score_name]
260
261 for node in self.nodes:
262 score_value = 0.0
263
264 # get normalized values
265 for metric in metrics_with_weights:
266 weight = self.scores[score_name][metric]
267 value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
268 score_value += weight * value
269
270 #redis_server.hset(key, value, number);
271 self.redis.hset(self.node_prefix+str(node),score_name, score_value)
272
273 self.redis.zadd(self.score_prefix+score_name, score_value, str(node))
274
275 self.durations['scores'] = dt.datetime.now() - start_time
276
277 def calculate_advanced_scores(self):
278 start_time = dt.datetime.now()
279 print ('Starting calculate_advanced_scores')
280 for advanced_score in self.advanced_scores:
281 self.advanced_scores[advanced_score](self)
282
283 self.durations['adv_scores'] = dt.datetime.now() - start_time
284
285
286 #############
287 # statistics
288 #############
289
290 def calculate_statistics(self):
291 start_time = dt.datetime.now()
292 print ('Starting calculate_statistics')
293 for metric in self.base_metrics:
294 #absolute and normalized
295 statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
296 statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)
297
298 for advanced_metric in self.advanced_metrics:
299 #absolute and normalized
300 statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
301 statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)
302
303 for score in self.scores:
304 statistics.calculate_statistics(self, score, self.score_prefix+score)
305
306 for advanced_score in self.advanced_scores:
307 statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
308 self.durations['statistics:stats'] = dt.datetime.now() - start_time
309
310 start_time = dt.datetime.now()
311 statistics.calculate_correlations(self)
312 self.durations['statistics:corr'] = dt.datetime.now() - start_time
313
314 ###################
315 # dynamic metrics #
316 ###################
317
318 def calculate_percolation(self):
319 start_time_total = dt.datetime.now()
320 print ('Starting percolation calculation')
321
322 # shorten the name for percentages and corresponding numbers of nodes to remove
323 n = self.percentages
324
325 # BASIC PERCOLATION MODES
326 # basic percolation modes take mode_name and n as input and return a #
327 # dictionary with percentage of nodes removed as key and percentage ##
328 # of deterioration as value
329 for mode_name in self.percolation_modes:
330 start_time = dt.datetime.now()
331 # initialise exclusion vertex property map
332 self.exclusion_map.a = 1
333 # read method from config file
334 mode_method = self.percolation_modes[mode_name]
335 # execute method
336 results = mode_method(self,mode_name,n)
337 # index percolation mode
338 self.redis.sadd(self.percolation_index_key, mode_name)
339 # store values
340 print 'Storing percolation percentages'
341 for percentage in results:
342 value = results[percentage]
343 #store in hash set
344 self.redis.hset(self.percolation_prefix+mode_name, percentage, value)
345
346 self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time
347
348 # ADVANCED PERCOLATION MODES
349 # advanced percolation modes take mode_name and n as input and return a ###
350 # dictionary with groups of percolation modes (e.g. metrics, countries) ###
351 # as keys and dictionaries of percentages (removed: deteriorated) as values
352 for mode_name in self.advanced_percolation_modes:
353 start_time = dt.datetime.now()
354 # initialise exclusion vertex property map
355 self.exclusion_map.a = 1
356 # read method from config file
357 mode_method = self.advanced_percolation_modes[mode_name]
358 # execute method
359 results = mode_method(self,mode_name,n)
360
361 # store values
362 print 'Storing percolation percentages'
363 for group in results:
364 # index percolation modes
365 self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
366 for percentage in results[group]:
367 value = results[group][percentage]
368 #store in hash set
369 self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)
370
371 self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time
372
373 self.durations['percolation_total'] = dt.datetime.now() - start_time_total
374
375
376 def visualize_graph(self):
377
378 for layout_name in self.visualization_layouts:
379 start_time = dt.datetime.now()
380 print 'Creating visualisation with '+layout_name+' layout'
381
382 layout_method = self.visualization_layouts[layout_name]
383 pos = layout_method(self)
384 gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")
385
386 self.redis.sadd(self.layout_index_key, layout_name)
387 self.durations['layout:'+layout_name] = dt.datetime.now() - start_time
388
389 def evaluate_durations(self):
390 #print out times taken
391 print 'times taken:'
392 output = open(str(self.graph_name)+"_duration_test.txt","w")
393 output.write("Graph Name:\t"+str(self.graph_name)+"\n")
394 output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
395 output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
396 output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str,self.percentages.values()))+"\n\n")
397 output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
398 for key in self.durations:
399 self.durations_in_seconds[key] = self.durations[key].total_seconds()
400 self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0
401
402 print str(key)+'\t'+str(self.durations_in_percent[key])
403 output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')
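For reference, the class above is driven exactly as start.py and gt_start.py do it; a stripped-down sketch (not part of this commit, the file name is made up):

from file_importer import FileImporter
from metric_calculator import MetricCalculator

fi = FileImporter('edges.tsv')          # tab-separated node-id pairs
graph = fi.read()                       # networkx graph
graph_gt = fi.read_gt()                 # {'graph_gt': ..., 'graph_gt_labels': ...}
mc = MetricCalculator(graph, graph_gt)
mc.start()                              # prompts for graph name, flush and percentages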
File metric_calculator.pyc added (mode: 100644) (index 0000000..b7d78f3)
File metrics.py added (mode: 100644) (index 0000000..6692689)
1 #metrics.py
2 import networkx as nx
3 import numpy as np
4 import datetime as dt
5 import graph_tool.all as gt
6
7 def clustering_coefficient(self,node):
8 print 'Calculating clustering_coefficient for node',node
9 #in the first run calculate the metric for all nodes at once and save in a hash of the instance to access later
10 #NOTE: this should result in a performance gain, but for very large graphs this might be a problem.
11 # in this case, just returning nx.clustering(self.graph, node) might be better
12 if not hasattr(self, 'all_clustering_coefficients'):
13 self.all_clustering_coefficients = nx.clustering(self.graph)
14
15 #get the actual value from the pre-calculated hash
16 return self.all_clustering_coefficients[node]
17
18 def degree(self, node):
19 print 'Calculating degree for node', node
20 return self.graph.degree(node)
21
22 def degree_gt(self, node):
23 print 'Calculating degree with graph tool for node', node
24 # find index of node
25 node_index = gt.find_vertex(self.g, self.label_map, node)[0]
26
27 # calculate degree for all nodes
28 if not hasattr(self.g.vp, 'degree'):
29 self.g.vp['degree'] = self.g.degree_property_map("total")
30
31 return self.g.vp.degree[node_index]
32
33 def eigenvector_centrality_gt(self, node):
34 print 'Calculating eigenvector centrality with graph_tool for node', node
35
36 if not hasattr(self.g.vertex_properties, 'eigenvector'):
37 eigenvalue, eigenvector = gt.eigenvector(self.g)
38 self.g.vertex_properties.eigenvector = eigenvector
39 self.eigenvalue = eigenvalue
40
41 node_index = gt.find_vertex(self.g, self.label_map,node)[0]
42
43 # this has been adjusted with eigenvalue for nicer values
44 return self.g.vp.eigenvector[self.g.vertex(node_index)]*float(self.eigenvalue)
45
46 def eigenvector_centrality(self, node):
47 print 'Calculating eigenvector centrality for node', node
48
49 if not hasattr(self, 'all_eigenvector_centralities'):
50 self.all_eigenvector_centralities = nx.eigenvector_centrality(self.graph,max_iter=100000)
51
52 return self.all_eigenvector_centralities[node]
53
54 def average_neighbor_degree(self,node):
55 print 'Calculating average_neighbour_degree for node',node
56 # same caching technique as in self.clustering_coefficient
57 # might also break for very large graphs
58 # nx.average_neighbor_degree(self.graph, nodes=node) might be the way to go
59
60 if not hasattr(self, 'all_average_neighbor_degrees'):
61 self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
62 return self.all_average_neighbor_degrees[node]
63
64 def iterated_average_neighbor_degree(self, node):
65 print 'Calculating iterated_average_neighbor degree for node',node
66 result = 0 # initialise
67
68 first_level_neighbors = self.graph.neighbors(node)
69 # print ('First level neigbors are', first_level_neighbors)
70 if len(first_level_neighbors) != 0:
71 second_level_neighbors = []
72 # print ('Second level neigbors are', second_level_neighbors)
73 # get all two-hop nodes
74 for first_level_neighbor in first_level_neighbors:
75 current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
76 second_level_neighbors.extend(current_second_level_neighbors)
77
78 #remove one-hop nodes and self
79 relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
80
81 if len(relevant_nodes) != 0:
82 degree_sum = 0
83 for relevant_node in relevant_nodes:
84 degree_sum += self.graph.degree(relevant_node)
85 result = float(degree_sum)/float(len(relevant_nodes))
86 return result
87
88 def iterated_average_neighbour_degree_gt(self, node):
89 print 'Calculating iterated_average_neighbour degree with graph tool for node',node
90
91 result = 0 # initialise
92
93 vertex = gt.find_vertex(self.g, self.label_map, node)[0]
94 first_level_neighbours = vertex.all_neighbors()
95
96 if len(first_level_neighbours) != 0:
97 second_level_neighbours = []
98 # get all two-hop nodes
99 for first_level_neighbour in first_level_neighbours:
100 current_second_level_neighbours = first_level_neighbour.all_neighbours()
101 second_level_neighbours.extend(current_second_level_neighbours)
102
103 #remove one-hop nodes and self
104 relevant_vertices = set(second_level_neighbours) - set(first_level_neighbours) - set([vertex])
105
106 if len(relevant_vertices) != 0:
107 # if degree has not been calculated, yet, calculate degree for all nodes
108 if not hasattr(self.g.vp, 'degree'):
109 self.g.vp['degree'] = self.g.degree_property_map("total")
110
111 degree_sum = 0 # initialise
112 for relevant_vertex in relevant_vertices:
113 degree_sum += self.g.vp.degree[relevant_vertex]
114 result = float(degree_sum)/float(len(relevant_vertices))
115 return result
116
117 def eccentricity(self, node):
118 print 'Calculating eccentricity for node', node
119 if not hasattr(self, 'all_eccentricities'):
120 l = gt.label_largest_component(self.g) #find the largest component
121 print ('Found the largest component')
122 # print ("Printing labeled largest component",l.a)
123 u = gt.GraphView(self.g, vfilt=l) # extract the largest component as a graph
124 print 'The number of vertices in the largest component is', u.num_vertices()
125 print 'The number of vertices in the original graph is', self.g.num_vertices()
126 # if nx.is_connected(self.graph) == True:
127 if (u.num_vertices() == nx.number_of_nodes(self.graph)):
128 print ("Graph is connected")
129 self.all_eccentricities = nx.eccentricity(self.graph)
130 print ("Calculated all eccentricities")
131 # print("Eccentricities are",self.all_eccentricities)
132 return self.all_eccentricities[node]
133 else:
134 # return 0
135 print("Graph is disconnected")
136 self.all_eccentricities = {}
137 if (self.all_eccentricities != {}):
138 print("Returning eccentricity for",node,"-",self.all_eccentricities[node])
139 return self.all_eccentricities[node]
140 else:
141 print("Returning 0")
142 return 0
143
144 def eccentricity_gt(self, node):
145 print 'Calculating eccentricity with graph tool for node', node
146
147 #find index of node
148 node_index = gt.find_vertex(self.g, self.label_map, node)[0]
149
150 if not hasattr(self.g.gp,'pseudo_diameter'):
151 # find approx. diameter
152 print 'Finding maximum distance for walk'
153 self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
154 self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
155 # endpoints will not be used
156
157 #find all distances from node
158 distances = gt.shortest_distance(self.g,node_index,max_dist=self.g.gp.pseudo_diameter+1).a
159 #calculate maximum
160 maximum = np.ma.max(np.ma.masked_where(distances > 2147483646, distances),0)
161 return maximum
162
163 def eccentricity_gt_s(self, node):
164 print 'Calculating eccentricity for small graphs with graph tool for node', node
165 eccentricity = 0 # initialise
166
167 #find index of node
168 node_index = gt.find_vertex(self.g, self.label_map, node)[0]
169 #get all shortest path lengths
170 if not hasattr(self, 'all_distances'):
171 self.all_distances = gt.shortest_distance(self.g)
172
173 for distance in self.all_distances[node_index]:
174 if distance < 2147483647: # disregard all nodes which are not accessible
175 eccentricity = max(eccentricity, distance)
176 return eccentricity
177
178 def betweenness_centrality(self, node):
179 print 'Calculating betweenness_centrality for node',node
180 if not hasattr(self, 'all_betweenness_centralities'):
181 self.all_betweenness_centralities = nx.betweenness_centrality(self.graph)
182 return self.all_betweenness_centralities[node]
183
184
185 def betweenness_centrality_gt(self, node):
186 print 'Calculating betweenness_centrality with graph_tool for node',node
187 # print('Self is',self.graph_gt['graph_gt'])
188 # print('Self is also',self.graph_gt['graph_gt_labels'])
189 # def convert_graph(g):
190 #converts a networkX graph to graph_tool
191 #important : NetworkX node indexes start with 1, whereas Graph tool node indexes start with 0
192 # adj = nx.adjacency_matrix(g)
193 # j = gt.Graph(directed=False)
194 # j.add_vertex(len(adj))
195 # num_vertices = adj.shape[0]
196 # for i in range(num_vertices - 1):
197 # for l in range(i + 1, num_vertices):
198 # if adj[i,l] != 0:
199 # j.add_edge(i, l)
200 # return j
201
202
203 if not hasattr(self.g.vertex_properties, 'betweenness'):
204 vp,ep = gt.betweenness(self.g)
205 # internalize property maps
206 self.g.vertex_properties.betweenness = vp
207 self.g.edge_properties.betweenness = ep
208 node_index = gt.find_vertex(self.g,self.label_map,node)[0]
209 # print("Node",node,"has index",node_label)
210 # print('Vp is',vp)
211 # print('Betweenness centrality of node',node,'is',vp[self.graph_gt['graph_gt'].vertex(node_label[0])])
212
213 return self.g.vp.betweenness[self.g.vertex(node_index)]
214
215 def average_shortest_path_length(self, node):
216 print 'Calculating average_shortest_path_length for node',node
217 # caching average_shortest_path_length for all nodes at once failed
218 # already switched to single calculation
219
220 #get all shortest path lengths
221 all_shortest_path_lengths_for_node = nx.shortest_path_length(self.graph, source=node)
222
223 #calculate average
224 sum_of_lengths = 0
225 for target in all_shortest_path_lengths_for_node:
226 sum_of_lengths += all_shortest_path_lengths_for_node[target]
227
228 return float(sum_of_lengths)/len(all_shortest_path_lengths_for_node)
229
230 def average_shortest_path_length_gt(self, node):
231 print 'Calculating average_shortest_path_length with graph tool for node',node
232 #find index of node
233 node_index = gt.find_vertex(self.g, self.label_map, node)[0]
234
235 if not hasattr(self.g.gp,'pseudo_diameter'):
236 # find approx. diameter
237 print 'Finding maximum distance for walk'
238 self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
239 self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
240 # endpoints will not be used
241
242 #find all distances from node
243 distances = gt.shortest_distance(self.g,node_index,max_dist=self.g.gp.pseudo_diameter+1).a
244 #calculate average
245 average = np.ma.average(np.ma.masked_where(distances > 2147483646, distances))
246 return float(average)
247
248 def average_shortest_path_length_gt_small_graphs(self, node):
249 print 'Calculating average_shortest_path_length for small graphs with graph tool for node',node
250 result = 0 # initialise
251
252 #find index of node
253 node_index = gt.find_vertex(self.g, self.label_map, node)[0]
254 #get all shortest path lengths
255 if not hasattr(self, 'all_distances'):
256 self.all_distances = gt.shortest_distance(self.g)
257
258 distances = self.all_distances[node_index]
259 #calculate average
260 sum_of_distances = 0
261 accessible_nodes = 0
262 for distance in distances:
263 if distance < 2147483647: # disregard all nodes in other components
264 sum_of_distances += distance
265 accessible_nodes += 1
266 if accessible_nodes != 0:
267 result = float(sum_of_distances)/float(accessible_nodes)
268 return result
269
270 def deterioration(self, node):
271 print 'Calculating deterioration due to removal of node', node
272
273 #g = self.graph_gt['graph_gt']
274 #g.vp.temp = g.new_vertex_property("bool") #create property map for exclusion
275 #g.vp.temp.a = 1 #initialise filter map
276 node_index = gt.find_vertex(self.g, self.label_map, node)[0]
277 self.exclusion_map[node_index] = 0 #take out node
278 u = gt.GraphView(self.g, vfilt = self.exclusion_map)
279 u = gt.GraphView(self.g, vfilt = gt.label_largest_component(u))
280 p = 100.0*(1.0-float(u.num_vertices())/float(self.glc.num_vertices()))
281 self.exclusion_map[node_index] = 1 #reset node
282
283 return p
284
285 #############
286 # advanced metrics
287 #############
288 def correct_clustering_coefficient(self,node):
289 print 'Calculating correct_clustering_coefficient for node',node
290 clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node),'clustering_coefficient'))
291 degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
292 max_degree = self.redis.zrange(self.metric_prefix+'degree', -1, -1, withscores=True, score_cast_func=float)[0][1]
293 corrected_cc = clustering_coefficient * np.log(degree) / np.log(max_degree)
294 return corrected_cc
295
296 def correct_clustering_coefficient_old(self,node):
297 print 'Calculating correct_clustering_coefficient for node',node
298 clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node),'clustering_coefficient'))
299 degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
300 corrected_cc = clustering_coefficient + (degree * clustering_coefficient) / float(4)
301 return corrected_cc
302
303 def correct_average_neighbor_degree(self,node):
304 print 'Calculating correct_average_neighbor degree for node',node
305 avgnd = float(self.redis.hget(self.node_prefix+str(node), 'average_neighbor_degree'))
306
307 if avgnd == 0.0:
308 result = avgnd
309 else:
310 neighbors = self.graph.neighbors(node)
311 number_of_neighbors = float(len(neighbors))
312 if number_of_neighbors == 0.0:
313 result = avgnd
314 else:
315 neighbor_degrees = []
316 for neighbor in neighbors:
317 neighbor_degrees.append(self.graph.degree(neighbor))
318
319 #using numpy median and standard deviation implementation
320 numpy_neighbor_degrees = np.array(neighbor_degrees)
321 standard_deviation = np.std(numpy_neighbor_degrees)
322 if standard_deviation == 0.0:
323 result = avgnd
324 else:
325 median = np.median(numpy_neighbor_degrees)
326 result = avgnd + ( ((median - avgnd) / standard_deviation) / number_of_neighbors ) * avgnd
327 return result
328
329 def correct_iterated_average_neighbor_degree(self, node):
330 print 'Calculating correct_iterated_average_neighbor_degree for node '+str(node)
331 iand = float(self.redis.hget(self.node_prefix+str(node), 'iterated_average_neighbor_degree'))
332 ciand = iand
333 if iand != 0.0:
334 first_level_neighbors = self.graph.neighbors(node)
335 second_level_neighbors = []
336
337 # get all two-hop nodes
338 for first_level_neighbor in first_level_neighbors:
339 current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
340 second_level_neighbors.extend(current_second_level_neighbors)
341
342 #remove one-hop neighbors and self
343 relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
344
345 if len(relevant_nodes) != 0:
346 node_degrees = []
347 for relevant_node in relevant_nodes:
348 node_degrees.append(self.graph.degree(relevant_node))
349
350 numpy_node_degrees = np.array(node_degrees)
351 standard_deviation = np.std(numpy_node_degrees)
352 if standard_deviation != 0.0:
353 median = np.median(numpy_node_degrees)
354 ciand = iand + ( ((median - iand) / standard_deviation) / float(len(relevant_nodes)) ) * iand
355 return ciand
356
357
358
359
360
361
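A worked example (made-up numbers) of correct_clustering_coefficient above: a node with clustering coefficient 0.5 and degree 8 in a graph whose maximum degree is 4096 is damped by log(8)/log(4096) = 0.25, independent of the logarithm's base:

import numpy as np
print(0.5 * np.log(8) / np.log(4096))  # 0.125 - low-degree nodes are damped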
File metrics.pyc added (mode: 100644) (index 0000000..a564700)
The diff for file nohup.out is too big (1252439 changes) and cannot be shown.
File normalizations.py added (mode: 100644) (index 0000000..a959a8c)
1 #normalizations.py
2 def min_max(self,metric_name):
3 #perform min max normalization of specified metric for all nodes
4 #min_max normalization
5 #get min and max from redis
6 x_min = self.redis.zrange(self.metric_prefix+metric_name, 0, 0, withscores=True, score_cast_func=float)[0][1]
7 x_max = self.redis.zrange(self.metric_prefix+metric_name, -1, -1, withscores=True, score_cast_func=float)[0][1]
8
9 #print x_min
10 #print x_max
11
12 for node in self.nodes:
13 if x_min == x_max:
14 x_normalized = 1.0
15 else:
16 x = float(self.redis.hget(self.node_prefix+str(node), metric_name))
17 x_normalized = (x - x_min) / (x_max - x_min)
18
19 #store value for node and metric
20 self.redis.zadd(self.metric_prefix+metric_name+self.normalization_suffix, x_normalized, str(node))
21 self.redis.hset(self.node_prefix+str(node),metric_name+self.normalization_suffix, x_normalized)
22
23 #max min normalization
24 def max_min(self,metric_name):
25 x_min = self.redis.zrange(self.metric_prefix+metric_name, 0, 0, withscores=True, score_cast_func=float)[0][1]
26 x_max = self.redis.zrange(self.metric_prefix+metric_name, -1, -1, withscores=True, score_cast_func=float)[0][1]
27
28 for node in self.nodes:
29 if x_min == x_max:
30 x_normalized = 1.0
31 else:
32 x = float(self.redis.hget(self.node_prefix+str(node), metric_name))
33 x_normalized = (x_max - x) / (x_max - x_min)
34
35 #store value for node and metric
36 self.redis.zadd(self.metric_prefix+metric_name+self.normalization_suffix, x_normalized, str(node))
37 self.redis.hset(self.node_prefix+str(node),metric_name+self.normalization_suffix, x_normalized)
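As a plain-Python illustration of the two normalizations above, stripped of the Redis sorted sets (x_values stands in for the metric values of all nodes):

    def min_max_plain(x_values):
        # smallest value maps to 0.0, largest to 1.0
        x_min, x_max = min(x_values), max(x_values)
        if x_min == x_max:
            return [1.0 for _ in x_values]
        return [(x - x_min) / (x_max - x_min) for x in x_values]

    def max_min_plain(x_values):
        # inverted scale: smallest value maps to 1.0, largest to 0.0
        x_min, x_max = min(x_values), max(x_values)
        if x_min == x_max:
            return [1.0 for _ in x_values]
        return [(x_max - x) / (x_max - x_min) for x in x_values]

    print(min_max_plain([2.0, 4.0, 6.0]))   # [0.0, 0.5, 1.0]
    print(max_min_plain([2.0, 4.0, 6.0]))   # [1.0, 0.5, 0.0]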
File normalizations.pyc added (mode: 100644) (index 0000000..85e8e32)
File percolation.py added (mode: 100644) (index 0000000..7038340)
1 import graph_tool.all as gt
2 import numpy as np
3 import datetime as dt
4 import visualization
5
6 ###############################################
7 ### NOTE: We use the largest component, not ###
8 ### the entire graph for the calculation ###
9 ###############################################
10
11 ###############################################
12 ### functions used by all percolation modes ###
13 ###############################################
14 def percolation(percolated_graph,intact_graph):
15 return 100.0*(1.0-float(percolated_graph.num_vertices())/float(intact_graph.num_vertices()))
16
17 def print_info(flc, glc):
18 print 'filtered graph - vertices: '+str(flc.num_vertices())+' / edges: '+str(flc.num_edges())
19 print 'percolation: '+str(percolation(flc,glc))+'%'
20
21 # the function below was needed in previous versions of CoRiA because the set members were nested within another, stringified set
22 #def read_redis_smembers(redis,key):
23 # s = redis.smembers(key) #read set
24 #return [i.strip() for i in [l.strip('[]').split(',') for l in s][0]] #write list and strip of useless characters
25
26
27 #################################
28 ####### percolation modes #######
29 #################################
30
31 # These percolation modes take as input the mode name and n - a dictionary of
32 # numbers of nodes to take out (as keys) and corresponding percentages.
33 # They return a dictionary of percentage keys and percolation values.
34 # Advanced percolation modes nest this dictionary within a dictionary of groups.
35 # Therefore, they require a loop over these groups, which can be e.g. metrics or countries.
36
37 #################################
38 #### BASIC PERCOLATION MODES ####
39 #################################
40 def failure(self, mode_name, n):
41 print 'Calculating percolation due to random failure'
42 # initialise
43 counter = 0
44 results = {}
45
46 # take a random sample from the largest component
47 for v in np.random.choice(list(self.glc.vertices()),size=max(n.keys()),replace=False):
48 self.exclusion_map[self.g.vertex(v)] = 0
49 counter += 1
50 if counter in n.keys():
51 print counter,'nodes removed'
52 # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
53 f = gt.GraphView(self.g, vfilt = self.exclusion_map)
54 # largest component of graph f
55 flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
56 print_info(flc,self.glc)
57 results[n[counter]] = percolation(flc,self.glc)
58 # visualize deterioration
59 # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+str(int(n[counter]))+'_pct')
60
61 return results
62
63 #####################################################################
64
65 def random_walk(self, mode_name, n):
66 print 'Calculating percolation due to random walk'
67 #first vertex for random walk
68 start = self.glc.vertex(np.random.randint(0,self.glc.num_vertices()), use_index=False)
69
70 #do random walk
71 alternate_list = list(self.label_map.a)
72 np.random.shuffle(alternate_list)
73 results = rw(self,start,n,alternate_list,mode_name)
74
75 #return dict(zip(percentages,percolations))
76 return results
77
78 #####################################################################
79
80 #################################
81 ## ADVANCED PERCOLATION MODES ###
82 #################################
83 def target_list(self, mode_name, n):
84 print 'Calculating percolation due to targeted attack, i.e. taking out top nodes from a target list'
85 # instantiate results dictionary and target lists
86 results = {}
87 nodes_max = {}
88
89 #loop through all metrics
90 all_metrics = list(self.base_metrics.keys() + self.advanced_metrics.keys())
91 for metric in all_metrics:
92 #get nodes with highest value of metric
93 nodes_max[metric] = list(reversed(self.redis.zrange(self.metric_prefix+metric+self.normalization_suffix, -max(n.keys()), -1, withscores=False, score_cast_func=float))) # list.reverse() returns None, so build the reversed list explicitly
94 #loop through all scores
95 all_scores = list(self.scores.keys() + self.advanced_scores.keys())
96 for score in all_scores:
97 #get nodes with highest value of score
98 nodes_max[score] = list(reversed(self.redis.zrange(self.score_prefix+score, -max(n.keys()), -1, withscores=False, score_cast_func=float))) # list.reverse() returns None, so build the reversed list explicitly
99
100 #loop through all metrics and scores
101 for metric in all_metrics+all_scores:
102 print 'Taking out top nodes for metric',metric
103
104 # initialise variables and exclusion map
105 counter = 0
106 self.exclusion_map.a = 1
107 results[metric] = {}
108
109 for node in nodes_max[metric]:
110 vertex = gt.find_vertex(self.g,self.label_map,node)[0]
111 self.exclusion_map[vertex] = 0
112 counter += 1
113 if counter in n.keys():
114 print counter,'nodes removed'
115 # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
116 f = gt.GraphView(self.g, vfilt = self.exclusion_map)
117 # largest component of graph f
118 flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
119 print_info(flc,self.glc)
120 results[metric][n[counter]] = percolation(flc,self.glc)
121 # visualize deterioration
122 # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+metric+'_'+str(int(n[counter]))+'_pct')
123
124 return results
125
126 #####################################################################
127
128 def hybrid_mode(self, mode_name, n):
129 print 'Calculating percolation due to random walk starting from node with highest value of metric'
130 # instantiate results dictionary and alternate lists for random walk
131 results = {}
132 alternate_lists = {}
133
134 #loop through all metrics
135 all_metrics = list(self.base_metrics.keys() + self.advanced_metrics.keys())
136 for metric in all_metrics:
137 #get nodes with highest value of metric
138 temp_list = self.redis.zrange(self.metric_prefix+metric+self.normalization_suffix, 0, -1, withscores=False, score_cast_func=float)
139 alternate_lists[metric] = [node for node in reversed(temp_list)]
140
141 #loop through all scores
142 all_scores = list(self.scores.keys() + self.advanced_scores.keys())
143 for score in all_scores:
144 #get nodes with highest value of score
145 temp_list = self.redis.zrange(self.score_prefix+score, 0, -1, withscores=False, score_cast_func=float)
146 alternate_lists[score] = [node for node in reversed(temp_list)]
147
148 #loop through all metrics and scores
149 for metric in all_metrics+all_scores:
150 print 'Starting from node with highest value of metric',metric
151 #initialise exclusion vertex property map
152 self.exclusion_map.a = 1
153
154 #first vertex for random walk
155 start = gt.find_vertex(self.g,self.label_map,alternate_lists[metric][0])[0]
156
157 #do random walk
158 results[metric] = rw(self,start,n,alternate_lists[metric],mode_name+'_'+metric)
159
160 return results
161
162 def russian(self, mode_name, n):
163 print 'Calculating percolation due to shutting off the Russian network from the internet'
164 # instantiate results dictionary and target lists
165 #results = {}
166 #nodes_max = {}
167 self.exclusion_map.a = 0
168 counter = 0
169 results = {}
170 for v in self.g.vertices():
171 if self.g.vp.country_code[v] == 'RU':
172 print 'Shutting off node',int(v),'because it\'s Russian!'
173 self.exclusion_map[v] = 1
174 counter += 1
175 # if counter in n.keys():
176 # print counter,'nodes removed'
177 # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
178 # f = gt.GraphView(self.g, vfilt = self.exclusion_map)
179 # largest component of graph f
180 # flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
181 # print_info(flc,self.glc)
182 # results[n[counter]] = percolation(flc,self.glc)
183 # visualize deterioration
184 # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+metric+'_'+str(int(n[counter]))+'_pct')
185
186 f = gt.GraphView(self.g, vfilt = self.exclusion_map)
187 flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
188 #results[max(n.values())] = percolation(flc,self.g)
189 # visualize deterioration
190 print 'Creating visualization #1 of the deterioration.'
191 visualization.draw_deterioration(self,self.g.vp.sfdp,mode_name+"_SFDP_inverse")
192 print 'Creating visualization #2 of the deterioration.'
193 visualization.draw_deterioration(self,self.g.vp.Random,mode_name+"_Random_inverse")
194 print 'Creating visualization #3 of the deterioration.'
195 visualization.draw_deterioration(self,self.g.vp.Radial,mode_name+"_Radial_inverse")
196 #return results
197 #####################################################################
198 ############## Random Walk for the RW deletion modes ################
199 #####################################################################
200
201 # takes as input a start vertex, a dictionary n of node counts to remove (keys) mapped to percentages (values),
202 # and an alternate list of vertices to fall back on if the random walk reaches a dead end
203
204 def rw(self, vertex, n, alternate_list, mode_name):
205 # initialise
206 results = {}
207
208 self.exclusion_map[vertex] = 0 #take out start vertex
209 # initialise graph filters
210 # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
211 f = gt.GraphView(self.g, vfilt = self.exclusion_map)
212 # largest component of graph f
213 flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
214 if 1 in n.keys():
215 print '1 node removed'
216 print_info(flc,self.glc)
217 results[n[1]] = percolation(flc,self.glc)
218 # visualize deterioration
219 # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+str(int(n[1]))+'_pct')
220
221 for i in range(max(n.keys())-1):
222 neighbours = list(vertex.all_neighbours())
223 flag = 0 #decision flag
224
225 # choose a random neighbour
226 if len(neighbours) > 0:
227 np.random.shuffle(neighbours)
228 for neighbour in neighbours:
229 if self.exclusion_map[neighbour] != 0:
230 vertex = neighbour
231 flag = 1
232 break
233
234 # executed if no non-excluded neighbours remain - choose the next node from the alternate list
235 if flag == 0:
236 # create a list of already used list members
237 used_list = []
238 for node in alternate_list:
239 vertex = gt.find_vertex(self.g,self.label_map,node)[0]
240 used_list.append(node)
241 if self.exclusion_map[vertex] != 0:
242 break
243 if len(used_list) > 0:
244 for used_node in used_list:
245 # remove used members from alternate list. This reduces calculation time in next iteration
246 alternate_list.remove(used_node)
247
248 self.exclusion_map[vertex] = 0 #take out vertex
249 f = gt.GraphView(self.g, vfilt = self.exclusion_map) #update graph (filtered)
250 if i+2 in n.keys():
251 flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f)) #update largest component
252 print i+2,'nodes removed'
253 print_info(flc,self.glc)
254 results[n[i+2]] = percolation(flc,self.glc)
255 # visualize deterioration
256 # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+str(int(n[i+2]))+'_pct')
257
258 return results
259
260 ##############################
261 ##############################
262 ########## THE END ###########
263 ##############################
264 ##############################
265
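For reference, a small standalone sketch of how the percolation percentage and the dictionary n built by obtain_percentages in the metric calculators fit together (plain Python, hypothetical vertex counts):

    def percolation_pct(percolated_vertices, intact_vertices):
        # percentage of the intact largest component that has been lost
        return 100.0 * (1.0 - float(percolated_vertices) / float(intact_vertices))

    # n maps "number of nodes to remove" -> "percentage of nodes removed";
    # for a largest component of 1000 vertices and the input "1 5 10" it would be:
    n = {10: 1.0, 50: 5.0, 100: 10.0}

    # a basic percolation mode fills a results dictionary keyed by those percentages
    results = {}
    results[n[50]] = percolation_pct(820, 1000)   # assume 820 vertices survive the removal
    print(results)   # {5.0: 18.0}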
File percolation.pyc added (mode: 100644) (index 0000000..e81344c)
File ru_file_importer.py added (mode: 100644) (index 0000000..5c06bec)
1 import networkx as nx
2 import graph_tool.all as gt
3
4 class FileImporter(object):
5 def __init__(self,filename):
6 # read graph from *.gt file (graph-tool file) and initialize empty networkx graph
7 print 'Starting file importer!'
8 self.graph_gt = gt.load_graph(filename)
9 self.graph = nx.Graph()
10 self.graph_gt_labels = self.graph_gt.vp.label_map
11 # Russian ASN file
12 self.asn_file = open('RU_ASN.txt')
13 self.graph_gt.vp['country_code'] = self.graph_gt.new_vertex_property("string")
14
15 def read(self):
16 # reconstruct networkx graph from graph-tool graph
17 for edge in self.graph_gt.edges():
18 from_node = int(self.graph_gt_labels[edge.source()])
19 to_node = int(self.graph_gt_labels[edge.target()])
20 print "Creating edge from node",from_node,"to node",to_node
21 if (from_node != to_node):
22 self.graph.add_edge(from_node, to_node)
23 return self.graph
24
25 def read_gt(self):
26 return {'graph_gt':self.graph_gt, 'graph_gt_labels':self.graph_gt_labels}
27
28 def read_country(self):
29 labels = list(self.graph_gt_labels.a)
30 for line in self.asn_file:
31 print 'Russian ASN:',line
32 asn = float(line.strip())
33
34 if asn in labels:
35 vertex = gt.find_vertex(self.graph_gt,self.graph_gt_labels,asn)[0]
36 self.graph_gt.vp.country_code[vertex] = 'RU'
37 labels.remove(asn)
38 return self.graph_gt.vp.country_code
File ru_file_importer.pyc added (mode: 100644) (index 0000000..2ec49e6)
File ru_metric_calculator.py added (mode: 100644) (index 0000000..05d8f41)
1 import networkx as nx
2 import graph_tool.all as gt
3 import redis as rd
4 import numpy as np
5 import indexing
6 import statistics
7 import normalizations
8 import config
9 import percolation
10 import visualization
11 import datetime as dt
12
13
14 class MetricCalculator(object):
15 def __init__ (self, graph, graph_gt):
16 #class constructor
17 #define required class variables such as the graph to work on, the redis connection and the nodes of the graph
18
19 print ('Starting metric_calculator!')
20
21 # for code evaluation
22 self.start_time = dt.datetime.now()
23 self.durations = {}
24 self.durations_in_seconds = {}
25 self.durations_in_percent = {}
26
27 self.graph = graph
28 self.graph_gt = graph_gt
29
30 # alternate name for graph tool graph
31 self.g = self.graph_gt['graph_gt']
32 # alternate name for graph tool labels
33 if not hasattr(self.g.vp, 'label_map'):
34 self.g.vp.label_map = self.graph_gt['graph_gt_labels']
35 self.label_map = self.g.vp.label_map
36 # vertex property map for percolation calculations
37 if not hasattr(self.g.vp, 'exmap'):
38 self.g.vp.exmap = self.g.new_vertex_property("bool") #internalizes map
39
40 self.exclusion_map = self.g.vp.exmap
41 self.exclusion_map.a = 1 #initialise filter map
42 #find largest component of graph tool graph for percolation calculations
43 # percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
44 self.glc = gt.GraphView(self.g, vfilt = gt.label_largest_component(self.g))
45
46 if not hasattr(self.g.vp, 'eigenvector'):
47 eigenvalue, self.g.vp.eigenvector = gt.eigenvector(self.g)
48 if not hasattr(self.g.ep, 'betweenness'):
49 betweenness,self.g.ep.betweenness = gt.betweenness(self.g)
50
51 self.redis = rd.StrictRedis(host='localhost', port=6379, db=1)
52 self.nodes = nx.nodes(graph)
53
54
55 # configuration variables are read from the config file and are also saved to class variables for easy access
56 self.graph_index_key = config.graph_index_key
57
58 self.graph_name = ''
59 while (self.graph_name == ''):
60 self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")
61
62 self.info_index_key = self.graph_name+':'+config.info_index_key
63 self.node_index_key = self.graph_name+':'+config.node_index_key
64 self.metric_index_key = self.graph_name+':'+config.metric_index_key
65 self.score_index_key = self.graph_name+':'+config.score_index_key
66 self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
67 self.layout_index_key = self.graph_name+':'+config.layout_index_key
68
69 self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
70 self.node_prefix = self.graph_name+':'+config.node_prefix
71 self.metric_prefix = self.graph_name+':'+config.metric_prefix
72 self.score_prefix = self.graph_name+':'+config.score_prefix
73 self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
74 self.percolation_prefix = self.graph_name+':'+config.percolation_prefix
75
76 self.normalization_suffix = config.normalization_suffix
77
78 self.base_metrics = config.base_metrics
79 self.advanced_metrics = config.advanced_metrics
80
81 self.normalization_methods = config.normalization_methods
82
83 self.scores = config.scores
84 self.advanced_scores = config.advanced_scores
85
86 self.visualization_layouts = config.visualization_layouts
87 # this is commented out for testing purposes
88 # self.percolation_modes = config.percolation_modes
89 # self.advanced_percolation_modes = config.advanced_percolation_modes
90
91 self.percolation_modes = {'russian_shutoff':config.percolation_modes['russian_shutoff']}
92 self.advanced_percolation_modes = {}
93
94 ##############################################################################
95 ###### start describes the entire calculation in a high level overview #######
96 ##############################################################################
97
98 def start(self):
99 start_time_calculation = dt.datetime.now()
100
101 #preliminary calculations
102 #self.flush_database()
103 self.obtain_percentages()
104 #self.create_info()
105 #self.create_standard_layout()
106 #self.save_graph_data('raw')
107
108 #index creation
109 #self.create_indexes()
110
111 #main calculations
112 #self.calculate_metrics()
113 #self.calculate_advanced_metrics()
114 #self.normalize_metrics()
115 #self.calculate_scores()
116 #self.calculate_advanced_scores()
117
118 #statistics
119 #self.calculate_statistics()
120
121 #dynamic metrics / percolation
122 self.calculate_percolation()
123
124 #visualization
125 #self.visualize_graph()
126
127 #save final graph
128 self.save_graph_data('russian')
129
130 #evaluation
131 self.duration_total = dt.datetime.now() - start_time_calculation
132 self.evaluate_durations()
133
134
135 ###################
136 ## PRELIMINARIES ##
137 ###################
138 def flush_database(self):
139 # ask to clean all data in Redis
140 flush_flag = 'Flushing'
141 while (flush_flag != 'y' and flush_flag != 'n'):
142 flush_flag = raw_input("Would you like to flush the database before continuing? [y/n]")
143 if flush_flag == 'y':
144 self.redis.flushdb()
145
146 def obtain_percentages(self):
147 # obtain percentages for calculation of deterioration #
148 # and calculate number of nodes to remove from graph ##
149 percentages = '' # initialise
150 while (percentages == ''):
151 percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")
152
153 percentages = sorted([float(pct) for pct in percentages.split()])
154 numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
155 # create a dictionary of percentages and corresponding numbers of nodes
156 self.percentages = dict(zip(numbers,percentages))
157 # storing values in redis DB
158 #self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
159
160 def create_info(self):
161 #store general info about graph
162 self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
163 self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())
164
165 def create_standard_layout(self):
166 # create a standard layout
167 start_time = dt.datetime.now()
168 print 'Creating standard layout for graph visualization.'
169 if not hasattr(self.g.vp,'sfdp'):
170 self.g.vp.sfdp = gt.sfdp_layout(self.g, C=0.5)
171 self.durations['SFDP_layout'] = dt.datetime.now() - start_time
172 print self.durations['SFDP_layout']
173
174 def save_graph_data(self,name):
175 # save graph
176 start_time = dt.datetime.now()
177 print 'Saving raw graph data'
178 self.g.save(self.graph_name+'_'+name+'.gt.gz')
179 self.durations['saving_graph'+name] = dt.datetime.now() - start_time
180
181 ##################
182 #### INDEXING ####
183 ##################
184 def create_indexes(self):
185 start_time = dt.datetime.now()
186 #call methods defined in indexing.py
187 #indexing.index_graph(self)
188 #indexing.index_nodes(self)
189 #indexing.index_neighbors(self)
190 #indexing.index_metrics(self)
191 #indexing.index_scores(self)
192 #indexing.index_percolation(self)
193 self.durations['indexing'] = dt.datetime.now() - start_time
194
195 ###########################
196 #### CALCULATION LOOPS ####
197 ###########################
198
199 def calculate_metrics(self):
200 start_time_total = dt.datetime.now()
201 # loop through all defined metrics and call specified calculation method for each node
202 print ('Starting calculate_metrics')
203 for metric_name in self.base_metrics:
204 start_time = dt.datetime.now()
205 metric_method = self.base_metrics[metric_name]
206
207 # loop through all nodes
208 for node in self.nodes:
209 # call calculation method of supplied metric for current node
210 node = int(node)
211 value = float(metric_method(self,node))
212
213 #store result in node values
214 self.redis.hset(self.node_prefix+str(node), metric_name, value)
215
216 #also store result to metric set
217 self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
218 self.durations['metrics:'+metric_name] = dt.datetime.now() - start_time
219 self.durations['metrics_total'] = dt.datetime.now() - start_time_total
220
221
222 def calculate_advanced_metrics(self):
223 start_time_total = dt.datetime.now()
224 # loop through all defined_advanced_metrics and call specified calculation method
225 print ('Starting calculate_advanced_metrics')
226 for advanced_metric_name in self.advanced_metrics:
227 start_time = dt.datetime.now()
228 metric_method = self.advanced_metrics[advanced_metric_name]
229
230 # loop through all nodes
231 for node in self.nodes:
232 node = int(node)
233 value = float(metric_method(self,node))
234
235 #store result in node values
236 self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)
237
238 #also store result to metric set
239 self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
240 self.durations['adv_metrics:'+advanced_metric_name] = dt.datetime.now() - start_time
241 self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total
242
243
244 # loop through all defined normalizations and call respective normalization method
245 # no default normalizations for metrics not listed in the "normalization_methods" hash
246 def normalize_metrics(self):
247 start_time = dt.datetime.now()
248 #fallback normalization: min-max
249 print ('Starting normalize_metrics')
250 all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())
251
252 for metric_name in all_metrics:
253 if self.normalization_methods.has_key(metric_name):
254 normalization_method = self.normalization_methods[metric_name]
255 else:
256 #fallback normalization is min-max
257 normalization_method = normalizations.min_max
258 normalization_method(self,metric_name)
259
260 self.durations['normalizing'] = dt.datetime.now() - start_time
261
262
263 def calculate_scores(self):
264 start_time = dt.datetime.now()
265 print ('Starting calculate_scores')
266 for score_name in self.scores:
267 metrics_with_weights = self.scores[score_name]
268
269 for node in self.nodes:
270 score_value = 0.0
271
272 # get normalized values
273 for metric in metrics_with_weights:
274 weight = self.scores[score_name][metric]
275 value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
276 score_value += weight * value
277
278 #redis_server.hset(key, value, number);
279 self.redis.hset(self.node_prefix+str(node),score_name, score_value)
280
281 self.redis.zadd(self.score_prefix+score_name, score_value, str(node))
282
283 self.durations['scores'] = dt.datetime.now() - start_time
284
285 def calculate_advanced_scores(self):
286 start_time = dt.datetime.now()
287 print ('Starting calculate_advanced_scores')
288 for advanced_score in self.advanced_scores:
289 self.advanced_scores[advanced_score](self)
290
291 self.durations['adv_scores'] = dt.datetime.now() - start_time
292
293
294 #############
295 # statistics
296 #############
297
298 def calculate_statistics(self):
299 start_time = dt.datetime.now()
300 print ('Starting calculate_statistics')
301 for metric in self.base_metrics:
302 #absolute and normalized
303 statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
304 statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)
305
306 for advanced_metric in self.advanced_metrics:
307 #absolute and normalized
308 statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
309 statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)
310
311 for score in self.scores:
312 statistics.calculate_statistics(self, score, self.score_prefix+score)
313
314 for advanced_score in self.advanced_scores:
315 statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
316 self.durations['statistics:stats'] = dt.datetime.now() - start_time
317
318 start_time = dt.datetime.now()
319 statistics.calculate_correlations(self)
320 self.durations['statistics:corr'] = dt.datetime.now() - start_time
321
322 ###################
323 # dynamic metrics #
324 ###################
325
326 def calculate_percolation(self):
327 start_time_total = dt.datetime.now()
328 print ('Starting percolation calculation')
329
330 # shorten the name for percentages and corresponding numbers of nodes to remove
331 n = self.percentages
332
333 # BASIC PERCOLATION MODES
334 # basic percolation modes take mode_name and n as input and return a #
335 # dictionary with percentage of nodes removed as key and percentage ##
336 # of deterioration as value
337 for mode_name in self.percolation_modes:
338 start_time = dt.datetime.now()
339 # initialise exclusion vertex property map
340 self.exclusion_map.a = 1
341 # read method from config file
342 mode_method = self.percolation_modes[mode_name]
343 # execute method
344 #results = mode_method(self,mode_name,n)
345 mode_method(self,mode_name,n)
346 # index percolation mode
347 #self.redis.sadd(self.percolation_index_key, mode_name)
348 # store values
349 #print 'Storing percolation percentages'
350 #for percentage in results:
351 # value = results[percentage]
352 #store in hash set
353 #self.redis.hset(self.percolation_prefix+mode_name, percentage, value)
354
355 self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time
356
357 # ADVANCED PERCOLATION MODES
358 # advanced percolation modes take mode_name and n as input and return a ###
359 # dictionary with groups of percolation modes (e.g. metrics, countries) ###
360 # as keys and dictionaries of percentages (removed: deteriorated) as values
361 for mode_name in self.advanced_percolation_modes:
362 start_time = dt.datetime.now()
363 # initialise exclusion vertex property map
364 self.exclusion_map.a = 1
365 # read method from config file
366 mode_method = self.advanced_percolation_modes[mode_name]
367 # execute method
368 results = mode_method(self,mode_name,n)
369
370 # store values
371 #print 'Storing percolation percentages'
372 #for group in results:
373 # index percolation modes
374 # self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
375 #for percentage in results[group]:
376 # value = results[group][percentage]
377 #store in hash set
378 #self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)
379
380 self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time
381
382 self.durations['percolation_total'] = dt.datetime.now() - start_time_total
383
384
385 def visualize_graph(self):
386
387 for layout_name in self.visualization_layouts:
388 start_time = dt.datetime.now()
389 print 'Creating visualisation with '+layout_name+' layout'
390
391 layout_method = self.visualization_layouts[layout_name]
392 self.g.vp[layout_name] = layout_method(self)
393 gt.graph_draw(self.glc, pos=self.g.vp[layout_name], output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")
394
395 self.redis.sadd(self.layout_index_key, layout_name)
396 self.durations['layout:'+layout_name] = dt.datetime.now() - start_time
397 print self.durations['layout:'+layout_name]
398
399 def evaluate_durations(self):
400 #print out times taken
401 print 'times taken:'
402 output = open(str(self.graph_name)+"_duration_test_2.txt","w")
403 output.write("Graph Name:\t"+str(self.graph_name)+"\n")
404 output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
405 output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
406 output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str,self.percentages.values()))+"\n\n")
407 output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
408 for key in self.durations:
409 self.durations_in_seconds[key] = self.durations[key].total_seconds()
410 self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0
411
412 print str(key)+'\t'+str(self.durations_in_percent[key])
413 output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')
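config.py itself is not reproduced in this excerpt; judging only by how MetricCalculator consumes it, the relevant entries are dictionaries mapping names to callables, roughly along the following lines (a hedged sketch with placeholder metric names, not the actual file):

    import normalizations
    import percolation

    # base_metrics / advanced_metrics: each entry is called as metric_method(self, node);
    # the metric functions live in metrics.py and are omitted here

    normalization_methods = {
        # normalize_metrics falls back to normalizations.min_max for any metric not listed;
        # the key below is only a placeholder for a metric that needs the inverted scale
        'average_shortest_path_length': normalizations.max_min,
    }

    percolation_modes = {
        # calculate_percolation calls each entry as mode_method(self, mode_name, n)
        'russian_shutoff': percolation.russian,
    }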
File ru_metric_calculator.pyc added (mode: 100644) (index 0000000..3d11047)
File ru_start.py added (mode: 100644) (index 0000000..faf282d)
1 #!/usr/bin/env python
2 import datetime
3 import argparse
4 import cProfile, pstats, StringIO
5 from ru_file_importer import FileImporter
6 from ru_metric_calculator import MetricCalculator
7 import datetime as dt
8
9 print 'Starting metric calculation',dt.datetime.now()
10 ## added this line of code ###
11 start_time = dt.datetime.now()
12 ##############################
13 parser = argparse.ArgumentParser(description='Read a Tab-separated Graph Datafile and start Calculation of Metrics and Statistics as configured in config.py')
14
15 parser.add_argument('filename', metavar='filename', type=str,
16 help='the name of the data file containing tab separated node ids')
17
18 parser.add_argument('--profiling',dest='profiling',action='store_true', help='enable runtime profiling into profiling.txt file')
19
20 args = parser.parse_args()
21
22 if args.profiling:
23 pr = cProfile.Profile()
24 s = StringIO.StringIO()
25 timestamp = str(datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
26 outfile = open('profiling_output_'+timestamp+'.txt', 'w')
27 pr.enable()
28
29 fi = FileImporter(args.filename)
30 graph = fi.read()
31 #print('This should be a Network X graph',graph)
32 print 'Network X graph has the following number of nodes',graph.number_of_nodes()
33 print 'Network X graph has the following number of edges',graph.number_of_edges()
34 country_codes = fi.read_country()
35 graph_gt = fi.read_gt()
36 print 'Graph tool graph has the following number of nodes',graph_gt['graph_gt'].num_vertices()
37 print 'Graph tool graph has the following number of edges',graph_gt['graph_gt'].num_edges()
38 #print('Gt graph has the following properties')
39
40 time_delta = dt.datetime.now() - start_time
41 print 'Time taken to create graphs:',time_delta
42
43 mc = MetricCalculator(graph,graph_gt)
44 mc.start()
45
46 if args.profiling:
47 ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
48 ps.print_stats()
49 outfile.write(s.getvalue())
50
51 ## added this line of code ###
52 time_delta = dt.datetime.now() - start_time
53 ##############################
54 print 'Ending metric calculation',dt.datetime.now()
55 ## and this line below #######
56 print 'Time taken to calculate:',time_delta
File start.py added (mode: 100755) (index 0000000..2fa307e)
1 #!/usr/bin/env python
2 import datetime
3 import argparse
4 import cProfile, pstats, StringIO
5 from file_importer import FileImporter
6 from metric_calculator import MetricCalculator
7 import datetime as dt
8
9 print 'Starting metric calculation',dt.datetime.now()
10 ## added this line of code ###
11 start_time = dt.datetime.now()
12 ##############################
13 parser = argparse.ArgumentParser(description='Read a Tab-separated Graph Datafile and start Calculation of Metrics and Statistics as configured in config.py')
14
15 parser.add_argument('filename', metavar='filename', type=str,
16 help='the name of the data file containing tab separated node ids')
17
18 parser.add_argument('--profiling',dest='profiling',action='store_true', help='enable runtime profiling into profiling.txt file')
19
20 args = parser.parse_args()
21
22 if args.profiling:
23 pr = cProfile.Profile()
24 s = StringIO.StringIO()
25 timestamp = str(datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
26 outfile = open('profiling_output_'+timestamp+'.txt', 'w')
27 pr.enable()
28
29 fi = FileImporter(args.filename)
30 graph = fi.read()
31 #print('This should be a Network X graph',graph)
32 print 'Network X graph has the following number of nodes',graph.number_of_nodes()
33 print 'Network X graph has the following number of edges',graph.number_of_edges()
34 graph_gt = fi.read_gt()
35 print 'Graph tool graph has the following number of nodes',graph_gt['graph_gt'].num_vertices()
36 print 'Graph tool graph has the following number of edges',graph_gt['graph_gt'].num_edges()
37 #print('Gt graph has the following properties')
38
39 ## added two lines of code ###
40 duration_parsing_lines = dt.datetime.now() - start_time
41 print 'Time taken to parse lines:',duration_parsing_lines
42
43 mc = MetricCalculator(graph,graph_gt)
44 mc.start()
45
46 if args.profiling:
47 ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
48 ps.print_stats()
49 outfile.write(s.getvalue())
50
51 ## added this line of code ###
52 duration_total = dt.datetime.now() - start_time
53 ##############################
54 print 'Ending metric calculation',dt.datetime.now()
55 ## and this line below #######
56 print 'Time taken to calculate:',duration_total
File statistics.py added (mode: 100644) (index 0000000..cf8423b)
1 #statistics.py
2 import redis as rd
3 import numpy as np
4 from scipy.stats import pearsonr
5
6 def calculate_statistics(self,metric,redis_key):
7 all_values = dict(self.redis.zrange(redis_key, 0, -1, withscores=True, score_cast_func=float)).values()
8 min_value = np.min(all_values)
9 max_value = np.max(all_values)
10
11 average = np.average(all_values)
12 median = np.median(all_values)
13 standard_deviation = np.std(all_values)
14
15 self.redis.hset(self.statistics_prefix+metric, 'min', min_value)
16 self.redis.hset(self.statistics_prefix+metric, 'max', max_value)
17 self.redis.hset(self.statistics_prefix+metric, 'average', average)
18 self.redis.hset(self.statistics_prefix+metric, 'median', median)
19 self.redis.hset(self.statistics_prefix+metric, 'standard_deviation', standard_deviation)
20
21
22 def calculate_correlations(self):
23 m = self.base_metrics.keys()
24 c = self.advanced_metrics.keys()
25
26 metrics = m + c
27
28 correlations = {}
29 for metric1 in metrics:
30 correlations[metric1] = {}
31 for metric2 in metrics:
32 correlations[metric1][metric2] = (0,0)
33 if metric1 == metric2:
34 correlations[metric1][metric2] = (1,0)
35 continue
36
37 dict_metric1 = dict(self.redis.zrange(self.metric_prefix+metric1, 0, -1, withscores=True, score_cast_func=float))
38 dict_metric2 = dict(self.redis.zrange(self.metric_prefix+metric2, 0, -1, withscores=True, score_cast_func=float))
39 values_metric1 = []
40 values_metric2 = []
41
42 for key in sorted(dict_metric1.iterkeys()):
43 values_metric1.append(dict_metric1[key])
44
45 for key in sorted(dict_metric2.iterkeys()):
46 values_metric2.append(dict_metric2[key])
47
48 correlations[metric1][metric2] = pearsonr(values_metric1,values_metric2)
49
50 values_metric1 = []
51 values_metric2 = []
52
53 for source in correlations:
54 for target in correlations[source]:
55 self.redis.hset(self.statistics_prefix+"correlations:"+source+":"+target, "correlation", correlations[source][target][0])
56 self.redis.hset(self.statistics_prefix+"correlations:"+source+":"+target, "confidence", correlations[source][target][1])
57
58
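To make the pairing logic in calculate_correlations concrete: both metric dictionaries are keyed by node id, so sorting each key set independently only lines the value lists up because the key sets are identical. A toy example with scipy.stats.pearsonr (no Redis, made-up values):

    from scipy.stats import pearsonr

    # toy per-node values for two metrics, keyed by node id
    dict_metric1 = {'1': 2.0, '2': 4.0, '3': 6.0}
    dict_metric2 = {'1': 1.0, '2': 2.0, '3': 2.9}

    # align both value lists by walking the node ids in the same sorted order
    keys = sorted(dict_metric1.keys())
    values_metric1 = [dict_metric1[k] for k in keys]
    values_metric2 = [dict_metric2[k] for k in keys]

    # pearsonr returns (correlation coefficient, p-value);
    # calculate_correlations stores the second value under the field 'confidence'
    correlation, confidence = pearsonr(values_metric1, values_metric2)
    print(correlation)   # close to 1.0: the two toy metrics rise together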
File statistics.pyc added (mode: 100644) (index 0000000..f8f55b5)
File test.py added (mode: 100644) (index 0000000..1a809a9)
1 import argparse
2 import graph_tool.all as gt
3 import redis as rd
4 import numpy as np
5 import config
6
7 parser = argparse.ArgumentParser(description='Read values from the Redis DB and write a short summary into an output text file.')
8 parser.add_argument('db_index',metavar='db_index',type=int,help='Database Index')
9 args = parser.parse_args()
10 db_index = args.db_index
11
12 redis = rd.StrictRedis(host='localhost', port=6379, db=db_index)
13
14 #def read_redis_smembers(redis,key):
15 # s = redis.smembers(key) #read set
16 #return [i.strip() for i in [l.strip('[]').split(',') for l in s][0]] #write list and strip of useless characters
17 print config.graph_index_key
18 print redis.smembers(config.graph_index_key)
19
20 #for graph in redis.smembers(config.graph_index_key):
21 # all_nodes = redis.smembers(graph+':'+config.node_index_key)
22
23 #output = open(str(graph)+"-DB-"+str(db_index)+"test_out.txt","w")
24 #output.write("Graph Name: "+str(graph)+"\n")
25 #print all_nodes
26 #node_list = list(all_nodes)
27 #string = map(str,node_list)
28 #print '\n'.join(string)
29 #output.write('\n'.join(string))
30
31 #output.close
File visualization.py added (mode: 100644) (index 0000000..39e8f08)
1 import graph_tool.all as gt
2 import numpy as np
3
4 # these methods give as output a property map of positions (i.e. the layout for the visualization)
5
6 def random(self):
7 # creating visualisation with Random layout
8 pos = gt.random_layout(self.glc)
9 coloured_drawing(self, pos, 'Random_Advanced', 'heptagon')
10 # gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_RANDOM.png")
11 return pos
12
13 def frucht(self):
14 # creating visualisation with Fruchterman-Reingold layout
15 pos = gt.fruchterman_reingold_layout(self.glc, r=1.8, n_iter=36)
16 coloured_drawing(self, pos, 'Fruchterman_Reingold_Advanced', 'hexagon')
17 # gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_FRUCHT.png")
18 return pos
19
20 def arf(self):
21 # creating visualisation with ARF layout
22 pos = gt.arf_layout(self.glc, max_iter=1000)
23 # gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_ARF.png")
24 return pos
25
26 def radial(self):
27 # creating visualisation with Radial Tree layout
28 if not hasattr(self.g.vp, 'betweenness'):
29 for i in range(0,self.g.num_vertices()):
30 max_asn = self.redis.zrange(self.metric_prefix+'betweenness_centrality_(gt)',-i-1,-i-1,withscores=False,score_cast_func=float)[0]
31 max_vertex = gt.find_vertex(self.glc, self.label_map, max_asn)
32 #test whether vertex exists and vertex is in largest component
33 if len(max_vertex) > 0: # find_vertex returns a (possibly empty) list, never None
34 break #end loop
35 pos = gt.radial_tree_layout(self.glc, root=max_vertex[0], weighted=True)
36 else:
37 if hasattr(self.g.vp, 'eigenvector'):
38 temp = self.glc.vp.eigenvector
39 else:
40 temp = self.glc.vp.betweenness
41 max_vertex = gt.find_vertex(self.glc, self.glc.vp.betweenness, np.max(self.glc.vp.betweenness.a))
42 pos = gt.radial_tree_layout(self.glc, root=max_vertex[0], rel_order=temp, weighted=True, node_weight=temp)
43
44 #gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_RADIAL.png")
45 coloured_drawing(self, pos, 'Radial_Advanced_2')
46 return pos
47
48 def sfdp(self):
49 # creating visualisation with SFDP layout
50 if not hasattr(self.g.vp, 'sfdp'):
51 self.g.vp.sfdp = gt.sfdp_layout(self.glc)
52
53 pos = self.g.vp.sfdp
54
55 # test colouring of graph
56 coloured_drawing(self, pos, 'SFDP_Advanced', 'octagon')
57 return pos
58
59 def coloured_drawing(self, pos, name, shape="circle"):
60 if not hasattr(self, 'eigc'):
61 # caching to reduce calculation time overall
62 # define metric property maps for colouring
63 eigc = self.g.vp.eigenvector.copy()
64 ebwc = self.g.ep.betweenness.copy()
65
66 # right-size property maps
67 eigc.a = np.sqrt(eigc.a)
68 eigc = gt.prop_to_size(eigc)
69 ebwc = gt.prop_to_size(ebwc)
70 #vsize = eigc.copy() # obtain property map for size of vertices
71 eigc.a /= eigc.a.max() # normalization to 0-1
72 ebwc.a /= ebwc.a.max() # normalization to 0-1
73 # obtain maps for edges
74 eorder = ebwc.copy()
75 eorder.a *= -1
76 econtrol = self.g.new_edge_property("vector<double>")
77 for e in self.glc.edges():
78 d = np.sqrt(sum((pos[e.source()].a - pos[e.target()].a) ** 2)) / 3
79 econtrol[e] = [0.3,d,0.7,d]
80 # storing for later access
81 self.eigc = eigc
82 self.ebwc = ebwc
83 self.eorder = eorder
84 self.econtrol= econtrol
85
86 if not hasattr(self, 'vcolour'):
87 vcolour = self.g.new_vertex_property("vector<double>") # obtain colour map
88 for v in self.glc.vertices():
89 vcolour[v] = [self.eigc[v],self.eigc[v]/4.0,self.eigc[v]/6.0,(1.0+2.0*self.eigc[v])/3.0]
90 #vsize[v] = int(vsize[v])
91
92
93 # obtain maps for edges
94 ecolour = self.g.new_edge_property("vector<double>")
95 for e in self.glc.edges():
96 ecolour[e] = [self.ebwc[e]/8.0,self.ebwc[e]/2.0,self.ebwc[e],(1.0+2.0*self.ebwc[e])/4.0]
97
98 # storing for later access
99 self.vcolour = vcolour
100 self.ecolour = ecolour
101
102 gt.graph_draw(self.glc, pos=pos, vertex_shape=shape, vertex_fill_color=self.vcolour, vorder=self.eigc, edge_color=self.ecolour, eorder=self.eorder, edge_control_points=self.econtrol, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+name+".png")
103
104 self.redis.sadd(self.layout_index_key, name)
105
106 def draw_deterioration(self, pos, name, shape="circle"):
107 excl = self.exclusion_map.copy()
108 if not hasattr(self, 'eigc'):
109 # caching to reduce calculation time overall
110 # define metric property maps for colouring
111 eigc = self.g.vp.eigenvector.copy()
112 ebwc = self.g.ep.betweenness.copy()
113 # right-size property maps
114 eigc.a = np.sqrt(eigc.a)
115 eigc = gt.prop_to_size(eigc)
116 ebwc = gt.prop_to_size(ebwc)
117 #vsize = eigc.copy() # obtain property map for size of vertices
118 eigc.a /= eigc.a.max() # normalization to 0-1
119 ebwc.a /= ebwc.a.max() # normalization to 0-1
120 # obtain maps for edges
121 eorder = ebwc.copy()
122 eorder.a *= -1
123
124 econtrol = self.g.new_edge_property("vector<double>")
125 for e in self.glc.edges():
126 d = np.sqrt(sum((pos[e.source()].a - pos[e.target()].a) ** 2)) / 3
127 econtrol[e] = [0.3,d,0.7,d]
128 # storing for later access
129 self.eigc = eigc
130 self.ebwc = ebwc
131 self.eorder = eorder
132 self.econtrol= econtrol
133
134 # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
135 f = gt.GraphView(self.g, vfilt = excl)
136 # largest component of graph f
137 l = gt.label_largest_component(f)
138 vfcolour = self.g.new_vertex_property("vector<double>") # obtain colour map
139 vcolour = vfcolour.copy()
140 for v in self.g.vertices():
141 excl[v] *= l[v]
142 if excl[v] != 1:
143 vfcolour[v] = [0.0,0.0,0.0,0.01]
144 vcolour[v] = [0.0,0.0,0.0,0.05]
145 else:
146 vfcolour[v] = [self.eigc[v],self.eigc[v]/4.0,self.eigc[v]/6.0,(1.0+2.0*self.eigc[v])/3.0]
147 vcolour[v] = [self.eigc[v]/2.0,self.eigc[v]/3.0,self.eigc[v]/4.0,(2.0+1.0*self.eigc[v])/3.0]
148 #vsize[v] = int(vsize[v])
149
150 ecolour = self.g.new_edge_property("vector<double>")
151 for e in self.g.edges():
152 if excl[e.target()] != 1 or excl[e.source()] != 1:
153 ecolour[e] = [0.0,0.0,0.0,0.05]
154 else:
155 ecolour[e] = [self.ebwc[e]/8.0,self.ebwc[e]/2.0,self.ebwc[e],(1.0+2.0*self.ebwc[e])/4.0]
156
157 gt.graph_draw(self.g, pos=pos, vertex_shape=shape, vertex_color=vcolour, vertex_fill_color=vfcolour, vorder=self.eigc, edge_color=ecolour, eorder=self.eorder, edge_control_points=self.econtrol, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+name+".png")
158
159 #self.redis.sadd(self.layout_index_key,name)
160 #self.redis.hset(self.percolation_prefix+self.layout_index_key,name,pct)
File visualization.pyc added (mode: 100644) (index 0000000..a4a569c)