List of commits:
Subject Hash Author Date (UTC)
removed comments and unneded file 008e4a2b2b6cafc966904bec4b5f1de56aecbb2e mcehlert 2014-03-18 13:42:30
changed profiling output timestamp format e0969fdab8ecab90177fa11d7459dc46d45f5418 mcehlert 2014-02-24 15:55:31
added profiling option to start script 634b3664b6b694303fb9787781e074432967ead5 mcehlert 2014-02-24 15:45:06
added usage to readme 8125b1f4f8cb59d96c8444accab88d77bf6dd42a mcehlert 2014-01-09 16:02:47
a lot of refactoring for more modular structure 660e0d15e9b18aa9c1100c874e2140220f1c5860 mcehlert 2014-01-09 15:51:02
a lot of refactoring for more modular structure dcf7bd73ccc2f871ab8d48c43d11a8e5b392b6de mcehlert 2014-01-09 15:50:53
initial commit - pre colloquim state 655c77556f9d8e40b52893887cdb0d90f726fdbf Mathias Ehlert 2013-11-22 13:47:29
Initial commit f53ec7a3f25d55c53aa12c2682b216e16570cdc7 Mathias Ehlert 2013-11-22 13:37:47
Commit 008e4a2b2b6cafc966904bec4b5f1de56aecbb2e - removed comments and unneded file
Author: mcehlert
Author date (UTC): 2014-03-18 13:42
Committer name: mcehlert
Committer date (UTC): 2014-03-18 13:42
Parent(s): e0969fdab8ecab90177fa11d7459dc46d45f5418
Signing key:
Tree: 18d1f4bd44a33af6e03c4d28f62fee2aca7bb728
File Lines added Lines deleted
config.py 1 0
metric_calculator.py 14 404
pearson.py 0 45
File config.py changed (mode: 100644) (index d4c8c5e..6632f85)
... ... import metrics
3 3 import normalizations
4 4 import advancedscores
5 5
6 #redis keys for indexes and values
6 7 node_index_key = 'all_nodes'
7 8 metric_index_key = 'all_metrics'
8 9 score_index_key = 'all_scores'
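The three keys above are the redis index keys that config.py centralizes. Based on the commented-out defaults deleted from metric_calculator.py below, the surrounding part of config.py plausibly looks like the following sketch (the prefix values are taken from those deleted comments; their exact placement in config.py is an assumption):

#redis keys for indexes and values (defaults as in the deleted block below)
node_index_key = 'all_nodes'
metric_index_key = 'all_metrics'
score_index_key = 'all_scores'

#key prefixes and suffixes for per-node and per-metric entries
node_neighbors_prefix = 'node_neighbors:'
node_prefix = 'node_metrics:'
metric_prefix = 'metric:'
statistics_prefix = 'statistics:'
normalization_suffix = '_normalized'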
File metric_calculator.py changed (mode: 100644) (index 281cf20..f4b2e6e)
... ... import config
9 9
10 10 class MetricCalculator(object):
11 11 def __init__ (self, graph):
12 #class constructor
13 #define required class variables such as the graph to work on, the redis connection and the nodes of the graph
14
12 15 self.graph = graph
13 16 self.redis = rd.StrictRedis(host='localhost', port=6379, db=0)
14 17 self.nodes = nx.nodes(graph)
15 18
19
20 # configuration variables are read from the config file and are also saved to class variables for easy access
16 21 self.node_index_key = config.node_index_key
17 22 self.metric_index_key = config.metric_index_key
18 23 self.score_index_key = config.score_index_key
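The constructor opens a connection to a local Redis instance and keeps the graph's node list. A self-contained sketch of the same setup outside the class (assumes a Redis server on localhost:6379 plus the networkx and redis packages; in redis-py 3.x StrictRedis is simply an alias of Redis):

import networkx as nx
import redis as rd

graph = nx.erdos_renyi_graph(100, 0.05)               # any networkx graph will do
redis_connection = rd.StrictRedis(host='localhost', port=6379, db=0)
nodes = nx.nodes(graph)                               # same call the constructor uses
redis_connection.ping()                               # raises ConnectionError if Redis is down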
 
... ... class MetricCalculator(object):
34 39 self.advanced_scores = config.advanced_scores
35 40
36 41
37
38 # self.node_index_key = 'all_nodes'
39 # self.metric_index_key = 'all_metrics'
40 # self.score_index_key = 'all_scores'
41 #
42 # self.node_neighbors_prefix = 'node_neighbors:'
43 # self.node_prefix = 'node_metrics:'
44 # self.metric_prefix = 'metric:'
45 # self.statistics_prefix = 'statistics:'
46 #
47 # self.normalization_suffix = '_normalized'
48 #
49 # # definition of all base metrics for which absolute values will be calculated for each node in the first step
50 # # key is the name of the metric and value is the implemented method which exposes the required interface
51 # # interface: each method takes the node as the single parameter, performs the necessary calculation and
52 # # returns a float containing the value for the specified node
53 #
54 # self.metrics = { 'clustering_coefficient' : self.clustering_coefficient,
55 # 'degree' : self.degree,
56 # 'average_neighbor_degree' : self.average_neighbor_degree,
57 # 'iterated_average_neighbor_degree': self.iterated_average_neighbor_degree,
58 # 'betweenness_centrality' : self.betweenness_centrality,
59 # 'eccentricity' : self.eccentricity,
60 # 'average_shortest_path_length' : self.average_shortest_path_length
61 # }
62 #
63 #
64 # # some metrics might require some corrections or post processing which relies on the value of other metrics or normalizations
65 # # key is the metric name and value the method for correction
66 #
67 #
68 # self.advanced_metrics = { 'corrected_clustering_coefficient' : self.correct_clustering_coefficient,
69 # 'corrected_average_neighbor_degree' : self.correct_average_neighbor_degree,
70 # 'corrected_iterated_average_neighbor_degree': self.correct_iterated_average_neighbor_degree}
71 #
72 #
73 #
74 # # for every metric, a normalization method has to be specified
75 # # key is the name of the metric and value is the normalization method which also has to expose the required interface
76 # # interface: normalization methods, take the name of the (absolute) metric as the single argument, no return value is required
77 # # the method itself shall access the data which is required for normalization from the redis instance
78 # # and the corresponding keys/values for the specified metric
79 # # it shall then loop over all nodes and calculate the normalized value for the node and the metric
80 # # afterwards it should save the result to redis using "metric_name_normalized" as the key
81 # # the result is stored inside the node's hash for metrics
82 #
83 # # also needs to include corrected metrics with their respective names
84 # #
85 # self.normalization_methods = { 'clustering_coefficient' : self.min_max_normalization,
86 # 'corrected_clustering_coefficient' : self.min_max_normalization,
87 # 'degree' : self.min_max_normalization,
88 # 'average_neighbor_degree' : self.min_max_normalization,
89 # 'corrected_average_neighbor_degree' : self.min_max_normalization,
90 # 'iterated_average_neighbor_degree' : self.min_max_normalization,
91 # 'corrected_iterated_average_neighbor_degree': self.min_max_normalization,
92 # 'betweenness_centrality' : self.min_max_normalization,
93 # 'eccentricity' : self.inverse_min_max_normalization,
94 # 'average_shortest_path_length' : self.inverse_min_max_normalization
95 # }
96 #
97 #
98 # # the easiest case for a score is a combination of normalized metric values with a weight which adds up to 1
99 # # such scores can easily be defined here
100 # # note: names are not methods but redis keys
101 #
102 # self.scores = {'unified_risk_score': { #'corrected_clustering_coefficient': 0.2,
103 # 'degree_normalized': 0.25,
104 # 'corrected_average_neighbor_degree_normalized': 0.15,
105 # 'corrected_iterated_average_neighbor_degree_normalized': 0.1,
106 # 'betweenness_centrality_normalized': 0.25,
107 # 'eccentricity_normalized': 0.125,
108 # 'average_shortest_path_length_normalized': 0.125}
109 # }
110 #
111 #
112 # # other scores might require a more sophisticated algorithm to be calculated
113 # # such scores need to be added here and implemented like the example below
114 #
115 # self.advanced_scores = {'advanced_unified_risk_score': self.urs_clustering_coefficient_modification}
116
117
118
119
120 42
121 43 def start(self): def start(self):
122 44 #clean all data in Redis #clean all data in Redis
123 45 self.redis.flushdb() self.redis.flushdb()
124 46
125 47 #index creation #index creation
126 #self.index_nodes()
127 #self.index_neighbors()
128 #self.index_metrics()
129 #self.index_scores()
130
131 48 self.create_indexes() self.create_indexes()
132 49
133 50
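The long commented-out block deleted above documents the interface that survives the refactoring: each base metric maps a name to a method that takes a node and returns a float, advanced metrics post-process base metrics, every metric gets a normalization method, and scores are weighted sums of normalized metric values. Since the constructor now reads these tables from config (see self.advanced_scores = config.advanced_scores above) and calculate_metrics below invokes metric_method(self, node), the refactored layout is presumably module-level functions plus a name-to-function table in config.py. A sketch under that assumption (the degree body is illustrative; the clustering_coefficient caching mirrors the deleted comments):

# metrics.py -- each metric takes the calculator and a node and returns a float
import networkx as nx

def degree(self, node):
    return self.graph.degree(node)

def clustering_coefficient(self, node):
    # calculate for all nodes on first call and cache on the calculator,
    # as the deleted comments describe; later calls are plain lookups
    if not hasattr(self, 'all_clustering_coefficients'):
        self.all_clustering_coefficients = nx.clustering(self.graph)
    return self.all_clustering_coefficients[node]

# config.py -- name-to-function table consumed by calculate_metrics()
import metrics
base_metrics = {'degree': metrics.degree,
                'clustering_coefficient': metrics.clustering_coefficient}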
 
... ... class MetricCalculator(object):
145 62 #### INDEXING ####
146 63 ##################
147 64 def create_indexes(self):
65 #call methods defined in indexing.py
148 66 indexing.index_nodes(self)
149 67 indexing.index_neighbors(self)
150 68 indexing.index_metrics(self)
151 69 indexing.index_scores(self)
152 70
153
154 # def index_nodes(self):
155 # self.redis.sadd(self.node_index_key, *self.nodes)
156 #
157 # def index_neighbors(self):
158 # for node in self.nodes:
159 # node_neighbors = self.graph.neighbors(int(node))
160 # self.redis.sadd(self.node_neighbors_prefix+str(node), *node_neighbors)
161 #
162 # def index_metrics(self):
163 # for metric in self.metrics:
164 # self.redis.sadd(self.metric_index_key, metric)
165 #
166 # for advanced_metric in self.advanced_metrics:
167 # self.redis.sadd(self.metric_index_key, advanced_metric)
168 #
169 # def index_scores(self):
170 # for score in self.scores:
171 # self.redis.sadd(self.score_index_key, score)
172 #
173 # for advanced_score in self.advanced_scores:
174 # self.redis.sadd(self.score_index_key, advanced_score)
175
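create_indexes now delegates to indexing.py, which is not shown in this diff; the methods deleted above indicate what its functions do. A sketch of that module under this assumption (the bodies mirror the deleted methods, with self now passed in explicitly; the deleted version read self.metrics, whereas the refactored calculator exposes self.base_metrics, as calculate_metrics below shows):

# indexing.py -- hypothetical reconstruction from the deleted methods above
def index_nodes(self):
    self.redis.sadd(self.node_index_key, *self.nodes)

def index_neighbors(self):
    for node in self.nodes:
        node_neighbors = self.graph.neighbors(int(node))
        self.redis.sadd(self.node_neighbors_prefix + str(node), *node_neighbors)

def index_metrics(self):
    for metric in list(self.base_metrics) + list(self.advanced_metrics):
        self.redis.sadd(self.metric_index_key, metric)

def index_scores(self):
    for score in list(self.scores) + list(self.advanced_scores):
        self.redis.sadd(self.score_index_key, score)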
176 71 ###########################
177 72 #### CALCULATION LOOPS ####
178 73 ###########################
179 # loop through all defined metrics and call specified calculation method for each node
74
180 75 def calculate_metrics(self):
76 # loop through all defined metrics and call specified calculation method for each node
181 77 for metric_name in self.base_metrics:
182 78 metric_method = self.base_metrics[metric_name]
183 79
184 80 # loop through all nodes
185 81 for node in self.nodes:
186
187 82 # call calculation method of supplied metric for current node
188 83 node = int(node)
189 84 value = float(metric_method(self,node))
 
... ... class MetricCalculator(object):
194 89 #also store result to metric set
195 90 self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
196 91
197 # loop through all defined_advanced_metrics and call specified calculation method
92
198 93 def calculate_advanced_metrics(self):
94 # loop through all defined_advanced_metrics and call specified calculation method
199 95 for advanced_metric_name in self.advanced_metrics:
200 96 metric_method = self.advanced_metrics[advanced_metric_name]
97
98 # loop through all nodes
201 99 for node in self.nodes:
202 100 node = int(node)
203 101 value = float(metric_method(self,node))
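Each computed value ends up in Redis twice: in the node's hash under the node_metrics: prefix and, per the "also store result to metric set" comment, in a per-metric sorted set under the metric: prefix, which is what later min/max lookups and rankings rely on. Note that zadd(key, value, str(node)) passes the score before the member, the redis-py 2.x StrictRedis argument order; redis-py 3.x+ takes a {member: score} mapping instead. A minimal sketch of the same double write using the 3.x signature (key prefixes taken from the deleted config defaults above; the values are hypothetical):

import redis as rd

r = rd.StrictRedis(host='localhost', port=6379, db=0)
node, metric_name, value = 7, 'degree', 0.42               # hypothetical result

r.hset('node_metrics:' + str(node), metric_name, value)    # per-node hash
r.zadd('metric:' + metric_name, {str(node): value})        # per-metric sorted set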
 
... ... class MetricCalculator(object):
225 123 normalization_method(self,metric_name)
226 124
227 125
228
229
230 # # normalizations
231 # # min max normalization
232 # def min_max_normalization(self,metric_name):
233 # #perform min max normalization of specified metric for all nodes
234 # #min_max normalization
235 # #get min and max from redis
236 # x_min = self.redis.zrange(metric_name, 0, 0, withscores=True, score_cast_func=float)[0][1]
237 # x_max = self.redis.zrange(metric_name, -1, -1, withscores=True, score_cast_func=float)[0][1]
238 #
239 # #print x_min
240 # #print x_max
241 #
242 # for node in self.nodes:
243 # if x_min == x_max:
244 # x_normalized = 1.0
245 # else:
246 # x = float(self.redis.hget(self.node_prefix+str(node), metric_name))
247 # x_normalized = (x - x_min) / (x_max - x_min)
248 #
249 # #store value for node and metric
250 # self.redis.zadd(metric_name+self.normalization_suffix, x_normalized, str(node))
251 # self.redis.hset(self.node_prefix+str(node),metric_name+self.normalization_suffix, x_normalized)
252 #
253 # #max min normalization
254 # def inverse_min_max_normalization(self,metric_name):
255 # x_min = self.redis.zrange(metric_name, 0, 0, withscores=True, score_cast_func=float)[0][1]
256 # x_max = self.redis.zrange(metric_name, -1, -1, withscores=True, score_cast_func=float)[0][1]
257 #
258 # for node in self.nodes:
259 # if x_min == x_max:
260 # x_normalized = 1.0
261 # else:
262 # x = float(self.redis.hget(self.node_prefix+str(node), metric_name))
263 # x_normalized = (x_max - x) / (x_max - x_min)
264 #
265 # #store value for node and metric
266 # self.redis.zadd(metric_name+self.normalization_suffix, x_normalized, str(node))
267 # self.redis.hset(self.node_prefix+str(node),metric_name+self.normalization_suffix, x_normalized)
268 #
269 126 def calculate_scores(self):
270 127 for score_name in self.scores:
271 128 metrics_with_weights = self.scores[score_name]
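The normalization helpers deleted above implement plain min-max scaling over each metric's sorted set: x_normalized = (x - x_min) / (x_max - x_min), the inverse variant (x_max - x) / (x_max - x_min) for metrics where smaller values are better (eccentricity, average shortest path length), and a constant 1.0 when all values coincide. The same logic as a standalone function, without the Redis round-trips (NumPy assumed):

import numpy as np

def min_max(values, inverse=False):
    x = np.asarray(values, dtype=float)
    x_min, x_max = x.min(), x.max()
    if x_min == x_max:
        return np.ones_like(x)               # degenerate case, as in the deleted code
    if inverse:
        return (x_max - x) / (x_max - x_min)
    return (x - x_min) / (x_max - x_min)

min_max([2.0, 4.0, 6.0])                     # -> array([0. , 0.5, 1. ])
min_max([2.0, 4.0, 6.0], inverse=True)       # -> array([1. , 0.5, 0. ])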
 
... ... class MetricCalculator(object):
284 141
285 142 def calculate_advanced_scores(self):
286 143 for advanced_score in self.advanced_scores:
287 self.advanced_scores[advanced_score](self)
288
289
290 ###################################################
291 # actual metrics and corrections etc. below
292 # must return value which can be converted to float
293 ###################################################
294 #
295 # def clustering_coefficient(self,node):
296 # #in the first run calculate the metric for all nodes at once and save in a hash of the instance to access later
297 # #NOTE: this should result in a performance gain, but for very large graphs this might be a problem.
298 # # in this case, just returning nx.clustering(self.graph, node) might be better
299 # if not hasattr(self, 'all_clustering_coefficients'):
300 # self.all_clustering_coefficients = nx.clustering(self.graph)
301 #
302 # #get the actual value from the pre-calculated hash
303 # return self.all_clustering_coefficients[node]
304 #
305 # def degree(self, node):
306 # return self.graph.degree(node)
307 #
308 #
309 # def average_neighbor_degree(self,node):
310 # # same caching technique as in self.clustering_coefficient
311 # # might also break for very large graphs
312 # # nx.average_neighbor_degree(self.graph, nodes=node) might be the way to go
313 #
314 # if not hasattr(self, 'all_average_neighbor_degrees'):
315 # self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
316 # return self.all_average_neighbor_degrees[node]
317 #
318 # def iterated_average_neighbor_degree(self, node):
319 #
320 # first_level_neighbors = self.graph.neighbors(node)
321 # second_level_neighbors = []
322 #
323 # # get all two-hop nodes
324 # for first_level_neighbor in first_level_neighbors:
325 # current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
326 # second_level_neighbors.extend(current_second_level_neighbors)
327 #
328 # #remove one-hop nodes and self
329 # relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
330 #
331 # degree_sum = 0
332 # for relevant_node in relevant_nodes:
333 # degree_sum += self.graph.degree(relevant_node)
334 #
335 # return float(degree_sum)/float(len(relevant_nodes))
336 #
337 # def betweenness_centrality(self, node):
338 # if not hasattr(self, 'all_betweenness_centralities'):
339 # self.all_betweenness_centralities = nx.betweenness_centrality(self.graph)
340 # return self.all_betweenness_centralities[node]
341 #
342 # def eccentricity(self, node):
343 # if not hasattr(self, 'all_eccentricities'):
344 # self.all_eccentricities = nx.eccentricity(self.graph)
345 # return self.all_eccentricities[node]
346 #
347 # def average_shortest_path_length(self, node):
348 # # caching average_shortest_path_length for all nodes at once failed
349 # # already switched to single calculation
350 #
351 # #get all shortest path lengths
352 # all_shortest_path_lengths_for_node = nx.shortest_path_length(self.graph, source=node)
353 #
354 # #calculate average
355 # sum_of_lengths = 0
356 # for target in all_shortest_path_lengths_for_node:
357 # sum_of_lengths += all_shortest_path_lengths_for_node[target]
358 #
359 # return float(sum_of_lengths)/len(all_shortest_path_lengths_for_node)
360 #
361 #
362 ##############
363 ## corrections
364 ##############
365 # def correct_clustering_coefficient(self,node):
366 # clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node),'clustering_coefficient'))
367 # degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
368 # corrected_cc = clustering_coefficient + (degree * clustering_coefficient) / float(4)
369 #
370 # return corrected_cc
371 #
372 # #def correct_clustering_coefficient(self):
373 #
374 # # for node in self.nodes:
375 # # clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node),'clustering_coefficient'))
376 # # degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
377 #
378 # # corrected_cc = clustering_coefficient * (degree * clustering_coefficient) / float(4)
379 #
380 # # self.redis.hset(self.node_prefix+str(node), 'corrected_clustering_coefficient', corrected_cc)
381 # # self.redis.zadd('corrected_clustering_coefficient', corrected_cc, str(node))
382 #
383 # def correct_average_neighbor_degree(self,node):
384 # avgnd = float(self.redis.hget(self.node_prefix+str(node), 'average_neighbor_degree'))
385 #
386 # neighbors = self.graph.neighbors(node)
387 # number_of_neighbors = float(len(neighbors))
388 # neighbor_degrees = []
389 # for neighbor in neighbors:
390 # neighbor_degrees.append(self.graph.degree(neighbor))
391 #
392 # #using numpy median and standard deviation implementation
393 # numpy_neighbor_degrees = np.array(neighbor_degrees)
394 # median = np.median(numpy_neighbor_degrees)
395 # standard_deviation = np.std(numpy_neighbor_degrees)
396 #
397 # if avgnd == 0.0 or number_of_neighbors == 0.0 or standard_deviation == 0.0:
398 # return avgnd
399 # else:
400 # return avgnd + ( ((median - avgnd) / standard_deviation) / number_of_neighbors ) * avgnd
401 #
402 #
403 # def correct_iterated_average_neighbor_degree(self, node):
404 # avgnd = float(self.redis.hget(self.node_prefix+str(node), 'iterated_average_neighbor_degree'))
405 #
406 # first_level_neighbors = self.graph.neighbors(node)
407 # second_level_neighbors = []
408 #
409 # # get all two-hop nodes
410 # for first_level_neighbor in first_level_neighbors:
411 # current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
412 # second_level_neighbors.extend(current_second_level_neighbors)
413 #
414 # #remove one-hop neighbors and self
415 # relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
416 #
417 # number_of_nodes = len(relevant_nodes)
418 # node_degrees = []
419 # for rel_node in relevant_nodes:
420 # node_degrees.append(self.graph.degree(rel_node))
421 #
422 # numpy_node_degrees = np.array(node_degrees)
423 # median = np.median(numpy_node_degrees)
424 # standard_deviation = np.std(numpy_node_degrees)
425 #
426 # if avgnd == 0.0 or number_of_nodes == 0.0 or standard_deviation == 0.0:
427 # return avgnd
428 # else:
429 # return avgnd + ( ((median - avgnd) / standard_deviation) / number_of_nodes ) * avgnd
430 #
431 #
432 #
433 #
434 #################
435 ##advanced scores
436 #################
437 #
438 # def urs_clustering_coefficient_modification(self):
439 #
440 # #caching of values
441 # all_ccs_normalized = dict(self.redis.zrange('corrected_clustering_coefficient'+self.normalization_suffix, 0, -1, withscores=True, score_cast_func=float))
442 # all_urs = dict(self.redis.zrange('unified_risk_score', 0, -1, withscores=True, score_cast_func=float))
443 #
444 # urs_percentile_10 = np.percentile(all_urs.values(), 10)
445 # urs_percentile_90 = np.percentile(all_urs.values(), 90)
446 #
447 # for node in self.nodes:
448 # #cc_normalized = float(self.redis.hget(self.node_prefix+str(node),'corrected_clustering_coefficient'+self.normalization_suffix))
449 # #urs = float(self.redis.hget(self.node_prefix+str(node),'unified_risk_score'))
450 #
451 # cc_normalized = all_ccs_normalized[str(node)]
452 # urs = all_urs[str(node)]
453 #
454 #
455 # if (urs >= urs_percentile_90 or urs <= urs_percentile_10):
456 # if (cc_normalized >= 0.25):
457 # advanced_unified_risk_score = ((urs * 3.0) + cc_normalized) / 4.0
458 # else:
459 # advanced_unified_risk_score = urs
460 # else:
461 # advanced_unified_risk_score = urs
462 #
463 # #save for node
464 # self.redis.hset(self.node_prefix+str(node), 'advanced_unified_risk_score', advanced_unified_risk_score)
465 # #save for metric
466 # self.redis.zadd('advanced_unified_risk_score', advanced_unified_risk_score, str(node))
144 self.advanced_scores[advanced_score](self)
145
467 146
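The advanced score deleted above modifies the unified risk score only for outliers: nodes whose URS falls in the top or bottom decile and whose normalized corrected clustering coefficient is at least 0.25 get the blend (3 * urs + cc) / 4, while every other node keeps its URS unchanged. The same decision rule as a standalone function over plain dicts instead of Redis reads (NumPy assumed; persistence omitted):

import numpy as np

def advanced_urs(urs_by_node, cc_normalized_by_node):
    values = list(urs_by_node.values())
    p10 = np.percentile(values, 10)
    p90 = np.percentile(values, 90)
    result = {}
    for node, urs in urs_by_node.items():
        cc = cc_normalized_by_node[node]
        if (urs >= p90 or urs <= p10) and cc >= 0.25:
            result[node] = (urs * 3.0 + cc) / 4.0    # blend, as in the deleted code
        else:
            result[node] = urs
    return result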
468 147 #############
469 148 # statistics
 
... ... class MetricCalculator(object):
488 167
489 168 statistics.calculate_correlations(self)
490 169
491 #
492 #
493 # def calculate_statistics_for_absolute_values(self,metric):
494 # all_values = dict(self.redis.zrange(metric, 0, -1, withscores=True, score_cast_func=float)).values()
495 # min_value = np.min(np.array(all_values))
496 # max_value = np.max(all_values)
497 #
498 # average = np.average(all_values)
499 # median = np.median(all_values)
500 # standard_deviation = np.std(all_values)
501 #
502 # self.redis.hset(self.statistics_prefix+str(metric), 'min', min_value)
503 # self.redis.hset(self.statistics_prefix+str(metric), 'max', max_value)
504 # self.redis.hset(self.statistics_prefix+str(metric), 'average', average)
505 # self.redis.hset(self.statistics_prefix+str(metric), 'median', median)
506 # self.redis.hset(self.statistics_prefix+str(metric), 'standard_deviation', standard_deviation)
507 #
508 # def calculate_statistics_for_normalized_values(self,metric):
509 # all_values = dict(self.redis.zrange(metric+self.normalization_suffix, 0, -1, withscores=True, score_cast_func=float)).values()
510 #
511 # min_value = np.min(all_values)
512 # max_value = np.max(all_values)
513 #
514 # average = np.average(all_values)
515 # median = np.median(all_values)
516 # standard_deviation = np.std(all_values)
517 #
518 # self.redis.hset(self.statistics_prefix+str(metric)+self.normalization_suffix, 'min', min_value)
519 # self.redis.hset(self.statistics_prefix+str(metric)+self.normalization_suffix, 'max', max_value)
520 # self.redis.hset(self.statistics_prefix+str(metric)+self.normalization_suffix, 'average', average)
521 # self.redis.hset(self.statistics_prefix+str(metric)+self.normalization_suffix, 'median', median)
522 # self.redis.hset(self.statistics_prefix+str(metric)+self.normalization_suffix, 'standard_deviation', standard_deviation)
523 #
524 #
525 # def calculate_correlations(self):
526 # m = self.metrics.keys()
527 # c = self.corrections.keys()
528 #
529 # metrics = m + c
530 #
531 # correlations = {}
532 # for metric1 in metrics:
533 # correlations[metric1] = {}
534 # for metric2 in metrics:
535 # correlations[metric1][metric2] = (0,0)
536 # if metric1 == metric2:
537 # correlations[metric1][metric2] = (1,0)
538 # continue
539 #
540 # dict_metric1 = dict(self.redis.zrange(metric1, 0, -1, withscores=True, score_cast_func=float))
541 # dict_metric2 = dict(self.redis.zrange(metric2, 0, -1, withscores=True, score_cast_func=float))
542 # values_metric1 = []
543 # values_metric2 = []
544 #
545 # for key in sorted(dict_metric1.iterkeys()):
546 # values_metric1.append(dict_metric1[key])
547 #
548 # for key in sorted(dict_metric2.iterkeys()):
549 # values_metric2.append(dict_metric2[key])
550 #
551 # correlations[metric1][metric2] = pearsonr(values_metric1,values_metric2)
552 #
553 # values_metric1 = []
554 # values_metric2 = []
555 #
556 # for source in correlations:
557 # for target in correlations[source]:
558 # self.redis.hset("correlations:"+source+":"+target, "correlation", correlations[source][target][0])
559 # self.redis.hset("correlations:"+source+":"+target, "confidence", correlations[source][target][1])
File pearson.py deleted (index 7a6cc1c..0000000)
1 import redis as rd
2 import numpy as np
3 from scipy.stats import pearsonr
4
5 metrics = ['clustering_coefficient',
6 'degree',
7 'average_neighbor_degree',
8 'iterated_average_neighbor_degree',
9 'betweenness_centrality',
10 'eccentricity',
11 'average_shortest_path_length',
12 'corrected_clustering_coefficient',
13 'corrected_average_neighbor_degree',
14 'corrected_iterated_average_neighbor_degree']
15
16 rdb = rd.StrictRedis(host='localhost', port=6379, db=0)
17
18
19 correlations = {}
20 for metric1 in metrics:
21 correlations[metric1] = {}
22 for metric2 in metrics:
23 correlations[metric1][metric2] = (0,0)
24 if metric1 == metric2:
25 correlations[metric1][metric2] = (1,0)
26 continue
27
28 dict_metric1 = dict(rdb.zrange(metric1, 0, -1, withscores=True, score_cast_func=float))
29 dict_metric2 = dict(rdb.zrange(metric2, 0, -1, withscores=True, score_cast_func=float))
30
31 values_metric1 = []
32 values_metric2 = []
33
34 for key in sorted(dict_metric1.iterkeys()):
35 values_metric1.append(dict_metric1[key])
36
37 for key in sorted(dict_metric2.iterkeys()):
38 values_metric2.append(dict_metric2[key])
39
40 correlations[metric1][metric2] = pearsonr(values_metric1,values_metric2)
41
42 for source in correlations:
43 for target in correlations[source]:
44 rdb.hset("correlations:"+source+":"+target, "correlation", correlations[source][target][0])
45 rdb.hset("correlations:"+source+":"+target, "confidence", correlations[source][target][1])