File metric_calculator.py changed (mode: 100644) (index 281cf20..f4b2e6e) |
... |
... |
import config |
9 |
9 |
|
|
10 |
10 |
class MetricCalculator(object):
|
class MetricCalculator(object):
|
11 |
11 |
def __init__ (self, graph):
|
def __init__ (self, graph):
|
|
12 |
|
#class constructor
|
|
13 |
|
#define required class variables such as the graph to work on, the redis connection and the nodes of the graph
|
|
14 |
|
|
12 |
15 |
self.graph = graph
|
self.graph = graph
|
13 |
16 |
self.redis = rd.StrictRedis(host='localhost', port=6379, db=0)
|
self.redis = rd.StrictRedis(host='localhost', port=6379, db=0)
|
14 |
17 |
self.nodes = nx.nodes(graph)
|
self.nodes = nx.nodes(graph)
|
15 |
18 |
|
|
|
19 |
|
|
|
20 |
|
# configuration variables are read from the config file and are also saved to class variables for easy access
|
16 |
21 |
self.node_index_key = config.node_index_key
|
self.node_index_key = config.node_index_key
|
17 |
22 |
self.metric_index_key = config.metric_index_key
|
self.metric_index_key = config.metric_index_key
|
18 |
23 |
self.score_index_key = config.score_index_key
|
self.score_index_key = config.score_index_key
|
|
... |
... |
class MetricCalculator(object): |
34 |
39 |
self.advanced_scores = config.advanced_scores
|
self.advanced_scores = config.advanced_scores
|
35 |
40 |
|
|
36 |
41 |
|
|
37 |
|
|
|
38 |
|
# self.node_index_key = 'all_nodes'
|
|
39 |
|
# self.metric_index_key = 'all_metrics'
|
|
40 |
|
# self.score_index_key = 'all_scores'
|
|
41 |
|
#
|
|
42 |
|
# self.node_neighbors_prefix = 'node_neighbors:'
|
|
43 |
|
# self.node_prefix = 'node_metrics:'
|
|
44 |
|
# self.metric_prefix = 'metric:'
|
|
45 |
|
# self.statistics_prefix = 'statistics:'
|
|
46 |
|
#
|
|
47 |
|
# self.normalization_suffix = '_normalized'
|
|
48 |
|
#
|
|
49 |
|
# # definition of all base metrics for which absolute values will be calculated for each node in the first step
|
|
50 |
|
# # key is the name of the metric and value is the implemented method which exposes the required interface
|
|
51 |
|
# # interface: each method takes the node as the single parameter, performs the necessary calculation and
|
|
52 |
|
# # returns a float containing the value for the specified node
|
|
53 |
|
#
|
|
54 |
|
# self.metrics = { 'clustering_coefficient' : self.clustering_coefficient,
|
|
55 |
|
# 'degree' : self.degree,
|
|
56 |
|
# 'average_neighbor_degree' : self.average_neighbor_degree,
|
|
57 |
|
# 'iterated_average_neighbor_degree': self.iterated_average_neighbor_degree,
|
|
58 |
|
# 'betweenness_centrality' : self.betweenness_centrality,
|
|
59 |
|
# 'eccentricity' : self.eccentricity,
|
|
60 |
|
# 'average_shortest_path_length' : self.average_shortest_path_length
|
|
61 |
|
# }
|
|
62 |
|
#
|
|
63 |
|
#
|
|
64 |
|
# # some metrics might require some corrections or post processing which relies on the value of other metrics or normalizations
|
|
65 |
|
# # key is the metric name and value the method for correction
|
|
66 |
|
#
|
|
67 |
|
#
|
|
68 |
|
# self.advanced_metrics = { 'corrected_clustering_coefficient' : self.correct_clustering_coefficient,
|
|
69 |
|
# 'corrected_average_neighbor_degree' : self.correct_average_neighbor_degree,
|
|
70 |
|
# 'corrected_iterated_average_neighbor_degree': self.correct_iterated_average_neighbor_degree}
|
|
71 |
|
#
|
|
72 |
|
#
|
|
73 |
|
#
|
|
74 |
|
# # for every metric, a normalization method has to be specified
|
|
75 |
|
# # key is the name of the metric and value is the normalization method which also has to expose the required interface
|
|
76 |
|
# # interface: normalization methods take the name of the (absolute) metric as the single argument; no return value is required
|
|
77 |
|
# # the method itself shall access the data which is required for normalization from the redis instance
|
|
78 |
|
# # and the corresponding keys/values for the specified metric
|
|
79 |
|
# # it shall then loop over all nodes and calculate the normalized value for the node and the metric
|
|
80 |
|
# # afterwards it should save the result to redis using "metric_name_normalized" as the key
|
|
81 |
|
# # the result is stored inside the node's hash for metrics
|
|
82 |
|
#
|
|
83 |
|
# # also needs to include corrected metrics with their respective names
|
|
84 |
|
# #
|
|
85 |
|
# self.normalization_methods = { 'clustering_coefficient' : self.min_max_normalization,
|
|
86 |
|
# 'corrected_clustering_coefficient' : self.min_max_normalization,
|
|
87 |
|
# 'degree' : self.min_max_normalization,
|
|
88 |
|
# 'average_neighbor_degree' : self.min_max_normalization,
|
|
89 |
|
# 'corrected_average_neighbor_degree' : self.min_max_normalization,
|
|
90 |
|
# 'iterated_average_neighbor_degree' : self.min_max_normalization,
|
|
91 |
|
# 'corrected_iterated_average_neighbor_degree': self.min_max_normalization,
|
|
92 |
|
# 'betweenness_centrality' : self.min_max_normalization,
|
|
93 |
|
# 'eccentricity' : self.inverse_min_max_normalization,
|
|
94 |
|
# 'average_shortest_path_length' : self.inverse_min_max_normalization
|
|
95 |
|
# }
|
|
96 |
|
#
|
|
97 |
|
#
|
|
98 |
|
# # the easiest case for a score is a combination of normalized metric values with a weight which adds up to 1
|
|
99 |
|
# # such scores can easily be defined here
|
|
100 |
|
# # note: names are not methods but redis keys
|
|
101 |
|
#
|
|
102 |
|
# self.scores = {'unified_risk_score': { #'corrected_clustering_coefficient': 0.2,
|
|
103 |
|
# 'degree_normalized': 0.25,
|
|
104 |
|
# 'corrected_average_neighbor_degree_normalized': 0.15,
|
|
105 |
|
# 'corrected_iterated_average_neighbor_degree_normalized': 0.1,
|
|
106 |
|
# 'betweenness_centrality_normalized': 0.25,
|
|
107 |
|
# 'eccentricity_normalized': 0.125,
|
|
108 |
|
# 'average_shortest_path_length_normalized': 0.125}
|
|
109 |
|
# }
|
|
110 |
|
#
|
|
111 |
|
#
|
|
112 |
|
# # other scores might require a more sophisticated algorithm to be calculated
|
|
113 |
|
# # such scores need to be added here and implemented like the example below
|
|
114 |
|
#
|
|
115 |
|
# self.advanced_scores = {'advanced_unified_risk_score': self.urs_clustering_coefficient_modification}
|
|
116 |
|
|
|
117 |
|
|
|
118 |
|
|
|
119 |
|
|
|
120 |
42 |
|
|
121 |
43 |
def start(self):
|
def start(self):
|
122 |
44 |
#clean all data in Redis
|
#clean all data in Redis
|
123 |
45 |
self.redis.flushdb()
|
self.redis.flushdb()
|
124 |
46 |
|
|
125 |
47 |
#index creation
|
#index creation
|
126 |
|
#self.index_nodes()
|
|
127 |
|
#self.index_neighbors()
|
|
128 |
|
#self.index_metrics()
|
|
129 |
|
#self.index_scores()
|
|
130 |
|
|
|
131 |
48 |
self.create_indexes()
|
self.create_indexes()
|
132 |
49 |
|
|
133 |
50 |
|
|
|
... |
... |
class MetricCalculator(object): |
145 |
62 |
#### INDEXING ####
|
#### INDEXING ####
|
146 |
63 |
##################
|
##################
|
147 |
64 |
def create_indexes(self):
|
def create_indexes(self):
|
|
65 |
|
#call methods defined in indexing.py
|
148 |
66 |
indexing.index_nodes(self)
|
indexing.index_nodes(self)
|
149 |
67 |
indexing.index_neighbors(self)
|
indexing.index_neighbors(self)
|
150 |
68 |
indexing.index_metrics(self)
|
indexing.index_metrics(self)
|
151 |
69 |
indexing.index_scores(self)
|
indexing.index_scores(self)
|
152 |
70 |
|
|
153 |
|
|
|
154 |
|
# def index_nodes(self):
|
|
155 |
|
# self.redis.sadd(self.node_index_key, *self.nodes)
|
|
156 |
|
#
|
|
157 |
|
# def index_neighbors(self):
|
|
158 |
|
# for node in self.nodes:
|
|
159 |
|
# node_neighbors = self.graph.neighbors(int(node))
|
|
160 |
|
# self.redis.sadd(self.node_neighbors_prefix+str(node), *node_neighbors)
|
|
161 |
|
#
|
|
162 |
|
# def index_metrics(self):
|
|
163 |
|
# for metric in self.metrics:
|
|
164 |
|
# self.redis.sadd(self.metric_index_key, metric)
|
|
165 |
|
#
|
|
166 |
|
# for advanced_metric in self.advanced_metrics:
|
|
167 |
|
# self.redis.sadd(self.metric_index_key, advanced_metric)
|
|
168 |
|
#
|
|
169 |
|
# def index_scores(self):
|
|
170 |
|
# for score in self.scores:
|
|
171 |
|
# self.redis.sadd(self.score_index_key, score)
|
|
172 |
|
#
|
|
173 |
|
# for advanced_score in self.advanced_scores:
|
|
174 |
|
# self.redis.sadd(self.score_index_key, advanced_score)
|
|
175 |
|
|
|
176 |
71 |
###########################
|
###########################
|
177 |
72 |
#### CALCULATION LOOPS ####
|
#### CALCULATION LOOPS ####
|
178 |
73 |
###########################
|
###########################
|
179 |
|
# loop through all defined metrics and call specified calculation method for each node
|
|
|
74 |
|
|
180 |
75 |
def calculate_metrics(self):
|
def calculate_metrics(self):
|
|
76 |
|
# loop through all defined metrics and call specified calculation method for each node
|
181 |
77 |
for metric_name in self.base_metrics:
|
for metric_name in self.base_metrics:
|
182 |
78 |
metric_method = self.base_metrics[metric_name]
|
metric_method = self.base_metrics[metric_name]
|
183 |
79 |
|
|
184 |
80 |
# loop through all nodes
|
# loop through all nodes
|
185 |
81 |
for node in self.nodes:
|
for node in self.nodes:
|
186 |
|
|
|
187 |
82 |
# call calculation method of supplied metric for current node
|
# call calculation method of supplied metric for current node
|
188 |
83 |
node = int(node)
|
node = int(node)
|
189 |
84 |
value = float(metric_method(self,node))
|
value = float(metric_method(self,node))
|
|
... |
... |
class MetricCalculator(object): |
194 |
89 |
#also store result to metric set
|
#also store result to metric set
|
195 |
90 |
self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
|
self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
|
196 |
91 |
|
|
197 |
|
# loop through all defined advanced metrics and call specified calculation method
|
|
|
92 |
|
|
198 |
93 |
def calculate_advanced_metrics(self):
|
def calculate_advanced_metrics(self):
|
|
94 |
|
# loop through all defined advanced metrics and call specified calculation method
|
199 |
95 |
for advanced_metric_name in self.advanced_metrics:
|
for advanced_metric_name in self.advanced_metrics:
|
200 |
96 |
metric_method = self.advanced_metrics[advanced_metric_name]
|
metric_method = self.advanced_metrics[advanced_metric_name]
|
|
97 |
|
|
|
98 |
|
# loop through all nodes
|
201 |
99 |
for node in self.nodes:
|
for node in self.nodes:
|
202 |
100 |
node = int(node)
|
node = int(node)
|
203 |
101 |
value = float(metric_method(self,node))
|
value = float(metric_method(self,node))
|
|
... |
... |
class MetricCalculator(object): |
225 |
123 |
normalization_method(self,metric_name)
|
normalization_method(self,metric_name)
|
226 |
124 |
|
|
227 |
125 |
|
|
228 |
|
|
|
229 |
|
|
|
230 |
|
# # normalizations
|
|
231 |
|
# # min max normalization
|
|
232 |
|
# def min_max_normalization(self,metric_name):
|
|
233 |
|
# #perform min max normalization of specified metric for all nodes
|
|
234 |
|
# #min_max normalization
|
|
235 |
|
# #get min and max from redis
|
|
236 |
|
# x_min = self.redis.zrange(metric_name, 0, 0, withscores=True, score_cast_func=float)[0][1]
|
|
237 |
|
# x_max = self.redis.zrange(metric_name, -1, -1, withscores=True, score_cast_func=float)[0][1]
|
|
238 |
|
#
|
|
239 |
|
# #print x_min
|
|
240 |
|
# #print x_max
|
|
241 |
|
#
|
|
242 |
|
# for node in self.nodes:
|
|
243 |
|
# if x_min == x_max:
|
|
244 |
|
# x_normalized = 1.0
|
|
245 |
|
# else:
|
|
246 |
|
# x = float(self.redis.hget(self.node_prefix+str(node), metric_name))
|
|
247 |
|
# x_normalized = (x - x_min) / (x_max - x_min)
|
|
248 |
|
#
|
|
249 |
|
# #store value for node and metric
|
|
250 |
|
# self.redis.zadd(metric_name+self.normalization_suffix, x_normalized, str(node))
|
|
251 |
|
# self.redis.hset(self.node_prefix+str(node),metric_name+self.normalization_suffix, x_normalized)
|
|
252 |
|
#
|
|
253 |
|
# #max min normalization
|
|
254 |
|
# def inverse_min_max_normalization(self,metric_name):
|
|
255 |
|
# x_min = self.redis.zrange(metric_name, 0, 0, withscores=True, score_cast_func=float)[0][1]
|
|
256 |
|
# x_max = self.redis.zrange(metric_name, -1, -1, withscores=True, score_cast_func=float)[0][1]
|
|
257 |
|
#
|
|
258 |
|
# for node in self.nodes:
|
|
259 |
|
# if x_min == x_max:
|
|
260 |
|
# x_normalized = 1.0
|
|
261 |
|
# else:
|
|
262 |
|
# x = float(self.redis.hget(self.node_prefix+str(node), metric_name))
|
|
263 |
|
# x_normalized = (x_max - x) / (x_max - x_min)
|
|
264 |
|
#
|
|
265 |
|
# #store value for node and metric
|
|
266 |
|
# self.redis.zadd(metric_name+self.normalization_suffix, x_normalized, str(node))
|
|
267 |
|
# self.redis.hset(self.node_prefix+str(node),metric_name+self.normalization_suffix, x_normalized)
|
|
268 |
|
#
|
|
269 |
126 |
def calculate_scores(self):
|
def calculate_scores(self):
|
270 |
127 |
for score_name in self.scores:
|
for score_name in self.scores:
|
271 |
128 |
metrics_with_weights = self.scores[score_name]
|
metrics_with_weights = self.scores[score_name]
|
|
... |
... |
class MetricCalculator(object): |
284 |
141 |
|
|
285 |
142 |
def calculate_advanced_scores(self):
|
def calculate_advanced_scores(self):
|
286 |
143 |
for advanced_score in self.advanced_scores:
|
for advanced_score in self.advanced_scores:
|
287 |
|
self.advanced_scores[advanced_score](self)
|
|
288 |
|
|
|
289 |
|
|
|
290 |
|
###################################################
|
|
291 |
|
# actual metrics and corrections etc. below
|
|
292 |
|
# must return value which can be converted to float
|
|
293 |
|
###################################################
|
|
294 |
|
#
|
|
295 |
|
# def clustering_coefficient(self,node):
|
|
296 |
|
# #in the first run calculate the metric for all nodes at once and save in a hash of the instance to access later
|
|
297 |
|
# #NOTE: this should result in a performance gain, but for very large graphs this might be a problem.
|
|
298 |
|
# # in this case, just returning nx.clustering(self.graph, node) might be better
|
|
299 |
|
# if not hasattr(self, 'all_clustering_coefficients'):
|
|
300 |
|
# self.all_clustering_coefficients = nx.clustering(self.graph)
|
|
301 |
|
#
|
|
302 |
|
# #get the actual value from the pre-calculated hash
|
|
303 |
|
# return self.all_clustering_coefficients[node]
|
|
304 |
|
#
|
|
305 |
|
# def degree(self, node):
|
|
306 |
|
# return self.graph.degree(node)
|
|
307 |
|
#
|
|
308 |
|
#
|
|
309 |
|
# def average_neighbor_degree(self,node):
|
|
310 |
|
# # same caching technique as in self.clustering_coefficient
|
|
311 |
|
# # might also break for very large graphs
|
|
312 |
|
# # nx.average_neighbor_degree(self.graph, nodes=node) might be the way to go
|
|
313 |
|
#
|
|
314 |
|
# if not hasattr(self, 'all_average_neighbor_degrees'):
|
|
315 |
|
# self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
|
|
316 |
|
# return self.all_average_neighbor_degrees[node]
|
|
317 |
|
#
|
|
318 |
|
# def iterated_average_neighbor_degree(self, node):
|
|
319 |
|
#
|
|
320 |
|
# first_level_neighbors = self.graph.neighbors(node)
|
|
321 |
|
# second_level_neighbors = []
|
|
322 |
|
#
|
|
323 |
|
# # get all two-hop nodes
|
|
324 |
|
# for first_level_neighbor in first_level_neighbors:
|
|
325 |
|
# current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
|
|
326 |
|
# second_level_neighbors.extend(current_second_level_neighbors)
|
|
327 |
|
#
|
|
328 |
|
# #remove one-hop nodes and self
|
|
329 |
|
# relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
|
|
330 |
|
#
|
|
331 |
|
# degree_sum = 0
|
|
332 |
|
# for relevant_node in relevant_nodes:
|
|
333 |
|
# degree_sum += self.graph.degree(relevant_node)
|
|
334 |
|
#
|
|
335 |
|
# return float(degree_sum)/float(len(relevant_nodes))
|
|
336 |
|
#
|
|
337 |
|
# def betweenness_centrality(self, node):
|
|
338 |
|
# if not hasattr(self, 'all_betweenness_centralities'):
|
|
339 |
|
# self.all_betweenness_centralities = nx.betweenness_centrality(self.graph)
|
|
340 |
|
# return self.all_betweenness_centralities[node]
|
|
341 |
|
#
|
|
342 |
|
# def eccentricity(self, node):
|
|
343 |
|
# if not hasattr(self, 'all_eccentricities'):
|
|
344 |
|
# self.all_eccentricities = nx.eccentricity(self.graph)
|
|
345 |
|
# return self.all_eccentricities[node]
|
|
346 |
|
#
|
|
347 |
|
# def average_shortest_path_length(self, node):
|
|
348 |
|
# # caching average_shortest_path_length for all nodes at one failed
|
|
349 |
|
# # already switched to single calculation
|
|
350 |
|
#
|
|
351 |
|
# #get all shortest path lengths
|
|
352 |
|
# all_shortest_path_lengths_for_node = nx.shortest_path_length(self.graph, source=node)
|
|
353 |
|
#
|
|
354 |
|
# #calculate average
|
|
355 |
|
# sum_of_lengths = 0
|
|
356 |
|
# for target in all_shortest_path_lengths_for_node:
|
|
357 |
|
# sum_of_lengths += all_shortest_path_lengths_for_node[target]
|
|
358 |
|
#
|
|
359 |
|
# return float(sum_of_lengths)/len(all_shortest_path_lengths_for_node)
|
|
360 |
|
#
|
|
361 |
|
#
|
|
362 |
|
##############
|
|
363 |
|
## corrections
|
|
364 |
|
##############
|
|
365 |
|
# def correct_clustering_coefficient(self,node):
|
|
366 |
|
# clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node),'clustering_coefficient'))
|
|
367 |
|
# degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
|
|
368 |
|
# corrected_cc = clustering_coefficient + (degree * clustering_coefficient) / float(4)
|
|
369 |
|
#
|
|
370 |
|
# return corrected_cc
|
|
371 |
|
#
|
|
372 |
|
# #def correct_clustering_coefficient(self):
|
|
373 |
|
#
|
|
374 |
|
# # for node in self.nodes:
|
|
375 |
|
# # clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node),'clustering_coefficient'))
|
|
376 |
|
# # degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
|
|
377 |
|
#
|
|
378 |
|
# # corrected_cc = clustering_coefficient * (degree * clustering_coefficient) / float(4)
|
|
379 |
|
#
|
|
380 |
|
# # self.redis.hset(self.node_prefix+str(node), 'corrected_clustering_coefficient', corrected_cc)
|
|
381 |
|
# # self.redis.zadd('corrected_clustering_coefficient', corrected_cc, str(node))
|
|
382 |
|
#
|
|
383 |
|
# def correct_average_neighbor_degree(self,node):
|
|
384 |
|
# avgnd = float(self.redis.hget(self.node_prefix+str(node), 'average_neighbor_degree'))
|
|
385 |
|
#
|
|
386 |
|
# neighbors = self.graph.neighbors(node)
|
|
387 |
|
# number_of_neighbors = float(len(neighbors))
|
|
388 |
|
# neighbor_degrees = []
|
|
389 |
|
# for neighbor in neighbors:
|
|
390 |
|
# neighbor_degrees.append(self.graph.degree(neighbor))
|
|
391 |
|
#
|
|
392 |
|
# #using numpy median and standard deviation implementation
|
|
393 |
|
# numpy_neighbor_degrees = np.array(neighbor_degrees)
|
|
394 |
|
# median = np.median(numpy_neighbor_degrees)
|
|
395 |
|
# standard_deviation = np.std(numpy_neighbor_degrees)
|
|
396 |
|
#
|
|
397 |
|
# if avgnd == 0.0 or number_of_neighbors == 0.0 or standard_deviation == 0.0:
|
|
398 |
|
# return avgnd
|
|
399 |
|
# else:
|
|
400 |
|
# return avgnd + ( ((median - avgnd) / standard_deviation) / number_of_neighbors ) * avgnd
|
|
401 |
|
#
|
|
402 |
|
#
|
|
403 |
|
# def correct_iterated_average_neighbor_degree(self, node):
|
|
404 |
|
# avgnd = float(self.redis.hget(self.node_prefix+str(node), 'iterated_average_neighbor_degree'))
|
|
405 |
|
#
|
|
406 |
|
# first_level_neighbors = self.graph.neighbors(node)
|
|
407 |
|
# second_level_neighbors = []
|
|
408 |
|
#
|
|
409 |
|
# # get all two-hop nodes
|
|
410 |
|
# for first_level_neighbor in first_level_neighbors:
|
|
411 |
|
# current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
|
|
412 |
|
# second_level_neighbors.extend(current_second_level_neighbors)
|
|
413 |
|
#
|
|
414 |
|
# #remove one-hop neighbors and self
|
|
415 |
|
# relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])
|
|
416 |
|
#
|
|
417 |
|
# number_of_nodes = len(relevant_nodes)
|
|
418 |
|
# node_degrees = []
|
|
419 |
|
# for rel_node in relevant_nodes:
|
|
420 |
|
# node_degrees.append(self.graph.degree(rel_node))
|
|
421 |
|
#
|
|
422 |
|
# numpy_node_degrees = np.array(node_degrees)
|
|
423 |
|
# median = np.median(numpy_node_degrees)
|
|
424 |
|
# standard_deviation = np.std(numpy_node_degrees)
|
|
425 |
|
#
|
|
426 |
|
# if avgnd == 0.0 or number_of_nodes == 0.0 or standard_deviation == 0.0:
|
|
427 |
|
# return avgnd
|
|
428 |
|
# else:
|
|
429 |
|
# return avgnd + ( ((median - avgnd) / standard_deviation) / number_of_nodes ) * avgnd
|
|
430 |
|
#
|
|
431 |
|
#
|
|
432 |
|
#
|
|
433 |
|
#
|
|
434 |
|
#################
|
|
435 |
|
##advanced scores
|
|
436 |
|
#################
|
|
437 |
|
#
|
|
438 |
|
# def urs_clustering_coefficient_modification(self):
|
|
439 |
|
#
|
|
440 |
|
# #caching of values
|
|
441 |
|
# all_ccs_normalized = dict(self.redis.zrange('corrected_clustering_coefficient'+self.normalization_suffix, 0, -1, withscores=True, score_cast_func=float))
|
|
442 |
|
# all_urs = dict(self.redis.zrange('unified_risk_score', 0, -1, withscores=True, score_cast_func=float))
|
|
443 |
|
#
|
|
444 |
|
# urs_percentile_10 = np.percentile(all_urs.values(), 10)
|
|
445 |
|
# urs_percentile_90 = np.percentile(all_urs.values(), 90)
|
|
446 |
|
#
|
|
447 |
|
# for node in self.nodes:
|
|
448 |
|
# #cc_normalized = float(self.redis.hget(self.node_prefix+str(node),'corrected_clustering_coefficient'+self.normalization_suffix))
|
|
449 |
|
# #urs = float(self.redis.hget(self.node_prefix+str(node),'unified_risk_score'))
|
|
450 |
|
#
|
|
451 |
|
# cc_normalized = all_ccs_normalized[str(node)]
|
|
452 |
|
# urs = all_urs[str(node)]
|
|
453 |
|
#
|
|
454 |
|
#
|
|
455 |
|
# if (urs >= urs_percentile_90 or urs <= urs_percentile_10):
|
|
456 |
|
# if (cc_normalized >= 0.25):
|
|
457 |
|
# advanced_unified_risk_score = ((urs * 3.0) + cc_normalized) / 4.0
|
|
458 |
|
# else:
|
|
459 |
|
# advanced_unified_risk_score = urs
|
|
460 |
|
# else:
|
|
461 |
|
# advanced_unified_risk_score = urs
|
|
462 |
|
#
|
|
463 |
|
# #save for node
|
|
464 |
|
# self.redis.hset(self.node_prefix+str(node), 'advanced_unified_risk_score', advanced_unified_risk_score)
|
|
465 |
|
# #save for metric
|
|
466 |
|
# self.redis.zadd('advanced_unified_risk_score', advanced_unified_risk_score, str(node))
|
|
|
144 |
|
self.advanced_scores[advanced_score](self)
|
|
145 |
|
|
467 |
146 |
|
|
468 |
147 |
#############
|
#############
|
469 |
148 |
# statistics
|
# statistics
|
|
... |
... |
class MetricCalculator(object): |
488 |
167 |
|
|
489 |
168 |
statistics.calculate_correlations(self)
|
statistics.calculate_correlations(self)
|
490 |
169 |
|
|
491 |
|
#
|
|
492 |
|
#
|
|
493 |
|
# def calculate_statistics_for_absolute_values(self,metric):
|
|
494 |
|
# all_values = dict(self.redis.zrange(metric, 0, -1, withscores=True, score_cast_func=float)).values()
|
|
495 |
|
# min_value = np.min(np.array(all_values))
|
|
496 |
|
# max_value = np.max(all_values)
|
|
497 |
|
#
|
|
498 |
|
# average = np.average(all_values)
|
|
499 |
|
# median = np.median(all_values)
|
|
500 |
|
# standard_deviation = np.std(all_values)
|
|
501 |
|
#
|
|
502 |
|
# self.redis.hset(self.statistics_prefix+str(metric), 'min', min_value)
|
|
503 |
|
# self.redis.hset(self.statistics_prefix+str(metric), 'max', max_value)
|
|
504 |
|
# self.redis.hset(self.statistics_prefix+str(metric), 'average', average)
|
|
505 |
|
# self.redis.hset(self.statistics_prefix+str(metric), 'median', median)
|
|
506 |
|
# self.redis.hset(self.statistics_prefix+str(metric), 'standard_deviation', standard_deviation)
|
|
507 |
|
#
|
|
508 |
|
# def calculate_statistics_for_normalized_values(self,metric):
|
|
509 |
|
# all_values = dict(self.redis.zrange(metric+self.normalization_suffix, 0, -1, withscores=True, score_cast_func=float)).values()
|
|
510 |
|
#
|
|
511 |
|
# min_value = np.min(all_values)
|
|
512 |
|
# max_value = np.max(all_values)
|
|
513 |
|
#
|
|
514 |
|
# average = np.average(all_values)
|
|
515 |
|
# median = np.median(all_values)
|
|
516 |
|
# standard_deviation = np.std(all_values)
|
|
517 |
|
#
|
|
518 |
|
# self.redis.hset(self.statistics_prefix+str(metric)+self.normalization_suffix, 'min', min_value)
|
|
519 |
|
# self.redis.hset(self.statistics_prefix+str(metric)+self.normalization_suffix, 'max', max_value)
|
|
520 |
|
# self.redis.hset(self.statistics_prefix+str(metric)+self.normalization_suffix, 'average', average)
|
|
521 |
|
# self.redis.hset(self.statistics_prefix+str(metric)+self.normalization_suffix, 'median', median)
|
|
522 |
|
# self.redis.hset(self.statistics_prefix+str(metric)+self.normalization_suffix, 'standard_deviation', standard_deviation)
|
|
523 |
|
#
|
|
524 |
|
#
|
|
525 |
|
# def calculate_correlations(self):
|
|
526 |
|
# m = self.metrics.keys()
|
|
527 |
|
# c = self.corrections.keys()
|
|
528 |
|
#
|
|
529 |
|
# metrics = m + c
|
|
530 |
|
#
|
|
531 |
|
# correlations = {}
|
|
532 |
|
# for metric1 in metrics:
|
|
533 |
|
# correlations[metric1] = {}
|
|
534 |
|
# for metric2 in metrics:
|
|
535 |
|
# correlations[metric1][metric2] = (0,0)
|
|
536 |
|
# if metric1 == metric2:
|
|
537 |
|
# correlations[metric1][metric2] = (1,0)
|
|
538 |
|
# continue
|
|
539 |
|
#
|
|
540 |
|
# dict_metric1 = dict(self.redis.zrange(metric1, 0, -1, withscores=True, score_cast_func=float))
|
|
541 |
|
# dict_metric2 = dict(self.redis.zrange(metric2, 0, -1, withscores=True, score_cast_func=float))
|
|
542 |
|
# values_metric1 = []
|
|
543 |
|
# values_metric2 = []
|
|
544 |
|
#
|
|
545 |
|
# for key in sorted(dict_metric1.iterkeys()):
|
|
546 |
|
# values_metric1.append(dict_metric1[key])
|
|
547 |
|
#
|
|
548 |
|
# for key in sorted(dict_metric2.iterkeys()):
|
|
549 |
|
# values_metric2.append(dict_metric2[key])
|
|
550 |
|
#
|
|
551 |
|
# correlations[metric1][metric2] = pearsonr(values_metric1,values_metric2)
|
|
552 |
|
#
|
|
553 |
|
# values_metric1 = []
|
|
554 |
|
# values_metric2 = []
|
|
555 |
|
#
|
|
556 |
|
# for source in correlations:
|
|
557 |
|
# for target in correlations[source]:
|
|
558 |
|
# self.redis.hset("correlations:"+source+":"+target, "correlation", correlations[source][target][0])
|
|
559 |
|
# self.redis.hset("correlations:"+source+":"+target, "confidence", correlations[source][target][1]) |
|