File metric_calculator.py changed (mode: 100644) (index f4b2e6e..bc83a4e) |
1 |
|
import networkx as nx
|
|
2 |
|
import redis as rd
|
|
3 |
|
import numpy as np
|
|
4 |
|
import indexing
|
|
5 |
|
import statistics
|
|
6 |
|
import normalizations
|
|
7 |
|
import config
|
|
8 |
|
|
|
9 |
|
|
|
10 |
|
class MetricCalculator(object):
    """Compute per-node metrics and scores for a graph and store them in Redis.

    Which metrics, normalizations and scores are calculated is taken from the
    ``config`` module. Every result is written both to a per-node hash and to
    a per-metric (or per-score) sorted set in Redis.
    """

    def __init__(self, graph):
        """Store the graph, open the Redis connection and copy the config.

        Args:
            graph: the graph to work on; its nodes are read via ``nx.nodes``.
        """
        self.graph = graph
        self.redis = rd.StrictRedis(host='localhost', port=6379, db=0)
        self.nodes = nx.nodes(graph)

        # Configuration variables are read from the config module and also
        # saved on the instance for easy access.
        self.node_index_key = config.node_index_key
        self.metric_index_key = config.metric_index_key
        self.score_index_key = config.score_index_key

        self.node_neighbors_prefix = config.node_neighbors_prefix
        self.node_prefix = config.node_prefix
        self.metric_prefix = config.metric_prefix
        self.score_prefix = config.score_prefix
        self.statistics_prefix = config.statistics_prefix

        self.normalization_suffix = config.normalization_suffix

        self.base_metrics = config.base_metrics
        self.advanced_metrics = config.advanced_metrics

        self.normalization_methods = config.normalization_methods

        self.scores = config.scores
        self.advanced_scores = config.advanced_scores

    def start(self):
        """Run the full pipeline: flush, index, calculate, then statistics."""
        # Remove all data from the current Redis database before recomputing.
        self.redis.flushdb()

        # Index creation.
        self.create_indexes()

        # Main calculations.
        self.calculate_metrics()
        self.calculate_advanced_metrics()
        self.normalize_metrics()
        self.calculate_scores()
        self.calculate_advanced_scores()

        # Statistics.
        self.calculate_statistics()

    ##################
    #### INDEXING ####
    ##################
    def create_indexes(self):
        """Create the node, neighbor, metric and score indexes.

        Delegates to the functions defined in ``indexing.py``.
        """
        indexing.index_nodes(self)
        indexing.index_neighbors(self)
        indexing.index_metrics(self)
        indexing.index_scores(self)

    ###########################
    #### CALCULATION LOOPS ####
    ###########################

    def _calculate_and_store(self, metrics):
        """Evaluate every metric in ``metrics`` for every node and store it.

        Args:
            metrics: mapping of metric name to a callable that is invoked as
                ``method(self, node)``; its result is converted to ``float``.
        """
        for metric_name in metrics:
            metric_method = metrics[metric_name]
            metric_key = self.metric_prefix + metric_name

            for node in self.nodes:
                # Metric methods receive the node as an int.
                node = int(node)
                value = float(metric_method(self, node))

                # Store the result in the node's hash ...
                self.redis.hset(self.node_prefix + str(node), metric_name, value)

                # ... and in the metric's sorted set, scored by the value.
                self.redis.zadd(metric_key, value, str(node))

    def calculate_metrics(self):
        """Calculate and store all metrics listed in ``base_metrics``."""
        self._calculate_and_store(self.base_metrics)

    def calculate_advanced_metrics(self):
        """Calculate and store all metrics listed in ``advanced_metrics``."""
        self._calculate_and_store(self.advanced_metrics)

    def normalize_metrics(self):
        """Normalize every base and advanced metric.

        The method registered for a metric in ``normalization_methods`` is
        called as ``method(self, metric_name)``. Metrics without an entry fall
        back to ``normalizations.min_max``.
        """
        # Merge by copy + update instead of concatenating items() lists so the
        # code runs on both Python 2 and Python 3. On a name clash the
        # advanced metric wins, exactly as with the previous list merge.
        all_metrics = dict(self.base_metrics)
        all_metrics.update(self.advanced_metrics)

        for metric_name in all_metrics:
            if metric_name in self.normalization_methods:
                normalization_method = self.normalization_methods[metric_name]
            else:
                # Fallback normalization is min-max.
                normalization_method = normalizations.min_max
            normalization_method(self, metric_name)

    def calculate_scores(self):
        """Calculate each configured score as a weighted sum per node.

        For every score in ``scores`` the normalized value of each listed
        metric is read from the node's hash, multiplied by its weight and
        summed. The sum is stored in the node's hash and in the score's
        sorted set.
        """
        for score_name in self.scores:
            metrics_with_weights = self.scores[score_name]
            score_key = self.score_prefix + score_name

            for node in self.nodes:
                node_key = self.node_prefix + str(node)
                score_value = 0.0

                # Get normalized values.
                # NOTE(review): float() raises if the normalized field is
                # missing from the hash - confirm normalize_metrics always
                # writes it for every metric used in a score.
                for metric in metrics_with_weights:
                    weight = metrics_with_weights[metric]
                    value = float(self.redis.hget(node_key, metric + self.normalization_suffix))
                    score_value += weight * value

                self.redis.hset(node_key, score_name, score_value)
                self.redis.zadd(score_key, score_value, str(node))

    def calculate_advanced_scores(self):
        """Call every method registered in ``advanced_scores`` with ``self``."""
        for advanced_score in self.advanced_scores:
            score_method = self.advanced_scores[advanced_score]
            score_method(self)

    #############
    # statistics
    #############

    def calculate_statistics(self):
        """Calculate statistics for all metrics and scores, then correlations.

        Metrics (base and advanced) are processed twice: once for the absolute
        values and once for the normalized values. Scores are processed once.
        """
        for metric in self.base_metrics:
            # Absolute and normalized.
            statistics.calculate_statistics(self, metric, self.metric_prefix + metric)
            statistics.calculate_statistics(self, metric + self.normalization_suffix, self.metric_prefix + metric + self.normalization_suffix)

        for advanced_metric in self.advanced_metrics:
            # Absolute and normalized.
            statistics.calculate_statistics(self, advanced_metric, self.metric_prefix + advanced_metric)
            statistics.calculate_statistics(self, advanced_metric + self.normalization_suffix, self.metric_prefix + advanced_metric + self.normalization_suffix)

        for score in self.scores:
            statistics.calculate_statistics(self, score, self.score_prefix + score)

        for advanced_score in self.advanced_scores:
            statistics.calculate_statistics(self, advanced_score, self.score_prefix + advanced_score)

        statistics.calculate_correlations(self)
|
|
169 |
|
|
|
|
1 |
|
import networkx as nx |
|
2 |
|
import redis as rd |
|
3 |
|
import numpy as np |
|
4 |
|
import indexing |
|
5 |
|
import statistics |
|
6 |
|
import normalizations |
|
7 |
|
import config |
|
8 |
|
|
|
9 |
|
|
|
10 |
|
class MetricCalculator(object):
    """Compute per-node metrics and scores for a graph and store them in Redis.

    Which metrics, normalizations and scores are calculated is taken from the
    ``config`` module, as are the Redis host and port. Every result is written
    both to a per-node hash and to a per-metric (or per-score) sorted set.
    """

    def __init__(self, graph):
        """Store the graph, open the Redis connection and copy the config.

        Args:
            graph: the graph to work on; its nodes are read via ``nx.nodes``.
        """
        self.graph = graph
        self.redis = rd.StrictRedis(host=config.REDIS_HOST, port=config.REDIS_PORT, db=1)
        self.nodes = nx.nodes(graph)

        # Configuration variables are read from the config module and also
        # saved on the instance for easy access.
        self.node_index_key = config.node_index_key
        self.metric_index_key = config.metric_index_key
        self.score_index_key = config.score_index_key

        self.node_neighbors_prefix = config.node_neighbors_prefix
        self.node_prefix = config.node_prefix
        self.metric_prefix = config.metric_prefix
        self.score_prefix = config.score_prefix
        self.statistics_prefix = config.statistics_prefix

        self.normalization_suffix = config.normalization_suffix

        self.base_metrics = config.base_metrics
        self.advanced_metrics = config.advanced_metrics

        self.normalization_methods = config.normalization_methods

        self.scores = config.scores
        self.advanced_scores = config.advanced_scores

    def start(self):
        """Run the full pipeline: flush, index, calculate, then statistics."""
        # Remove all data from the current Redis database before recomputing.
        self.redis.flushdb()

        # Index creation.
        self.create_indexes()

        # Main calculations.
        self.calculate_metrics()
        self.calculate_advanced_metrics()
        self.normalize_metrics()
        self.calculate_scores()
        self.calculate_advanced_scores()

        # Statistics.
        self.calculate_statistics()

    ##################
    #### INDEXING ####
    ##################
    def create_indexes(self):
        """Create the node, neighbor, metric and score indexes.

        Delegates to the functions defined in ``indexing.py``.
        """
        indexing.index_nodes(self)
        indexing.index_neighbors(self)
        indexing.index_metrics(self)
        indexing.index_scores(self)

    ###########################
    #### CALCULATION LOOPS ####
    ###########################

    def _calculate_and_store(self, metrics):
        """Evaluate every metric in ``metrics`` for every node and store it.

        Args:
            metrics: mapping of metric name to a callable that is invoked as
                ``method(self, node)``; its result is converted to ``float``.
        """
        for metric_name in metrics:
            metric_method = metrics[metric_name]
            metric_key = self.metric_prefix + metric_name

            for node in self.nodes:
                # Metric methods receive the node as an int.
                node = int(node)
                value = float(metric_method(self, node))

                # Store the result in the node's hash ...
                self.redis.hset(self.node_prefix + str(node), metric_name, value)

                # ... and in the metric's sorted set, scored by the value.
                self.redis.zadd(metric_key, value, str(node))

    def calculate_metrics(self):
        """Calculate and store all metrics listed in ``base_metrics``."""
        self._calculate_and_store(self.base_metrics)

    def calculate_advanced_metrics(self):
        """Calculate and store all metrics listed in ``advanced_metrics``."""
        self._calculate_and_store(self.advanced_metrics)

    def normalize_metrics(self):
        """Normalize every base and advanced metric.

        The method registered for a metric in ``normalization_methods`` is
        called as ``method(self, metric_name)``. Metrics without an entry fall
        back to ``normalizations.min_max``.
        """
        # Merge by copy + update instead of concatenating items() lists so the
        # code runs on both Python 2 and Python 3. On a name clash the
        # advanced metric wins, exactly as with the previous list merge.
        all_metrics = dict(self.base_metrics)
        all_metrics.update(self.advanced_metrics)

        for metric_name in all_metrics:
            if metric_name in self.normalization_methods:
                normalization_method = self.normalization_methods[metric_name]
            else:
                # Fallback normalization is min-max.
                normalization_method = normalizations.min_max
            normalization_method(self, metric_name)

    def calculate_scores(self):
        """Calculate each configured score as a weighted sum per node.

        For every score in ``scores`` the normalized value of each listed
        metric is read from the node's hash, multiplied by its weight and
        summed. The sum is stored in the node's hash and in the score's
        sorted set.
        """
        for score_name in self.scores:
            metrics_with_weights = self.scores[score_name]
            score_key = self.score_prefix + score_name

            for node in self.nodes:
                node_key = self.node_prefix + str(node)
                score_value = 0.0

                # Get normalized values.
                # NOTE(review): float() raises if the normalized field is
                # missing from the hash - confirm normalize_metrics always
                # writes it for every metric used in a score.
                for metric in metrics_with_weights:
                    weight = metrics_with_weights[metric]
                    value = float(self.redis.hget(node_key, metric + self.normalization_suffix))
                    score_value += weight * value

                self.redis.hset(node_key, score_name, score_value)
                self.redis.zadd(score_key, score_value, str(node))

    def calculate_advanced_scores(self):
        """Call every method registered in ``advanced_scores`` with ``self``."""
        for advanced_score in self.advanced_scores:
            score_method = self.advanced_scores[advanced_score]
            score_method(self)

    #############
    # statistics
    #############

    def calculate_statistics(self):
        """Calculate statistics for all metrics and scores, then correlations.

        Metrics (base and advanced) are processed twice: once for the absolute
        values and once for the normalized values. Scores are processed once.
        """
        for metric in self.base_metrics:
            # Absolute and normalized.
            statistics.calculate_statistics(self, metric, self.metric_prefix + metric)
            statistics.calculate_statistics(self, metric + self.normalization_suffix, self.metric_prefix + metric + self.normalization_suffix)

        for advanced_metric in self.advanced_metrics:
            # Absolute and normalized.
            statistics.calculate_statistics(self, advanced_metric, self.metric_prefix + advanced_metric)
            statistics.calculate_statistics(self, advanced_metric + self.normalization_suffix, self.metric_prefix + advanced_metric + self.normalization_suffix)

        for score in self.scores:
            statistics.calculate_statistics(self, score, self.score_prefix + score)

        for advanced_score in self.advanced_scores:
            statistics.calculate_statistics(self, advanced_score, self.score_prefix + advanced_score)

        statistics.calculate_correlations(self)
|
169 |
|
|