Subject SHA-1 Author Date
Fix redis DB configuration 5ccfec34b4bcb3323071ea5667da2a375e219a3e Vasilis Ververis 2017-04-10 19:42:42
Fix redis DB in config dffd5d2deb0c5d8043ff43fc3fe6cb1efd3bc990 Vasilis Ververis 2017-04-10 10:23:15
Set proper host address to redis, remove extra spaces, set ff to unix dd87734e46d0756f7d7f48b5de5591ffd487c8ab Vasilis Ververis 2017-04-04 22:48:03
Add redis conf, datasets, gitignore, convert files 34fd00db0525b875e2f9afbe5a10af28fe06b03b Vasilis Ververis 2017-03-16 09:52:30
Commig cdebeb923331f9081529a023c00cb1f0543e3d55 Mathias Ehlert 2014-12-07 16:07:08
Commit 34fd00db0525b875e2f9afbe5a10af28fe06b03b - Add redis conf, datasets, gitignore, convert files
Add 2012 and test datasets
Convert files format to unix
Add gitignore and remove .pyc files
Set redis configuration

Author: Vasilis Ververis
Author date (UTC): 2017-03-16 09:52
Committer: Vasilis Ververis
Commit date (UTC): 2017-03-16 09:52
Tree: 4609b5d10e5a44675a11de945b8b0a04b47740fe
Parents: cdebeb923331f9081529a023c00cb1f0543e3d55
File Lines added Lines deleted
.gitignore 1 0
advancedscores.pyc 0 0
config.py 88 84
config.pyc 0 0
data/Dataset_2012.txt 244757 0
data/test_dataset.txt 10 0
file_importer.pyc 0 0
indexing.pyc 0 0
metric_calculator.py 180 179
metric_calculator.pyc 0 0
metrics.pyc 0 0
normalizations.pyc 0 0
statistics.pyc 0 0

File .gitignore added (mode: 100644) (index 0000000..0d20b64)
1 *.pyc

File advancedscores.pyc deleted (index ce98f24..0000000)

File config.py changed (mode: 100644) (index 8410e0b..c18bb46)
1 #config.py
2 import metrics
3 import normalizations
4 import advancedscores
5
6 #redis keys for indexes and values
7 node_index_key = 'all_nodes'
8 metric_index_key = 'all_metrics'
9 score_index_key = 'all_scores'
10
11 node_neighbors_prefix = 'node_neighbors:'
12 node_prefix = 'node_metrics:'
13 metric_prefix = 'metric:'
14 score_prefix = 'score:'
15 statistics_prefix = 'statistics:'
16
17 normalization_suffix = '_normalized'
18
19 # definition of all base metrics for which absolute values will be calculcated for each node in the first step
20 # key is the name of the metric and value is the implemented method which exposes the required interface
21 # interface: each method takes the node as the single parameter, performs the necessary calculation and
22 # returns a float containing the value for the specified node
23
24 base_metrics = { 'clustering_coefficient' : metrics.clustering_coefficient,
25 'degree' : metrics.degree,
26 'average_neighbor_degree' : metrics.average_neighbor_degree,
27 'iterated_average_neighbor_degree': metrics.iterated_average_neighbor_degree,
28 # 'betweenness_centrality' : metrics.betweenness_centrality,
29 'betweenness_centrality_gt' : metrics.betweenness_centrality_gt,
30 # 'eccentricity' : metrics.eccentricity,
31 'average_shortest_path_length' : metrics.average_shortest_path_length
32 }
33
34
35 # some metrics might require some corrections or post processing which relies on the value of other metrics or normalizations
36 # key is the metric name and value the method for correction
37
38 advanced_metrics = {'corrected_clustering_coefficient' : metrics.correct_clustering_coefficient,
39 'corrected_average_neighbor_degree' : metrics.correct_average_neighbor_degree,
40 'corrected_iterated_average_neighbor_degree': metrics.correct_iterated_average_neighbor_degree}
41
42
43 # for every metric, a normalization method has to be specified
44 # key is the name of the metric and value is the normalization method which also has to expose the required interface
45 # interface: normalization methods, take the name of the (absolute) metric as the single argument, no return value is required
46 # the method itself shall access the data which is required for normalization from the redis instance
47 # and the corresponding keys/values for the specified metric
48 # it shall then loop over all nodes and calculate the normalized value for the node and the metric
49 # afterwards it should save the result to redis using "metric_name_normalized" as the key
50 # the result is stored inside the node's hash for metrics
51
52 # also needs to include corrected metrics with their respective names
53 #
54 normalization_methods = { 'clustering_coefficient' : normalizations.min_max,
55 'corrected_clustering_coefficient' : normalizations.min_max,
56 'degree' : normalizations.min_max,
57 'average_neighbor_degree' : normalizations.min_max,
58 'corrected_average_neighbor_degree' : normalizations.min_max,
59 'iterated_average_neighbor_degree' : normalizations.min_max,
60 'corrected_iterated_average_neighbor_degree': normalizations.min_max,
61 # 'betweenness_centrality' : normalizations.min_max,
62 'betweenness_centrality_gt' : normalizations.min_max,
63 # 'eccentricity' : normalizations.max_min,
64 'average_shortest_path_length' : normalizations.max_min
65 }
66
67
68 # the easiest case for a score is a combination of normalized metric values with a weight which adds up to 1
69 # such scores can easily be defined here
70 # note: names are not methods but redis keys
71
72 scores = {'unified_risk_score': { 'degree': 0.25,
73 'corrected_average_neighbor_degree': 0.15,
74 'corrected_iterated_average_neighbor_degree': 0.1,
75 'betweenness_centrality_gt': 0.25,
76 # 'eccentricity': 0.125,
77 'average_shortest_path_length': 0.25}
78 }
79
80
81 # other scores might require a more sophisticated algorithm to be calculated
82 # such scores need to be added here and implemented like the example below
83
84 advanced_scores = {'advanced_unified_risk_score': advancedscores.adv_unified_risk_score}
1 #config.py
2 import metrics
3 import normalizations
4 import advancedscores
5
6 #redis keys for indexes and values
7 node_index_key = 'all_nodes'
8 metric_index_key = 'all_metrics'
9 score_index_key = 'all_scores'
10
11 node_neighbors_prefix = 'node_neighbors:'
12 node_prefix = 'node_metrics:'
13 metric_prefix = 'metric:'
14 score_prefix = 'score:'
15 statistics_prefix = 'statistics:'
16
17 normalization_suffix = '_normalized'
18
19 # definition of all base metrics for which absolute values will be calculcated for each node in the first step
20 # key is the name of the metric and value is the implemented method which exposes the required interface
21 # interface: each method takes the node as the single parameter, performs the necessary calculation and
22 # returns a float containing the value for the specified node
23
24 base_metrics = { 'clustering_coefficient' : metrics.clustering_coefficient,
25 'degree' : metrics.degree,
26 'average_neighbor_degree' : metrics.average_neighbor_degree,
27 'iterated_average_neighbor_degree': metrics.iterated_average_neighbor_degree,
28 # 'betweenness_centrality' : metrics.betweenness_centrality,
29 'betweenness_centrality_gt' : metrics.betweenness_centrality_gt,
30 # 'eccentricity' : metrics.eccentricity,
31 'average_shortest_path_length' : metrics.average_shortest_path_length
32 }
33
34
35 # some metrics might require some corrections or post processing which relies on the value of other metrics or normalizations
36 # key is the metric name and value the method for correction
37
38 advanced_metrics = {'corrected_clustering_coefficient' : metrics.correct_clustering_coefficient,
39 'corrected_average_neighbor_degree' : metrics.correct_average_neighbor_degree,
40 'corrected_iterated_average_neighbor_degree': metrics.correct_iterated_average_neighbor_degree}
41
42
43 # for every metric, a normalization method has to be specified
44 # key is the name of the metric and value is the normalization method which also has to expose the required interface
45 # interface: normalization methods, take the name of the (absolute) metric as the single argument, no return value is required
46 # the method itself shall access the data which is required for normalization from the redis instance
47 # and the corresponding keys/values for the specified metric
48 # it shall then loop over all nodes and calculate the normalized value for the node and the metric
49 # afterwards it should save the result to redis using "metric_name_normalized" as the key
50 # the result is stored inside the node's hash for metrics
51
52 # also needs to include corrected metrics with their respective names
53 #
54 normalization_methods = { 'clustering_coefficient' : normalizations.min_max,
55 'corrected_clustering_coefficient' : normalizations.min_max,
56 'degree' : normalizations.min_max,
57 'average_neighbor_degree' : normalizations.min_max,
58 'corrected_average_neighbor_degree' : normalizations.min_max,
59 'iterated_average_neighbor_degree' : normalizations.min_max,
60 'corrected_iterated_average_neighbor_degree': normalizations.min_max,
61 # 'betweenness_centrality' : normalizations.min_max,
62 'betweenness_centrality_gt' : normalizations.min_max,
63 # 'eccentricity' : normalizations.max_min,
64 'average_shortest_path_length' : normalizations.max_min
65 }
66
67
68 # the easiest case for a score is a combination of normalized metric values with a weight which adds up to 1
69 # such scores can easily be defined here
70 # note: names are not methods but redis keys
71
72 scores = {'unified_risk_score': { 'degree': 0.25,
73 'corrected_average_neighbor_degree': 0.15,
74 'corrected_iterated_average_neighbor_degree': 0.1,
75 'betweenness_centrality_gt': 0.25,
76 # 'eccentricity': 0.125,
77 'average_shortest_path_length': 0.25}
78 }
79
80
81 # other scores might require a more sophisticated algorithm to be calculated
82 # such scores need to be added here and implemented like the example below
83
84 advanced_scores = {'advanced_unified_risk_score': advancedscores.adv_unified_risk_score}
85
86 # Redis
87 REDIS_PORT = 6379
88 REDIS_HOST = 'redis'

File config.pyc deleted (index 80b9cca..0000000)

The diff for file data/Dataset_2012.txt is too big (244757 changes) and cannot be shown.

File data/test_dataset.txt added (mode: 100644) (index 0000000..47cab8b)
1 1 2
2 1 3
3 1 4
4 2 5
5 2 6
6 3 4
7 3 6
8 4 7
9 5 8
10 6 9

File file_importer.pyc deleted (index 0d69976..0000000)

File indexing.pyc deleted (index fca8491..0000000)

File metric_calculator.py changed (mode: 100644) (index c2cc665..888c63b)
1 import networkx as nx
2 import graph_tool.all as gt
3 import redis as rd
4 import numpy as np
5 import indexing
6 import statistics
7 import normalizations
8 import config
9 import datetime as dt
10
11
12 class MetricCalculator(object):
13 def __init__ (self, graph,graph_gt):
14 #class constructor
15 #define required class variables such as the graph to work on, the redis connection and the nodes of the graph
16
17 print ('Starting metric_calculator!')
18 self.graph = graph
19 self.graph_gt = graph_gt
20 # self.graph_gt_labels = graph_gt_labels
21 self.redis = rd.StrictRedis(host='localhost', port=6379, db=0)
22 self.nodes = nx.nodes(graph)
23
24
25 # configuration variables are read from the config file and are also saved to class variables for easy access
26 self.node_index_key = config.node_index_key
27 self.metric_index_key = config.metric_index_key
28 self.score_index_key = config.score_index_key
29
30 self.node_neighbors_prefix = config.node_neighbors_prefix
31 self.node_prefix = config.node_prefix
32 self.metric_prefix = config.metric_prefix
33 self.score_prefix = config.score_prefix
34 self.statistics_prefix = config.statistics_prefix
35
36 self.normalization_suffix = config.normalization_suffix
37
38 self.base_metrics = config.base_metrics
39 self.advanced_metrics = config.advanced_metrics
40
41 self.normalization_methods = config.normalization_methods
42
43 self.scores = config.scores
44 self.advanced_scores = config.advanced_scores
45
46
47
48 def start(self):
49 #clean all data in Redis
50 self.redis.flushdb()
51
52 #index creation
53 self.create_indexes()
54
55
56 #main calculations
57 self.calculate_metrics()
58 self.calculate_advanced_metrics()
59 self.normalize_metrics()
60 self.calculate_scores()
61 self.calculate_advanced_scores()
62
63 #statistics
64 self.calculate_statistics()
65
66 ##################
67 #### INDEXING ####
68 ##################
69 def create_indexes(self):
70 #call methods defined in indexing.py
71 indexing.index_nodes(self)
72 indexing.index_neighbors(self)
73 indexing.index_metrics(self)
74 indexing.index_scores(self)
75
76 ###########################
77 #### CALCULATION LOOPS ####
78 ###########################
79
80 def calculate_metrics(self):
81 # loop through all defined metrics and call specified calculation method for each node
82 print ('Starting calculate_metrics')
83 for metric_name in self.base_metrics:
84 metric_method = self.base_metrics[metric_name]
85
86 # loop through all nodes
87 for node in self.nodes:
88 # call calculation method of supplied metric for current node
89 node = int(node)
90 value = float(metric_method(self,node))
91
92 #store result in node values
93 self.redis.hset(self.node_prefix+str(node), metric_name, value)
94
95 #also store result to metric set
96 self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
97
98
99 def calculate_advanced_metrics(self):
100 # loop through all defined_advanced_metrics and call specified calculation method
101 print ('Starting calculate_advanced_metrics')
102 for advanced_metric_name in self.advanced_metrics:
103 metric_method = self.advanced_metrics[advanced_metric_name]
104
105 # loop through all nodes
106 for node in self.nodes:
107 node = int(node)
108 value = float(metric_method(self,node))
109
110 #store result in node values
111 self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)
112
113 #also store result to metric set
114 self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
115
116
117 # loop through all defined normalizations and call respective normalization method
118 # no default normalizations for metrics not listed in the "normalization_methods" hash
119 def normalize_metrics(self):
120 #fallback normalization: min-max
121 print ('Starting normalize_metrics')
122 all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())
123
124 for metric_name in all_metrics:
125 if self.normalization_methods.has_key(metric_name):
126 normalization_method = self.normalization_methods[metric_name]
127 else:
128 #fallback normalization is min-max
129 normalization_method = normalizations.min_max
130 normalization_method(self,metric_name)
131
132
133 def calculate_scores(self):
134 print ('Starting calculate_scores')
135 for score_name in self.scores:
136 metrics_with_weights = self.scores[score_name]
137
138 for node in self.nodes:
139 score_value = 0.0
140
141 # get normalized values
142 for metric in metrics_with_weights:
143 weight = self.scores[score_name][metric]
144 value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
145 score_value += weight * value
146
147 self.redis.hset(self.node_prefix+str(node),score_name, score_value)
148 self.redis.zadd(self.score_prefix+score_name, score_value, str(node))
149
150 def calculate_advanced_scores(self):
151 print ('Starting calculate_advanced_scores')
152 for advanced_score in self.advanced_scores:
153 self.advanced_scores[advanced_score](self)
154
155
156 #############
157 # statistics
158 #############
159
160 def calculate_statistics(self):
161 print ('Starting calculate_statistics')
162 for metric in self.base_metrics:
163 #absolute and normalized
164 statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
165 statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)
166
167 for advanced_metric in self.advanced_metrics:
168 #absolute and normalized
169 statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
170 statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)
171
172 for score in self.scores:
173 statistics.calculate_statistics(self, score, self.score_prefix+score)
174
175 for advanced_score in self.advanced_scores:
176 statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
177
178 statistics.calculate_correlations(self)
179
1 import networkx as nx
2 import graph_tool.all as gt
3 import redis as rd
4 import numpy as np
5 import indexing
6 import statistics
7 import normalizations
8 import config
9 import datetime as dt
10
11
12 class MetricCalculator(object):
13 def __init__ (self, graph,graph_gt):
14 #class constructor
15 #define required class variables such as the graph to work on, the redis connection and the nodes of the graph
16
17 print ('Starting metric_calculator!')
18 self.graph = graph
19 self.graph_gt = graph_gt
20 self.redis = rd.StrictRedis(host=config.REDIS_HOST, port=config.REDIS_PORT, db=1)
21 # self.graph_gt_labels = graph_gt_labels
22 self.redis = rd.StrictRedis(host='localhost', port=6379, db=0)
23 self.nodes = nx.nodes(graph)
24
25
26 # configuration variables are read from the config file and are also saved to class variables for easy access
27 self.node_index_key = config.node_index_key
28 self.metric_index_key = config.metric_index_key
29 self.score_index_key = config.score_index_key
30
31 self.node_neighbors_prefix = config.node_neighbors_prefix
32 self.node_prefix = config.node_prefix
33 self.metric_prefix = config.metric_prefix
34 self.score_prefix = config.score_prefix
35 self.statistics_prefix = config.statistics_prefix
36
37 self.normalization_suffix = config.normalization_suffix
38
39 self.base_metrics = config.base_metrics
40 self.advanced_metrics = config.advanced_metrics
41
42 self.normalization_methods = config.normalization_methods
43
44 self.scores = config.scores
45 self.advanced_scores = config.advanced_scores
46
47
48
49 def start(self):
50 #clean all data in Redis
51 self.redis.flushdb()
52
53 #index creation
54 self.create_indexes()
55
56
57 #main calculations
58 self.calculate_metrics()
59 self.calculate_advanced_metrics()
60 self.normalize_metrics()
61 self.calculate_scores()
62 self.calculate_advanced_scores()
63
64 #statistics
65 self.calculate_statistics()
66
67 ##################
68 #### INDEXING ####
69 ##################
70 def create_indexes(self):
71 #call methods defined in indexing.py
72 indexing.index_nodes(self)
73 indexing.index_neighbors(self)
74 indexing.index_metrics(self)
75 indexing.index_scores(self)
76
77 ###########################
78 #### CALCULATION LOOPS ####
79 ###########################
80
81 def calculate_metrics(self):
82 # loop through all defined metrics and call specified calculation method for each node
83 print ('Starting calculate_metrics')
84 for metric_name in self.base_metrics:
85 metric_method = self.base_metrics[metric_name]
86
87 # loop through all nodes
88 for node in self.nodes:
89 # call calculation method of supplied metric for current node
90 node = int(node)
91 value = float(metric_method(self,node))
92
93 #store result in node values
94 self.redis.hset(self.node_prefix+str(node), metric_name, value)
95
96 #also store result to metric set
97 self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
98
99
100 def calculate_advanced_metrics(self):
101 # loop through all defined_advanced_metrics and call specified calculation method
102 print ('Starting calculate_advanced_metrics')
103 for advanced_metric_name in self.advanced_metrics:
104 metric_method = self.advanced_metrics[advanced_metric_name]
105
106 # loop through all nodes
107 for node in self.nodes:
108 node = int(node)
109 value = float(metric_method(self,node))
110
111 #store result in node values
112 self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)
113
114 #also store result to metric set
115 self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
116
117
118 # loop through all defined normalizations and call respective normalization method
119 # no default normalizations for metrics not listed in the "normalization_methods" hash
120 def normalize_metrics(self):
121 #fallback normalization: min-max
122 print ('Starting normalize_metrics')
123 all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())
124
125 for metric_name in all_metrics:
126 if self.normalization_methods.has_key(metric_name):
127 normalization_method = self.normalization_methods[metric_name]
128 else:
129 #fallback normalization is min-max
130 normalization_method = normalizations.min_max
131 normalization_method(self,metric_name)
132
133
134 def calculate_scores(self):
135 print ('Starting calculate_scores')
136 for score_name in self.scores:
137 metrics_with_weights = self.scores[score_name]
138
139 for node in self.nodes:
140 score_value = 0.0
141
142 # get normalized values
143 for metric in metrics_with_weights:
144 weight = self.scores[score_name][metric]
145 value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
146 score_value += weight * value
147
148 self.redis.hset(self.node_prefix+str(node),score_name, score_value)
149 self.redis.zadd(self.score_prefix+score_name, score_value, str(node))
150
151 def calculate_advanced_scores(self):
152 print ('Starting calculate_advanced_scores')
153 for advanced_score in self.advanced_scores:
154 self.advanced_scores[advanced_score](self)
155
156
157 #############
158 # statistics
159 #############
160
161 def calculate_statistics(self):
162 print ('Starting calculate_statistics')
163 for metric in self.base_metrics:
164 #absolute and normalized
165 statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
166 statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)
167
168 for advanced_metric in self.advanced_metrics:
169 #absolute and normalized
170 statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
171 statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)
172
173 for score in self.scores:
174 statistics.calculate_statistics(self, score, self.score_prefix+score)
175
176 for advanced_score in self.advanced_scores:
177 statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
178
179 statistics.calculate_correlations(self)
180

File metric_calculator.pyc deleted (index 4854fd8..0000000)

File metrics.pyc deleted (index 0700311..0000000)

File normalizations.pyc deleted (index b814a04..0000000)

File statistics.pyc deleted (index b85b89d..0000000)
Hints:
Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://rocketgit.com/user/coria/coria-backend

Clone this repository using SSH (do not forget to upload a key first):
git clone ssh://rocketgit@ssh.rocketgit.com/user/coria/coria-backend

Clone this repository using the git protocol:
git clone git://git.rocketgit.com/user/coria/coria-backend

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a pull request:
... clone the repository ...
... make some changes and some commits ...
git push origin master