File config.py added (mode: 100644) (index 0000000..59cc2dd)

#config.py
import metrics
import normalizations
import advancedscores
import percolation
import visualization

#redis keys for indexes and values
graph_index_key = 'all_graphs'

info_index_key        = 'general_info'
node_index_key        = 'all_nodes'
metric_index_key      = 'all_metrics'
score_index_key       = 'all_scores'
percolation_index_key = 'all_percolation_modes'
layout_index_key      = 'all_layouts'

node_neighbors_prefix = 'node_neighbors:'
node_prefix           = 'node_metrics:'
metric_prefix         = 'metric:'
score_prefix          = 'score:'
statistics_prefix     = 'statistics:'
percolation_prefix    = 'percolation:'

normalization_suffix = '_normalized'

# definition of all base metrics for which absolute values will be calculated for each node in the first step
# key is the name of the metric and value is the implemented method which exposes the required interface
# interface: each method takes the node as the single parameter, performs the necessary calculation and
# returns a float containing the value for the specified node

base_metrics = {'clustering_coefficient'               : metrics.clustering_coefficient,
                'degree'                               : metrics.degree,
#               'degree_(gt)'                          : metrics.degree_gt,
                'average_neighbor_degree'              : metrics.average_neighbor_degree,
                'iterated_average_neighbor_degree'     : metrics.iterated_average_neighbor_degree,
#               'iterated_average_neighbor_degree_(gt)': metrics.iterated_average_neighbor_degree,
#               'betweenness_centrality'               : metrics.betweenness_centrality,
                'betweenness_centrality_(gt)'          : metrics.betweenness_centrality_gt,
#               'eccentricity'                         : metrics.eccentricity,
                'eccentricity_(gt)'                    : metrics.eccentricity_gt,
#               'eccentricity_(gt)_s'                  : metrics.eccentricity_gt_s,
#               'average_shortest_path_length'         : metrics.average_shortest_path_length,
                'average_shortest_path_length_(gt)'    : metrics.average_shortest_path_length_gt,
#               'average_shortest_path_length_(gt)_s'  : metrics.average_shortest_path_length_gt_small_graphs,
                'eigenvector_centrality_(gt)'          : metrics.eigenvector_centrality_gt,
#               'eigenvector_centrality'               : metrics.eigenvector_centrality,
#               'deterioration'                        : metrics.deterioration
               }
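The interface comment above is slightly loose: as metrics.py shows, each metric method actually receives the MetricCalculator instance plus the node. A minimal sketch of how an additional metric could be plugged in (the name reciprocal_degree is hypothetical and not part of this commit):

    # in metrics.py (sketch): conforms to the base-metric interface,
    # i.e. returns a float for the given node
    def reciprocal_degree(self, node):
        degree = self.graph.degree(node)
        return 1.0 / degree if degree > 0 else 0.0

    # in config.py (sketch): register it alongside the entries above
    # base_metrics['reciprocal_degree'] = metrics.reciprocal_degree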

# some metrics might require corrections or post-processing which relies on the value of other metrics or normalizations
# key is the metric name and value the method for correction

advanced_metrics = {'corrected_clustering_coefficient'           : metrics.correct_clustering_coefficient,
                    'corrected_average_neighbor_degree'          : metrics.correct_average_neighbor_degree,
                    'corrected_iterated_average_neighbor_degree' : metrics.correct_iterated_average_neighbor_degree}

# for every metric, a normalization method has to be specified
# key is the name of the metric and value is the normalization method, which also has to expose the required interface
# interface: normalization methods take the name of the (absolute) metric as the single argument; no return value is required
# the method itself shall read the data required for normalization from the redis instance
# and the corresponding keys/values for the specified metric
# it shall then loop over all nodes and calculate the normalized value for the node and the metric
# afterwards it should save the result to redis using "metric_name_normalized" as the key
# the result is stored inside the node's hash for metrics

# this also needs to include corrected metrics with their respective names
normalization_methods = {'clustering_coefficient'                     : normalizations.min_max,
                         'corrected_clustering_coefficient'           : normalizations.min_max,
                         'degree'                                     : normalizations.min_max,
                         'degree_(gt)'                                : normalizations.min_max,
                         'average_neighbor_degree'                    : normalizations.min_max,
                         'corrected_average_neighbor_degree'          : normalizations.min_max,
                         'iterated_average_neighbor_degree'           : normalizations.min_max,
                         'iterated_average_neighbor_degree_(gt)'      : normalizations.min_max,
                         'corrected_iterated_average_neighbor_degree' : normalizations.min_max,
                         'betweenness_centrality'                     : normalizations.min_max,
                         'betweenness_centrality_(gt)'                : normalizations.min_max,
                         'eccentricity'                               : normalizations.max_min,
                         'eccentricity_(gt)'                          : normalizations.max_min,
                         'eccentricity_(gt)_s'                        : normalizations.max_min,
                         'average_shortest_path_length'               : normalizations.max_min,
                         'average_shortest_path_length_(gt)'          : normalizations.max_min,
                         'average_shortest_path_length_(gt)_s'        : normalizations.max_min,
                         'eigenvector_centrality_(gt)'                : normalizations.min_max,
                         'eigenvector_centrality'                     : normalizations.min_max,
                         'deterioration'                              : normalizations.min_max
                        }

# the easiest case for a score is a combination of normalized metric values with weights which add up to 1
# such scores can easily be defined here
# note: names are not methods but redis keys

scores = {'unified_risk_score': {'degree'                                     : 0.25,
                                 'corrected_average_neighbor_degree'          : 0.15,
                                 'corrected_iterated_average_neighbor_degree' : 0.1,
                                 'betweenness_centrality_(gt)'                : 0.25,
#                                'eccentricity'                               : 0.125,
                                 'average_shortest_path_length_(gt)'          : 0.25}
         }
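The weights above sum to 1 (0.25 + 0.15 + 0.1 + 0.25 + 0.25), so the score stays in [0, 1] as long as every normalized metric does. A quick sketch of how such a score resolves for a single node, with hypothetical normalized values:

    weights = {'degree': 0.25,
               'corrected_average_neighbor_degree': 0.15,
               'corrected_iterated_average_neighbor_degree': 0.1,
               'betweenness_centrality_(gt)': 0.25,
               'average_shortest_path_length_(gt)': 0.25}
    normalized = {'degree': 0.8,                                  # hypothetical values
                  'corrected_average_neighbor_degree': 0.5,
                  'corrected_iterated_average_neighbor_degree': 0.4,
                  'betweenness_centrality_(gt)': 0.9,
                  'average_shortest_path_length_(gt)': 0.7}
    score = sum(weights[m] * normalized[m] for m in weights)
    # 0.25*0.8 + 0.15*0.5 + 0.1*0.4 + 0.25*0.9 + 0.25*0.7 = 0.715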

# other scores might require a more sophisticated algorithm to be calculated
# such scores need to be added here and implemented like the example below

advanced_scores = {'advanced_unified_risk_score': advancedscores.adv_unified_risk_score}

# these are the different percolation modes with name as key and method as value
# advanced modes have sub-modes for e.g. each metric

percolation_modes = {'failure'         : percolation.failure,
                     'random_walk'     : percolation.random_walk,
                     'russian_shutoff' : percolation.russian
                    }

advanced_percolation_modes = {'target_list' : percolation.target_list,
                              'hybrid_mode' : percolation.hybrid_mode
                             }

# layouts for graph visualization
# note 1: ARF does not seem to work with most graphs (error message: non-invertible matrix)
# note 2: the Fruchterman-Reingold layout (FRUCHT) takes up a high percentage of computation time
visualization_layouts = {#'SFDP'                 : visualization.sfdp,
                         'Radial'                : visualization.radial,
                         #'Random'               : visualization.random,
                         #'ARF'                  : visualization.arf,
                         #'Fruchterman_Reingold' : visualization.frucht
                        }
File file_importer.py added (mode: 100644) (index 0000000..88c66d8)

import networkx as nx
import graph_tool.all as gt

class FileImporter(object):
    def __init__(self, filename):
        # initialize the data file to parse and new empty graphs
        print 'Starting file importer!'
        self.data_file = open(filename)
        self.graph = nx.Graph()
        self.graph_gt = gt.Graph(directed=False)
        self.graph_gt_labels = self.graph_gt.new_vertex_property("double")

    def read(self):
        for line in self.data_file:
            print "Parsing line", line
            self.parse_line(line)
        return self.graph

    def read_gt(self):
        return {'graph_gt': self.graph_gt, 'graph_gt_labels': self.graph_gt_labels}

    def parse_line(self, line):
        # split each line on tabs:
        # the first field specifies the source node,
        # the second field specifies the target node
        fields = line.strip().split("\t")
        from_node = int(fields[0])
        to_node = int(fields[1])

        # add the edge to the networkx graph, skipping self-loops
        if from_node != to_node:
            self.graph.add_edge(from_node, to_node)

        # add the edge to the graph_tool graph and maintain a property map of labels:
        # check whether the nodes are already present and create new ones if not
        temp = self.graph_gt
        temp_name = self.graph_gt_labels

        check = None
        if from_node != to_node:  # skip self-loops
            index_from = gt.find_vertex(temp, temp_name, from_node)
            index_to = gt.find_vertex(temp, temp_name, to_node)
            if index_from == [] and index_to == []:
                # neither vertex exists yet: create both
                c1 = temp.add_vertex()
                temp_name[temp.vertex(c1)] = from_node
                c2 = temp.add_vertex()
                temp_name[temp.vertex(c2)] = to_node
            if index_from != [] and index_to == []:
                # only the source vertex exists
                c1 = index_from[0]
                c2 = temp.add_vertex()
                temp_name[temp.vertex(c2)] = to_node
            if index_to != [] and index_from == []:
                # only the target vertex exists
                c1 = temp.add_vertex()
                c2 = index_to[0]
                temp_name[temp.vertex(c1)] = from_node
            if index_from != [] and index_to != []:
                # both vertices exist already
                c1 = index_to[0]
                c2 = index_from[0]
                check = temp.edge(c1, c2)  # check whether the edge is already present
            if check is None:
                temp.add_edge(c1, c2)

        self.graph_gt = temp
        self.graph_gt_labels = temp_name

        # Check whether the graph_tool and NetworkX graphs have the same number of nodes and edges
        # if self.graph_gt.num_vertices() != self.graph.number_of_nodes():
        #     print('Unequal number of vertices detected at from node', from_node, 'to node', to_node)
        #     print('Number of vertices in the graph_tool graph is', self.graph_gt.num_vertices())
        #     print('Number of vertices in NetworkX is', self.graph.number_of_nodes())
        # else:
        #     print('Equal number of vertices in both graphs')

        # if self.graph_gt.num_edges() != self.graph.number_of_edges():
        #     print('Unequal number of edges detected at from node', from_node, 'to node', to_node)
        #     print('Number of edges in the graph_tool graph is', self.graph_gt.num_edges())
        #     print('Number of edges in NetworkX is', self.graph.number_of_edges())
        # else:
        #     print('Equal number of edges in both graphs')
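A minimal usage sketch for the importer (the file name is hypothetical; the entry-point script is not part of this commit). The expected input is one tab-separated pair of integer node ids per line:

    importer = FileImporter('edges.tsv')   # hypothetical file name
    nx_graph = importer.read()             # parses every line, returns the networkx graph
    gt_data = importer.read_gt()           # {'graph_gt': ..., 'graph_gt_labels': ...}
    print nx_graph.number_of_nodes(), gt_data['graph_gt'].num_vertices()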
File metric_calculator.py added (mode: 100644) (index 0000000..26c1f58)

import networkx as nx
import graph_tool.all as gt
import redis as rd
import numpy as np
import indexing
import statistics
import normalizations
import config
import percolation
import visualization
import datetime as dt


class MetricCalculator(object):
    def __init__(self, graph, graph_gt):
        # class constructor
        # define required class variables such as the graph to work on, the redis connection and the nodes of the graph
        print 'Starting metric_calculator!'

        # for code evaluation
        self.start_time = dt.datetime.now()
        self.durations = {}
        self.durations_in_seconds = {}
        self.durations_in_percent = {}

        self.graph = graph
        self.graph_gt = graph_gt

        # alternate name for the graph_tool graph
        self.g = self.graph_gt['graph_gt']
        # alternate name for the graph_tool labels
        self.g.vp.label_map = self.graph_gt['graph_gt_labels']
        self.label_map = self.g.vp.label_map
        # vertex property map for percolation calculations
        self.g.vp.exmap = self.g.new_vertex_property("bool")  # internalizes the map
        self.exclusion_map = self.g.vp.exmap
        self.exclusion_map.a = 1  # initialise the filter map
        # find the largest component of the graph_tool graph for percolation calculations
        # percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
        self.glc = gt.GraphView(self.g, vfilt=gt.label_largest_component(self.g))

        self.redis = rd.StrictRedis(host='localhost', port=6379, db=1)
        self.nodes = nx.nodes(graph)

        # configuration variables are read from the config file and are also saved to class variables for easy access
        self.graph_index_key = config.graph_index_key

        self.graph_name = ''
        while self.graph_name == '':
            self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")

        self.info_index_key = self.graph_name+':'+config.info_index_key
        self.node_index_key = self.graph_name+':'+config.node_index_key
        self.metric_index_key = self.graph_name+':'+config.metric_index_key
        self.score_index_key = self.graph_name+':'+config.score_index_key
        self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
        self.layout_index_key = self.graph_name+':'+config.layout_index_key

        self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
        self.node_prefix = self.graph_name+':'+config.node_prefix
        self.metric_prefix = self.graph_name+':'+config.metric_prefix
        self.score_prefix = self.graph_name+':'+config.score_prefix
        self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
        self.percolation_prefix = self.graph_name+':'+config.percolation_prefix

        self.normalization_suffix = config.normalization_suffix

        self.base_metrics = config.base_metrics
        self.advanced_metrics = config.advanced_metrics

        self.normalization_methods = config.normalization_methods

        self.scores = config.scores
        self.advanced_scores = config.advanced_scores

        self.visualization_layouts = config.visualization_layouts

        self.percolation_modes = config.percolation_modes
        self.advanced_percolation_modes = config.advanced_percolation_modes

    ###############################################################################
    ###### start describes the entire calculation in a high-level overview #######
    ###############################################################################

    def start(self):
        start_time_calculation = dt.datetime.now()

        # preliminary calculations
        self.flush_database()
        self.obtain_percentages()
        self.create_info()
        self.create_standard_layout()
        self.save_graph_data('raw')

        # index creation
        self.create_indexes()

        # main calculations
        self.calculate_metrics()
        self.calculate_advanced_metrics()
        self.normalize_metrics()
        self.calculate_scores()
        self.calculate_advanced_scores()

        # statistics
        self.calculate_statistics()

        # dynamic metrics / percolation
        self.calculate_percolation()

        # visualization
        self.visualize_graph()

        # save the final graph
        self.save_graph_data('full')

        # evaluation
        self.duration_total = dt.datetime.now() - start_time_calculation
        self.evaluate_durations()

    ###################
    ## PRELIMINARIES ##
    ###################
    def flush_database(self):
        # ask whether to clear all data in Redis
        flush_flag = 'Flushing'
        while flush_flag != 'y' and flush_flag != 'n':
            flush_flag = raw_input("Would you like to flush the database before continuing? [y/n]")
        if flush_flag == 'y':
            self.redis.flushdb()

    def obtain_percentages(self):
        # obtain percentages for the calculation of deterioration
        # and calculate the number of nodes to remove from the graph
        percentages = ''  # initialise
        while percentages == '':
            percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")

        percentages = sorted([float(pct) for pct in percentages.split()])
        numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
        # create a dictionary with numbers of nodes as keys and the corresponding percentages as values
        self.percentages = dict(zip(numbers, percentages))
        # store the values in the redis DB
        self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
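The counts are obtained by rounding half-up against the size of the largest component. A quick check with hypothetical numbers: for a largest component of 1234 vertices and the input "1 2 5 10", self.percentages becomes a count-to-percentage map:

    glc_size = 1234                                    # hypothetical largest-component size
    pcts = [1.0, 2.0, 5.0, 10.0]
    numbers = [int(glc_size*(p/100.0)+0.5) for p in pcts]
    print numbers                                      # [12, 25, 62, 123]
    print dict(zip(numbers, pcts))                     # {12: 1.0, 25: 2.0, 62: 5.0, 123: 10.0}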

    def create_info(self):
        # store general info about the graph
        self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
        self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())

    def create_standard_layout(self):
        # create a standard layout
        start_time = dt.datetime.now()
        print 'Creating standard layout for graph visualization.'
        if not hasattr(self.g.vp, 'sfdp'):
            self.sfdp = gt.sfdp_layout(self.g, C=0.5)
            self.g.vp['sfdp'] = self.sfdp
        else:
            self.sfdp = self.g.vp['sfdp']
        self.durations['SFDP_layout'] = dt.datetime.now() - start_time

    def save_graph_data(self, name):
        # save the graph
        start_time = dt.datetime.now()
        print 'Saving graph data: '+name
        self.g.save(self.graph_name+'_'+name+'.gt.gz')
        self.durations['saving_graph'+name] = dt.datetime.now() - start_time

    ##################
    #### INDEXING ####
    ##################
    def create_indexes(self):
        start_time = dt.datetime.now()
        # call the methods defined in indexing.py
        indexing.index_graph(self)
        indexing.index_nodes(self)
        indexing.index_neighbors(self)
        indexing.index_metrics(self)
        indexing.index_scores(self)
        #indexing.index_percolation(self)
        self.durations['indexing'] = dt.datetime.now() - start_time

    ###########################
    #### CALCULATION LOOPS ####
    ###########################

    def calculate_metrics(self):
        start_time_total = dt.datetime.now()
        # loop through all defined metrics and call the specified calculation method for each node
        print 'Starting calculate_metrics'
        for metric_name in self.base_metrics:
            start_time = dt.datetime.now()
            metric_method = self.base_metrics[metric_name]

            # loop through all nodes
            for node in self.nodes:
                # call the calculation method of the supplied metric for the current node
                node = int(node)
                value = float(metric_method(self, node))

                # store the result in the node's values
                self.redis.hset(self.node_prefix+str(node), metric_name, value)

                # also store the result in the metric's sorted set
                self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
            self.durations['metrics:'+metric_name] = dt.datetime.now() - start_time
        self.durations['metrics_total'] = dt.datetime.now() - start_time_total
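After this loop, each value is stored twice: in the node's hash and in a per-metric sorted set keyed by the value, which is what the zrange-based rankings in percolation.py later rely on. A sketch of inspecting the stored results, assuming the graph was named 'mygraph' and using a hypothetical node id 42:

    import redis as rd
    r = rd.StrictRedis(host='localhost', port=6379, db=1)
    print r.hgetall('mygraph:node_metrics:42')                        # all metric values of node 42
    print r.zrange('mygraph:metric:degree', -3, -1, withscores=True)  # three highest-degree nodes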

    def calculate_advanced_metrics(self):
        start_time_total = dt.datetime.now()
        # loop through all defined advanced metrics and call the specified calculation method
        print 'Starting calculate_advanced_metrics'
        for advanced_metric_name in self.advanced_metrics:
            start_time = dt.datetime.now()
            metric_method = self.advanced_metrics[advanced_metric_name]

            # loop through all nodes
            for node in self.nodes:
                node = int(node)
                value = float(metric_method(self, node))

                # store the result in the node's values
                self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)

                # also store the result in the metric's sorted set
                self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
            self.durations['adv_metrics:'+advanced_metric_name] = dt.datetime.now() - start_time
        self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total

    # loop through all defined normalizations and call the respective normalization method
    # there are no default normalizations for metrics not listed in the "normalization_methods" hash
    def normalize_metrics(self):
        start_time = dt.datetime.now()
        print 'Starting normalize_metrics'
        all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())

        for metric_name in all_metrics:
            if self.normalization_methods.has_key(metric_name):
                normalization_method = self.normalization_methods[metric_name]
            else:
                # fallback normalization is min-max
                normalization_method = normalizations.min_max
            normalization_method(self, metric_name)

        self.durations['normalizing'] = dt.datetime.now() - start_time

    def calculate_scores(self):
        start_time = dt.datetime.now()
        print 'Starting calculate_scores'
        for score_name in self.scores:
            metrics_with_weights = self.scores[score_name]

            for node in self.nodes:
                score_value = 0.0

                # combine the normalized values with their weights
                for metric in metrics_with_weights:
                    weight = self.scores[score_name][metric]
                    value = float(self.redis.hget(self.node_prefix+str(node), metric+self.normalization_suffix))
                    score_value += weight * value

                self.redis.hset(self.node_prefix+str(node), score_name, score_value)
                self.redis.zadd(self.score_prefix+score_name, score_value, str(node))

        self.durations['scores'] = dt.datetime.now() - start_time

    def calculate_advanced_scores(self):
        start_time = dt.datetime.now()
        print 'Starting calculate_advanced_scores'
        for advanced_score in self.advanced_scores:
            self.advanced_scores[advanced_score](self)

        self.durations['adv_scores'] = dt.datetime.now() - start_time

    ##############
    # statistics #
    ##############

    def calculate_statistics(self):
        start_time = dt.datetime.now()
        print 'Starting calculate_statistics'
        for metric in self.base_metrics:
            # absolute and normalized
            statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
            statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)

        for advanced_metric in self.advanced_metrics:
            # absolute and normalized
            statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
            statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)

        for score in self.scores:
            statistics.calculate_statistics(self, score, self.score_prefix+score)

        for advanced_score in self.advanced_scores:
            statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
        self.durations['statistics:stats'] = dt.datetime.now() - start_time

        start_time = dt.datetime.now()
        statistics.calculate_correlations(self)
        self.durations['statistics:corr'] = dt.datetime.now() - start_time

    ###################
    # dynamic metrics #
    ###################

    def calculate_percolation(self):
        start_time_total = dt.datetime.now()
        print 'Starting percolation calculation'

        # shorter name for the percentages and corresponding numbers of nodes to remove
        n = self.percentages

        # BASIC PERCOLATION MODES
        # basic percolation modes take mode_name and n as input and return a
        # dictionary with the percentage of nodes removed as key and the
        # percentage of deterioration as value
        for mode_name in self.percolation_modes:
            start_time = dt.datetime.now()
            # initialise the exclusion vertex property map
            self.exclusion_map.a = 1
            # read the method from the config file
            mode_method = self.percolation_modes[mode_name]
            # execute the method
            results = mode_method(self, mode_name, n)
            # index the percolation mode
            self.redis.sadd(self.percolation_index_key, mode_name)
            # store the values
            print 'Storing percolation percentages'
            for percentage in results:
                value = results[percentage]
                # store in a hash set
                self.redis.hset(self.percolation_prefix+mode_name, percentage, value)

            self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

        # ADVANCED PERCOLATION MODES
        # advanced percolation modes take mode_name and n as input and return a
        # dictionary with groups of percolation modes (e.g. metrics, countries)
        # as keys and dictionaries of percentages (removed: deteriorated) as values
        for mode_name in self.advanced_percolation_modes:
            start_time = dt.datetime.now()
            # initialise the exclusion vertex property map
            self.exclusion_map.a = 1
            # read the method from the config file
            mode_method = self.advanced_percolation_modes[mode_name]
            # execute the method
            results = mode_method(self, mode_name, n)

            # store the values
            print 'Storing percolation percentages'
            for group in results:
                # index the percolation modes
                self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
                for percentage in results[group]:
                    value = results[group][percentage]
                    # store in a hash set
                    self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)

            self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

        self.durations['percolation_total'] = dt.datetime.now() - start_time_total

    def visualize_graph(self):
        for layout_name in self.visualization_layouts:
            start_time = dt.datetime.now()
            print 'Creating visualisation with '+layout_name+' layout'

            layout_method = self.visualization_layouts[layout_name]
            pos = layout_method(self)
            gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")

            self.redis.sadd(self.layout_index_key, layout_name)
            self.durations['layout:'+layout_name] = dt.datetime.now() - start_time

    def evaluate_durations(self):
        # print out the times taken
        print 'times taken:'
        output = open(str(self.graph_name)+"_duration_test.txt", "w")
        output.write("Graph Name:\t"+str(self.graph_name)+"\n")
        output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
        output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
        output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str, self.percentages.values()))+"\n\n")
        output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
        for key in self.durations:
            self.durations_in_seconds[key] = self.durations[key].total_seconds()
            self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0

            print str(key)+'\t'+str(self.durations_in_percent[key])
            output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')
        output.close()
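The entry-point script is not part of this commit, but the two classes compose naturally. A minimal sketch of how the pipeline would be driven (file name hypothetical):

    from file_importer import FileImporter
    from metric_calculator import MetricCalculator

    importer = FileImporter('edges.tsv')    # hypothetical edge-list file
    graph = importer.read()                 # networkx graph
    graph_gt = importer.read_gt()           # {'graph_gt': ..., 'graph_gt_labels': ...}

    mc = MetricCalculator(graph, graph_gt)  # prompts for a graph name
    mc.start()                              # runs the whole pipeline described in start()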
File metrics.py added (mode: 100644) (index 0000000..6692689)

#metrics.py
import networkx as nx
import numpy as np
import datetime as dt
import graph_tool.all as gt

def clustering_coefficient(self, node):
    print 'Calculating clustering_coefficient for node', node
    # on the first run, calculate the metric for all nodes at once and save it in a hash of the instance to access later
    # NOTE: this should result in a performance gain, but for very large graphs this might be a problem;
    #       in that case, just returning nx.clustering(self.graph, node) might be better
    if not hasattr(self, 'all_clustering_coefficients'):
        self.all_clustering_coefficients = nx.clustering(self.graph)

    # get the actual value from the pre-calculated hash
    return self.all_clustering_coefficients[node]

def degree(self, node):
    print 'Calculating degree for node', node
    return self.graph.degree(node)

def degree_gt(self, node):
    print 'Calculating degree with graph_tool for node', node
    # find the index of the node
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]

    # calculate the degree for all nodes
    if not hasattr(self.g.vp, 'degree'):
        self.g.vp['degree'] = self.g.degree_property_map("total")

    return self.g.vp.degree[node_index]

def eigenvector_centrality_gt(self, node):
    print 'Calculating eigenvector centrality with graph_tool for node', node

    if not hasattr(self.g.vertex_properties, 'eigenvector'):
        eigenvalue, eigenvector = gt.eigenvector(self.g)
        self.g.vertex_properties.eigenvector = eigenvector
        self.eigenvalue = eigenvalue

    node_index = gt.find_vertex(self.g, self.label_map, node)[0]

    # this has been scaled by the eigenvalue for nicer values
    return self.g.vp.eigenvector[self.g.vertex(node_index)]*float(self.eigenvalue)

def eigenvector_centrality(self, node):
    print 'Calculating eigenvector centrality for node', node

    if not hasattr(self, 'all_eigenvector_centralities'):
        self.all_eigenvector_centralities = nx.eigenvector_centrality(self.graph, max_iter=100000)

    return self.all_eigenvector_centralities[node]

def average_neighbor_degree(self, node):
    print 'Calculating average_neighbor_degree for node', node
    # same caching technique as in clustering_coefficient
    # might also break for very large graphs;
    # nx.average_neighbor_degree(self.graph, nodes=node) might be the way to go

    if not hasattr(self, 'all_average_neighbor_degrees'):
        self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
    return self.all_average_neighbor_degrees[node]

def iterated_average_neighbor_degree(self, node):
    print 'Calculating iterated_average_neighbor_degree for node', node
    result = 0  # initialise

    first_level_neighbors = self.graph.neighbors(node)
    if len(first_level_neighbors) != 0:
        second_level_neighbors = []
        # get all two-hop nodes
        for first_level_neighbor in first_level_neighbors:
            current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
            second_level_neighbors.extend(current_second_level_neighbors)

        # remove one-hop nodes and the node itself
        relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])

        if len(relevant_nodes) != 0:
            degree_sum = 0
            for relevant_node in relevant_nodes:
                degree_sum += self.graph.degree(relevant_node)
            result = float(degree_sum)/float(len(relevant_nodes))
    return result

def iterated_average_neighbour_degree_gt(self, node):
    print 'Calculating iterated_average_neighbour_degree with graph_tool for node', node

    result = 0  # initialise

    vertex = gt.find_vertex(self.g, self.label_map, node)[0]
    # all_neighbours() yields vertices lazily, so materialise it as a list
    first_level_neighbours = list(vertex.all_neighbours())

    if len(first_level_neighbours) != 0:
        second_level_neighbours = []
        # get all two-hop nodes
        for first_level_neighbour in first_level_neighbours:
            current_second_level_neighbours = first_level_neighbour.all_neighbours()
            second_level_neighbours.extend(current_second_level_neighbours)

        # remove one-hop nodes and the vertex itself
        relevant_vertices = set(second_level_neighbours) - set(first_level_neighbours) - set([vertex])

        if len(relevant_vertices) != 0:
            # if the degree has not been calculated yet, calculate it for all nodes
            if not hasattr(self.g.vp, 'degree'):
                self.g.vp['degree'] = self.g.degree_property_map("total")

            degree_sum = 0  # initialise
            for relevant_vertex in relevant_vertices:
                degree_sum += self.g.vp.degree[relevant_vertex]
            result = float(degree_sum)/float(len(relevant_vertices))
    return result

def eccentricity(self, node):
    print 'Calculating eccentricity for node', node
    if not hasattr(self, 'all_eccentricities'):
        l = gt.label_largest_component(self.g)  # find the largest component
        print 'Found the largest component'
        u = gt.GraphView(self.g, vfilt=l)  # extract the largest component as a graph
        print 'The number of vertices in the largest component is', u.num_vertices()
        print 'The number of vertices in the original graph is', self.g.num_vertices()
        if u.num_vertices() == nx.number_of_nodes(self.graph):
            print 'Graph is connected'
            self.all_eccentricities = nx.eccentricity(self.graph)
            print 'Calculated all eccentricities'
            return self.all_eccentricities[node]
        else:
            print 'Graph is disconnected'
            self.all_eccentricities = {}
    if self.all_eccentricities != {}:
        print 'Returning eccentricity for', node, '-', self.all_eccentricities[node]
        return self.all_eccentricities[node]
    else:
        print 'Returning 0'
        return 0

def eccentricity_gt(self, node):
    print 'Calculating eccentricity with graph_tool for node', node

    # find the index of the node
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]

    if not hasattr(self.g.gp, 'pseudo_diameter'):
        # find the approximate diameter
        print 'Finding maximum distance for walk'
        self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
        self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
        # the endpoints will not be used

    # find all distances from the node
    distances = gt.shortest_distance(self.g, node_index, max_dist=self.g.gp.pseudo_diameter+1).a
    # calculate the maximum, masking out unreachable vertices
    maximum = np.ma.max(np.ma.masked_where(distances > 2147483646, distances), 0)
    return maximum

def eccentricity_gt_s(self, node):
    print 'Calculating eccentricity for small graphs with graph_tool for node', node
    eccentricity = 0  # initialise

    # find the index of the node
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # get all shortest path lengths
    if not hasattr(self, 'all_distances'):
        self.all_distances = gt.shortest_distance(self.g)

    for distance in self.all_distances[node_index]:
        if distance < 2147483647:  # disregard all nodes which are not reachable
            eccentricity = max(eccentricity, distance)
    return eccentricity

def betweenness_centrality(self, node):
    print 'Calculating betweenness_centrality for node', node
    if not hasattr(self, 'all_betweenness_centralities'):
        self.all_betweenness_centralities = nx.betweenness_centrality(self.graph)
    return self.all_betweenness_centralities[node]

def betweenness_centrality_gt(self, node):
    print 'Calculating betweenness_centrality with graph_tool for node', node

    # def convert_graph(g):
    #     # converts a networkx graph to graph_tool
    #     # important: NetworkX node indexes start with 1, whereas graph_tool node indexes start with 0
    #     adj = nx.adjacency_matrix(g)
    #     j = gt.Graph(directed=False)
    #     j.add_vertex(len(adj))
    #     num_vertices = adj.shape[0]
    #     for i in range(num_vertices - 1):
    #         for l in range(i + 1, num_vertices):
    #             if adj[i, l] != 0:
    #                 j.add_edge(i, l)
    #     return j

    if not hasattr(self.g.vertex_properties, 'betweenness'):
        vp, ep = gt.betweenness(self.g)
        # internalize the property maps
        self.g.vertex_properties.betweenness = vp
        self.g.edge_properties.betweenness = ep
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]

    return self.g.vp.betweenness[self.g.vertex(node_index)]

def average_shortest_path_length(self, node):
    print 'Calculating average_shortest_path_length for node', node
    # caching average_shortest_path_length for all nodes at once failed,
    # so this already uses single-node calculation

    # get all shortest path lengths
    all_shortest_path_lengths_for_node = nx.shortest_path_length(self.graph, source=node)

    # calculate the average
    sum_of_lengths = 0
    for target in all_shortest_path_lengths_for_node:
        sum_of_lengths += all_shortest_path_lengths_for_node[target]

    return float(sum_of_lengths)/len(all_shortest_path_lengths_for_node)

def average_shortest_path_length_gt(self, node):
    print 'Calculating average_shortest_path_length with graph_tool for node', node
    # find the index of the node
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]

    if not hasattr(self.g.gp, 'pseudo_diameter'):
        # find the approximate diameter
        print 'Finding maximum distance for walk'
        self.g.gp['pseudo_diameter'] = self.g.new_gp("int")
        self.g.gp.pseudo_diameter, endpoints = gt.pseudo_diameter(self.glc)
        # the endpoints will not be used

    # find all distances from the node
    distances = gt.shortest_distance(self.g, node_index, max_dist=self.g.gp.pseudo_diameter+1).a
    # calculate the average, masking out unreachable vertices
    average = np.ma.average(np.ma.masked_where(distances > 2147483646, distances))
    return float(average)
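Both graph_tool variants above use the same convention: gt.shortest_distance marks unreachable vertices (and vertices beyond max_dist) with a large int32 sentinel, so masking everything above 2147483646 keeps only real distances. A standalone sketch of that masking step with a hypothetical distance array:

    import numpy as np

    distances = np.array([0, 1, 2, 2147483647, 3])  # fourth vertex unreachable
    masked = np.ma.masked_where(distances > 2147483646, distances)
    print np.ma.max(masked)      # 3   -> the eccentricity
    print np.ma.average(masked)  # 1.5 -> the average shortest path length (self-distance 0 included)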

def average_shortest_path_length_gt_small_graphs(self, node):
    print 'Calculating average_shortest_path_length for small graphs with graph_tool for node', node
    result = 0  # initialise

    # find the index of the node
    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    # get all shortest path lengths
    if not hasattr(self, 'all_distances'):
        self.all_distances = gt.shortest_distance(self.g)

    distances = self.all_distances[node_index]
    # calculate the average
    sum_of_distances = 0
    accessible_nodes = 0
    for distance in distances:
        if distance < 2147483647:  # disregard all nodes in other components
            sum_of_distances += distance
            accessible_nodes += 1
    if accessible_nodes != 0:
        result = float(sum_of_distances)/float(accessible_nodes)
    return result

def deterioration(self, node):
    print 'Calculating deterioration due to removal of node', node

    node_index = gt.find_vertex(self.g, self.label_map, node)[0]
    self.exclusion_map[node_index] = 0  # take out the node
    u = gt.GraphView(self.g, vfilt=self.exclusion_map)
    u = gt.GraphView(self.g, vfilt=gt.label_largest_component(u))
    p = 100.0*(1.0-float(u.num_vertices())/float(self.glc.num_vertices()))
    self.exclusion_map[node_index] = 1  # reset the node

    return p

####################
# advanced metrics #
####################
def correct_clustering_coefficient(self, node):
    print 'Calculating correct_clustering_coefficient for node', node
    clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node), 'clustering_coefficient'))
    degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
    max_degree = self.redis.zrange(self.metric_prefix+'degree', -1, -1, withscores=True, score_cast_func=float)[0][1]
    corrected_cc = clustering_coefficient * np.log(degree) / np.log(max_degree)
    return corrected_cc
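The correction damps the raw coefficient by the factor log(degree)/log(max_degree), so low-degree nodes are scaled down the most. A quick arithmetic check with hypothetical values:

    import numpy as np

    # raw clustering coefficient 0.5, degree 8, maximum degree 64 in the graph
    corrected = 0.5 * np.log(8) / np.log(64)
    print corrected   # ln(64) = 2*ln(8), so this is 0.5 * 0.5 = 0.25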

def correct_clustering_coefficient_old(self, node):
    print 'Calculating correct_clustering_coefficient for node', node
    clustering_coefficient = float(self.redis.hget(self.node_prefix+str(node), 'clustering_coefficient'))
    degree = float(self.redis.hget(self.node_prefix+str(node), 'degree'))
    corrected_cc = clustering_coefficient + (degree * clustering_coefficient) / 4.0
    return corrected_cc

def correct_average_neighbor_degree(self, node):
    print 'Calculating correct_average_neighbor_degree for node', node
    avgnd = float(self.redis.hget(self.node_prefix+str(node), 'average_neighbor_degree'))

    if avgnd == 0.0:
        result = avgnd
    else:
        neighbors = self.graph.neighbors(node)
        number_of_neighbors = float(len(neighbors))
        if number_of_neighbors == 0.0:
            result = avgnd
        else:
            neighbor_degrees = []
            for neighbor in neighbors:
                neighbor_degrees.append(self.graph.degree(neighbor))

            # using the numpy median and standard deviation implementations
            numpy_neighbor_degrees = np.array(neighbor_degrees)
            standard_deviation = np.std(numpy_neighbor_degrees)
            if standard_deviation == 0.0:
                result = avgnd
            else:
                median = np.median(numpy_neighbor_degrees)
                result = avgnd + (((median - avgnd) / standard_deviation) / number_of_neighbors) * avgnd
    return result

def correct_iterated_average_neighbor_degree(self, node):
    print 'Calculating correct_iterated_average_neighbor_degree for node '+str(node)
    iand = float(self.redis.hget(self.node_prefix+str(node), 'iterated_average_neighbor_degree'))
    ciand = iand
    if iand != 0.0:
        first_level_neighbors = self.graph.neighbors(node)
        second_level_neighbors = []

        # get all two-hop nodes
        for first_level_neighbor in first_level_neighbors:
            current_second_level_neighbors = self.graph.neighbors(first_level_neighbor)
            second_level_neighbors.extend(current_second_level_neighbors)

        # remove one-hop neighbors and the node itself
        relevant_nodes = set(second_level_neighbors) - set(first_level_neighbors) - set([node])

        if len(relevant_nodes) != 0:
            node_degrees = []
            for relevant_node in relevant_nodes:
                node_degrees.append(self.graph.degree(relevant_node))

            numpy_node_degrees = np.array(node_degrees)
            standard_deviation = np.std(numpy_node_degrees)
            if standard_deviation != 0.0:
                median = np.median(numpy_node_degrees)
                ciand = iand + (((median - iand) / standard_deviation) / float(len(relevant_nodes))) * iand
    return ciand
File percolation.py added (mode: 100644) (index 0000000..7038340)

import graph_tool.all as gt
import numpy as np
import datetime as dt
import visualization

###############################################
### NOTE: we use the largest component, not ###
### the entire graph, for the calculation   ###
###############################################

###############################################
### functions used by all percolation modes ###
###############################################
def percolation(percolated_graph, intact_graph):
    return 100.0*(1.0-float(percolated_graph.num_vertices())/float(intact_graph.num_vertices()))

def print_info(flc, glc):
    print 'filtered graph - vertices: '+str(flc.num_vertices())+' / edges: '+str(flc.num_edges())
    print 'percolation: '+str(percolation(flc, glc))+'%'
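percolation() expresses deterioration as the relative shrinkage of the largest component, in percent. A quick check with a hypothetical stand-in that exposes the only method percolation() uses:

    class _FakeGraph(object):
        def __init__(self, n): self._n = n
        def num_vertices(self): return self._n

    print percolation(_FakeGraph(640), _FakeGraph(1000))  # 100*(1 - 640/1000) = 36.0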

# the function below was needed in previous versions of CoRiA because the set of set members was nested within another set
#def read_redis_smembers(redis, key):
#    s = redis.smembers(key)  # read the set
#    return [i.strip() for i in [l.strip('[]').split(',') for l in s][0]]  # build a list stripped of useless characters

#################################
####### percolation modes #######
#################################

# These percolation modes take as input the mode name and n - a dictionary of
# numbers of nodes to take out (as keys) and corresponding percentages (as values).
# They return a dictionary of percentage keys and percolation values.
# Advanced percolation modes nest this dictionary within a dictionary of groups.
# Therefore, they require a loop over these groups, which can be e.g. metrics or countries.
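A sketch of the shapes involved, continuing the hypothetical "1 2 5 10" input on a 1234-vertex largest component (all deterioration values invented for illustration):

    # input n: number of nodes to remove -> percentage of nodes removed
    n = {12: 1.0, 25: 2.0, 62: 5.0, 123: 10.0}

    # basic mode result: percentage removed -> percentage of deterioration
    #   {1.0: 0.3, 2.0: 1.1, 5.0: 4.2, 10.0: 11.5}

    # advanced mode result: group -> {percentage removed -> deterioration}
    #   {'degree': {1.0: 2.0, ...}, 'unified_risk_score': {1.0: 2.4, ...}}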
|
36 |
|
|
|
37 |
|
################################# |
|
38 |
|
#### BASIC PERCOLATION MODES #### |
|
39 |
|
################################# |
|
40 |
|
def failure(self, mode_name, n): |
|
41 |
|
print 'Calculating percolation due to random failure' |
|
42 |
|
# initialise |
|
43 |
|
counter = 0 |
|
44 |
|
results = {} |
|
45 |
|
|
|
46 |
|
# take a random sample from the largest component |
|
47 |
|
for v in np.random.choice(list(self.glc.vertices()),size=max(n.keys()),replace=False): |
|
48 |
|
self.exclusion_map[self.g.vertex(v)] = 0 |
|
49 |
|
counter += 1 |
|
50 |
|
if counter in n.keys(): |
|
51 |
|
print counter,'nodes removed' |
|
52 |
|
# graph without the excluded vertices (i.e. those that have value 0 in the exclusion map) |
|
53 |
|
f = gt.GraphView(self.g, vfilt = self.exclusion_map) |
|
54 |
|
# largest component of graph f |
|
55 |
|
flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f)) |
|
56 |
|
print_info(flc,self.glc) |
|
57 |
|
results[n[counter]] = percolation(flc,self.glc) |
|
58 |
|
# visualize deterioration |
|
59 |
|
# visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+str(int(n[counter]))+'_pct') |
|
60 |
|
|
|
61 |
|
return results |
|
62 |
|
|
|
63 |
|
##################################################################### |
|
64 |
|
|
|
65 |
|
def random_walk(self, mode_name, n): |
|
66 |
|
print 'Calculating percolation due to random walk' |
|
67 |
|
#first vertex for random walk |
|
68 |
|
start = self.glc.vertex(np.random.randint(0,self.glc.num_vertices()), use_index=False) |
|
69 |
|
|
|
70 |
|
#do random walk |
|
71 |
|
alternate_list = list(self.label_map.a) |
|
72 |
|
np.random.shuffle(alternate_list) |
|
73 |
|
results = rw(self,start,n,alternate_list,mode_name) |
|
74 |
|
|
|
75 |
|
#return dict(zip(percentages,percolations)) |
|
76 |
|
return results |
|
77 |
|
|
|
78 |
|
##################################################################### |
|
79 |
|
|
|
80 |
|
################################# |
|
81 |
|
## ADVANCED PERCOLATION MODES ### |
|
82 |
|
################################# |
|
83 |
|
def target_list(self, mode_name, n): |
|
84 |
|
print 'Calculating percolation due to targeted attack , i.e. taking out top nodes from a target list' |
|
85 |
|
# instantiate results dictionary and target lists |
|
86 |
|
results = {} |
|
87 |
|
nodes_max = {} |
|
88 |
|
|
|
89 |
|
#loop through all metrics |
|
90 |
|
all_metrics = list(self.base_metrics.keys() + self.advanced_metrics.keys()) |
|
91 |
|
for metric in all_metrics: |
|
92 |
|
#get nodes with highest value of metric |
|
93 |
|
nodes_max[metric] = self.redis.zrange(self.metric_prefix+metric+self.normalization_suffix, -max(n.keys()), -1, withscores=False, score_cast_func=float).reverse() |
|
94 |
|
#loop through all scores |
|
95 |
|
all_scores = list(self.scores.keys() + self.advanced_scores.keys()) |
|
96 |
|
for score in all_scores: |
|
97 |
|
#get nodes with highest value of score |
|
98 |
|
nodes_max[score] = self.redis.zrange(self.score_prefix+score, -max(n.keys()), -1, withscores=False, score_cast_func=float).reverse() |
|
99 |
|
|
|
100 |
|
#loop through all metrics and scores |
|
101 |
|
for metric in all_metrics+all_scores: |
|
102 |
|
print 'Taking out top nodes for metric',metric |
|
103 |
|
|
|
104 |
|
# initialise variables and exclusion map |
|
105 |
|
counter = 0 |
|
106 |
|
self.exclusion_map.a = 1 |
|
107 |
|
results[metric] = {} |
|
108 |
|
|
|
109 |
|
for node in nodes_max[metric]: |
|
110 |
|
vertex = gt.find_vertex(self.g,self.label_map,node)[0] |
|
111 |
|
self.exclusion_map[vertex] = 0 |
|
112 |
|
counter += 1 |
|
113 |
|
if counter in n.keys(): |
|
114 |
|
print counter,'nodes removed' |
|
115 |
|
# graph without the excluded vertices (i.e. those that have value 0 in the exclusion map) |
|
116 |
|
f = gt.GraphView(self.g, vfilt = self.exclusion_map) |
|
117 |
|
# largest component of graph f |
|
118 |
|
flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f)) |
|
119 |
|
print_info(flc,self.glc) |
|
120 |
|
results[metric][n[counter]] = percolation(flc,self.glc) |
|
121 |
|
# visualize deterioration |
|
122 |
|
# visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+metric+'_'+str(int(n[counter]))+'_pct') |
|
123 |
|
|
|
124 |
|
return results |

#####################################################################

def hybrid_mode(self, mode_name, n):
    print 'Calculating percolation due to random walk starting from node with highest value of metric'
    # instantiate results dictionary and alternate lists for random walk
    results = {}
    alternate_lists = {}

    #loop through all metrics
    all_metrics = list(self.base_metrics.keys() + self.advanced_metrics.keys())
    for metric in all_metrics:
        #get all nodes sorted by value of metric (descending)
        temp_list = self.redis.zrange(self.metric_prefix+metric+self.normalization_suffix, 0, -1, withscores=False, score_cast_func=float)
        alternate_lists[metric] = [node for node in reversed(temp_list)]

    #loop through all scores
    all_scores = list(self.scores.keys() + self.advanced_scores.keys())
    for score in all_scores:
        #get all nodes sorted by value of score (descending)
        temp_list = self.redis.zrange(self.score_prefix+score, 0, -1, withscores=False, score_cast_func=float)
        alternate_lists[score] = [node for node in reversed(temp_list)]

    #loop through all metrics and scores
    for metric in all_metrics+all_scores:
        print 'Starting from node with highest value of metric',metric
        #initialise exclusion vertex property map
        self.exclusion_map.a = 1

        #first vertex for random walk
        start = gt.find_vertex(self.g,self.label_map,alternate_lists[metric][0])[0]

        #do random walk
        results[metric] = rw(self,start,n,alternate_lists[metric],mode_name+'_'+metric)

    return results
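
# Annotation (added in review, not part of the commit): the alternate lists hold
# every node in descending metric order; rw() starts from the top node and falls
# back to the next still-included list entry whenever the walk hits a dead end.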

def russian(self, mode_name, n):
    print 'Calculating percolation due to shutting off the Russian network from the internet'
    # instantiate results dictionary and target lists
    #results = {}
    #nodes_max = {}
    # note the inverted use of the exclusion map in this mode: everything starts
    # excluded and only Russian vertices are switched on, so the graph views and
    # visualizations below show the shut-off part of the network ("inverse")
    self.exclusion_map.a = 0
    counter = 0
    results = {}
    for v in self.g.vertices():
        if self.g.vp.country_code[v] == 'RU':
            print 'Shutting off node',int(v),'because it\'s Russian!'
            self.exclusion_map[v] = 1
            counter += 1
            # if counter in n.keys():
            #     print counter,'nodes removed'
            #     # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
            #     f = gt.GraphView(self.g, vfilt = self.exclusion_map)
            #     # largest component of graph f
            #     flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
            #     print_info(flc,self.glc)
            #     results[n[counter]] = percolation(flc,self.glc)
            #     # visualize deterioration
            #     # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+metric+'_'+str(int(n[counter]))+'_pct')

    f = gt.GraphView(self.g, vfilt = self.exclusion_map)
    flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
    #results[max(n.values())] = percolation(flc,self.g)
    # visualize deterioration
    print 'Creating visualization #1 of the deterioration.'
    visualization.draw_deterioration(self,self.g.vp.sfdp,mode_name+"_SFDP_inverse")
    print 'Creating visualization #2 of the deterioration.'
    visualization.draw_deterioration(self,self.g.vp.Random,mode_name+"_Random_inverse")
    print 'Creating visualization #3 of the deterioration.'
    visualization.draw_deterioration(self,self.g.vp.Radial,mode_name+"_Radial_inverse")
    #return results

#####################################################################
############## Random Walk for the RW deletion modes ################
#####################################################################

# takes as input a start vertex, the number of vertices to take out
# and an alternate list of vertices if the random walk reaches a dead end

def rw(self, vertex, n, alternate_list, mode_name):
    # initialise
    results = {}

    self.exclusion_map[vertex] = 0 #take out start vertex
    # initialise graph filters
    # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
    f = gt.GraphView(self.g, vfilt = self.exclusion_map)
    # largest component of graph f
    flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f))
    if 1 in n.keys():
        print '1 node removed'
        print_info(flc,self.glc)
        results[n[1]] = percolation(flc,self.glc)
        # visualize deterioration
        # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+str(int(n[1]))+'_pct')

    for i in range(max(n.keys())-1):
        neighbours = list(vertex.all_neighbours())
        flag = 0 #decision flag

        # choose a random neighbour that is still part of the graph
        if len(neighbours) > 0:
            np.random.shuffle(neighbours)
            for neighbour in neighbours:
                if self.exclusion_map[neighbour] != 0:
                    vertex = neighbour
                    flag = 1
                    break

        # to be executed if no usable neighbours exist - choose the next node out of an alternative list
        if flag == 0:
            # create a list of already used list members
            used_list = []
            for node in alternate_list:
                vertex = gt.find_vertex(self.g,self.label_map,node)[0]
                used_list.append(node)
                if self.exclusion_map[vertex] != 0:
                    break
            if len(used_list) > 0:
                for used_node in used_list:
                    # remove used members from alternate list. This reduces calculation time in the next iteration
                    alternate_list.remove(used_node)

        self.exclusion_map[vertex] = 0 #take out vertex
        f = gt.GraphView(self.g, vfilt = self.exclusion_map) #update graph (filtered)
        if i+2 in n.keys():
            flc = gt.GraphView(self.g, vfilt = gt.label_largest_component(f)) #update largest component
            print i+2,'nodes removed'
            print_info(flc,self.glc)
            results[n[i+2]] = percolation(flc,self.glc)
            # visualize deterioration
            # visualization.draw_deterioration(self,self.sfdp,mode_name+'_'+str(int(n[i+2]))+'_pct')

    return results
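
# Annotation (added in review, not part of the commit): the start vertex counts
# as removal number 1, so inside the loop i+2 is the running count of removed
# vertices; a result is recorded under its percentage n[i+2] whenever that count
# was one of the requested removal sizes.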

##############################
##############################
########## THE END ###########
##############################
##############################

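Reviewer note (not part of the commit): the percolation measure used by all of
the modes above compares largest connected components before and after
filtering, i.e. percolation = 1 - |LC(filtered)| / |LC(intact)|. A minimal,
self-contained sketch of that calculation, assuming only graph_tool is
installed; the toy graph and variable names are illustrative:

import graph_tool.all as gt

# toy undirected path graph 0-1-2-3-4
g = gt.Graph(directed=False)
g.add_vertex(5)
g.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 4)])

# exclusion map: 1 = keep, 0 = removed (same convention as above)
exclusion_map = g.new_vertex_property("bool")
exclusion_map.a = 1
exclusion_map[g.vertex(2)] = 0  # take out the central vertex

glc = gt.GraphView(g, vfilt=gt.label_largest_component(g))  # intact: 5 vertices
f = gt.GraphView(g, vfilt=exclusion_map)                    # filtered graph
flc = gt.GraphView(g, vfilt=gt.label_largest_component(f))  # surviving: 2 vertices

# 1 - 2/5 = 0.6: removing one central vertex destroys 60% of the component
print(1.0 - float(flc.num_vertices()) / float(glc.num_vertices()))
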
File ru_metric_calculator.py added (mode: 100644) (index 0000000..05d8f41)

import networkx as nx
import graph_tool.all as gt
import redis as rd
import numpy as np
import indexing
import statistics
import normalizations
import config
import percolation
import visualization
import datetime as dt


class MetricCalculator(object):
    def __init__(self, graph, graph_gt):
        #class constructor
        #define required class variables such as the graph to work on, the redis connection and the nodes of the graph

        print ('Starting metric_calculator!')

        # for code evaluation
        self.start_time = dt.datetime.now()
        self.durations = {}
        self.durations_in_seconds = {}
        self.durations_in_percent = {}

        self.graph = graph
        self.graph_gt = graph_gt

        # alternate name for graph tool graph
        self.g = self.graph_gt['graph_gt']
        # alternate name for graph tool labels
        if not hasattr(self.g.vp, 'label_map'):
            self.g.vp.label_map = self.graph_gt['graph_gt_labels']
        self.label_map = self.g.vp.label_map
        # vertex property map for percolation calculations
        if not hasattr(self.g.vp, 'exmap'):
            self.g.vp.exmap = self.g.new_vertex_property("bool") #internalizes map

        self.exclusion_map = self.g.vp.exmap
        self.exclusion_map.a = 1 #initialise filter map
        #find largest component of graph tool graph for percolation calculations
        # percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
        self.glc = gt.GraphView(self.g, vfilt = gt.label_largest_component(self.g))

        if not hasattr(self.g.vp, 'eigenvector'):
            eigenvalue, self.g.vp.eigenvector = gt.eigenvector(self.g)
        if not hasattr(self.g.ep, 'betweenness'):
            # note: only the edge betweenness map is internalized; the vertex map is discarded
            betweenness,self.g.ep.betweenness = gt.betweenness(self.g)

        self.redis = rd.StrictRedis(host='localhost', port=6379, db=1)
        self.nodes = nx.nodes(graph)


        # configuration variables are read from the config file and are also saved to class variables for easy access
        self.graph_index_key = config.graph_index_key

        self.graph_name = ''
        while (self.graph_name == ''):
            self.graph_name = raw_input("Please enter name of graph. This will be used for storing results.\n")

        self.info_index_key = self.graph_name+':'+config.info_index_key
        self.node_index_key = self.graph_name+':'+config.node_index_key
        self.metric_index_key = self.graph_name+':'+config.metric_index_key
        self.score_index_key = self.graph_name+':'+config.score_index_key
        self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
        self.layout_index_key = self.graph_name+':'+config.layout_index_key

        self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
        self.node_prefix = self.graph_name+':'+config.node_prefix
        self.metric_prefix = self.graph_name+':'+config.metric_prefix
        self.score_prefix = self.graph_name+':'+config.score_prefix
        self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
        self.percolation_prefix = self.graph_name+':'+config.percolation_prefix

        self.normalization_suffix = config.normalization_suffix

        self.base_metrics = config.base_metrics
        self.advanced_metrics = config.advanced_metrics

        self.normalization_methods = config.normalization_methods

        self.scores = config.scores
        self.advanced_scores = config.advanced_scores

        self.visualization_layouts = config.visualization_layouts
        # the full mode lists from the config are commented out for testing purposes
        # self.percolation_modes = config.percolation_modes
        # self.advanced_percolation_modes = config.advanced_percolation_modes

        self.percolation_modes = {'russian_shutoff':config.percolation_modes['russian_shutoff']}
        self.advanced_percolation_modes = {}

    ##############################################################################
    ###### start describes the entire calculation in a high level overview #######
    ##############################################################################

    def start(self):
        start_time_calculation = dt.datetime.now()

        #preliminary calculations
        #self.flush_database()
        self.obtain_percentages()
        #self.create_info()
        #self.create_standard_layout()
        #self.save_graph_data('raw')

        #index creation
        #self.create_indexes()

        #main calculations
        #self.calculate_metrics()
        #self.calculate_advanced_metrics()
        #self.normalize_metrics()
        #self.calculate_scores()
        #self.calculate_advanced_scores()

        #statistics
        #self.calculate_statistics()

        #dynamic metrics / percolation
        self.calculate_percolation()

        #visualization
        #self.visualize_graph()

        #save final graph
        self.save_graph_data('russian')

        #evaluation
        self.duration_total = dt.datetime.now() - start_time_calculation
        self.evaluate_durations()


    ###################
    ## PRELIMINARIES ##
    ###################
    def flush_database(self):
        # ask whether to clear all data in Redis before continuing
        flush_flag = 'Flushing'
        while (flush_flag != 'y' and flush_flag != 'n'):
            flush_flag = raw_input("Would you like to flush the database before continuing? [y/n]")
        if flush_flag == 'y':
            self.redis.flushdb()

    def obtain_percentages(self):
        # obtain percentages for calculation of deterioration #
        # and calculate number of nodes to remove from graph ##
        percentages = '' # initialise
        while (percentages == ''):
            percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")

        percentages = sorted([float(pct) for pct in percentages.split()])
        numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
        # create a dictionary mapping numbers of nodes to their corresponding percentages
        self.percentages = dict(zip(numbers,percentages))
        # storing values in redis DB
        #self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
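
        # Worked example (annotation, not part of the commit): for a largest
        # component of 10000 vertices and the input "1 2 5 10", numbers ==
        # [100, 200, 500, 1000] (rounded to the nearest vertex) and
        # self.percentages == {100: 1.0, 200: 2.0, 500: 5.0, 1000: 10.0}.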

    def create_info(self):
        #store general info about graph
        self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
        self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())

    def create_standard_layout(self):
        # create a standard layout
        start_time = dt.datetime.now()
        print 'Creating standard layout for graph visualization.'
        if not hasattr(self.g.vp,'sfdp'):
            self.g.vp.sfdp = gt.sfdp_layout(self.g, C=0.5)
        # the assignment below must stay active: the print relies on it and
        # raised a KeyError while it was commented out
        self.durations['SFDP_layout'] = dt.datetime.now() - start_time
        print self.durations['SFDP_layout']

    def save_graph_data(self,name):
        # save graph
        start_time = dt.datetime.now()
        print 'Saving graph data'
        self.g.save(self.graph_name+'_'+name+'.gt.gz')
        self.durations['saving_graph'+name] = dt.datetime.now() - start_time

    ##################
    #### INDEXING ####
    ##################
    def create_indexes(self):
        start_time = dt.datetime.now()
        #call methods defined in indexing.py
        #indexing.index_graph(self)
        #indexing.index_nodes(self)
        #indexing.index_neighbors(self)
        #indexing.index_metrics(self)
        #indexing.index_scores(self)
        #indexing.index_percolation(self)
        self.durations['indexing'] = dt.datetime.now() - start_time

    ###########################
    #### CALCULATION LOOPS ####
    ###########################

    def calculate_metrics(self):
        start_time_total = dt.datetime.now()
        # loop through all defined metrics and call the specified calculation method for each node
        print ('Starting calculate_metrics')
        for metric_name in self.base_metrics:
            start_time = dt.datetime.now()
            metric_method = self.base_metrics[metric_name]

            # loop through all nodes
            for node in self.nodes:
                # call calculation method of supplied metric for current node
                node = int(node)
                value = float(metric_method(self,node))

                #store result in node values
                self.redis.hset(self.node_prefix+str(node), metric_name, value)

                #also store result to metric set
                self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
            self.durations['metrics:'+metric_name] = dt.datetime.now() - start_time
        self.durations['metrics_total'] = dt.datetime.now() - start_time_total
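
        # Annotation (added in review, not part of the commit): after this loop
        # Redis holds one hash per node, '<graph>:node_metrics:<node>', mapping
        # metric names to values, and one sorted set per metric,
        # '<graph>:metric:<metric_name>', ordering all nodes by that value. The
        # percolation modes read their target lists from these sorted sets.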


    def calculate_advanced_metrics(self):
        start_time_total = dt.datetime.now()
        # loop through all defined advanced metrics and call the specified calculation method
        print ('Starting calculate_advanced_metrics')
        for advanced_metric_name in self.advanced_metrics:
            start_time = dt.datetime.now()
            metric_method = self.advanced_metrics[advanced_metric_name]

            # loop through all nodes
            for node in self.nodes:
                node = int(node)
                value = float(metric_method(self,node))

                #store result in node values
                self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)

                #also store result to metric set
                self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
            self.durations['adv_metrics:'+advanced_metric_name] = dt.datetime.now() - start_time
        self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total


    # loop through all defined normalizations and call the respective normalization method
    # metrics not listed in the "normalization_methods" hash fall back to min-max normalization
    def normalize_metrics(self):
        start_time = dt.datetime.now()
        print ('Starting normalize_metrics')
        all_metrics = dict(self.base_metrics.items() + self.advanced_metrics.items())

        for metric_name in all_metrics:
            if self.normalization_methods.has_key(metric_name):
                normalization_method = self.normalization_methods[metric_name]
            else:
                #fallback normalization is min-max
                normalization_method = normalizations.min_max
            normalization_method(self,metric_name)

        self.durations['normalizing'] = dt.datetime.now() - start_time
    def calculate_scores(self):
        start_time = dt.datetime.now()
        print ('Starting calculate_scores')
        for score_name in self.scores:
            metrics_with_weights = self.scores[score_name]

            for node in self.nodes:
                score_value = 0.0

                # weighted sum over the normalized metric values
                for metric in metrics_with_weights:
                    weight = self.scores[score_name][metric]
                    value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
                    score_value += weight * value

                #store score in the node hash and in the per-score sorted set
                self.redis.hset(self.node_prefix+str(node),score_name, score_value)
                self.redis.zadd(self.score_prefix+score_name, score_value, str(node))

        self.durations['scores'] = dt.datetime.now() - start_time

    def calculate_advanced_scores(self):
        start_time = dt.datetime.now()
        print ('Starting calculate_advanced_scores')
        for advanced_score in self.advanced_scores:
            self.advanced_scores[advanced_score](self)

        self.durations['adv_scores'] = dt.datetime.now() - start_time


    #############
    # statistics
    #############

    def calculate_statistics(self):
        start_time = dt.datetime.now()
        print ('Starting calculate_statistics')
        for metric in self.base_metrics:
            #absolute and normalized
            statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
            statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)

        for advanced_metric in self.advanced_metrics:
            #absolute and normalized
            statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
            statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)

        for score in self.scores:
            statistics.calculate_statistics(self, score, self.score_prefix+score)

        for advanced_score in self.advanced_scores:
            statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
        self.durations['statistics:stats'] = dt.datetime.now() - start_time

        start_time = dt.datetime.now()
        statistics.calculate_correlations(self)
        self.durations['statistics:corr'] = dt.datetime.now() - start_time

    ###################
    # dynamic metrics #
    ###################

    def calculate_percolation(self):
        start_time_total = dt.datetime.now()
        print ('Starting percolation calculation')

        # shorten the name for percentages and corresponding numbers of nodes to remove
        n = self.percentages

        # BASIC PERCOLATION MODES
        # basic percolation modes take mode_name and n as input and return a #
        # dictionary with percentage of nodes removed as key and percentage ##
        # of deterioration as value
        for mode_name in self.percolation_modes:
            start_time = dt.datetime.now()
            # initialise exclusion vertex property map
            self.exclusion_map.a = 1
            # read method from config file
            mode_method = self.percolation_modes[mode_name]
            # execute method
            #results = mode_method(self,mode_name,n)
            mode_method(self,mode_name,n)
            # index percolation mode
            #self.redis.sadd(self.percolation_index_key, mode_name)
            # store values
            #print 'Storing percolation percentages'
            #for percentage in results:
            #    value = results[percentage]
            #    #store in hash set
            #    self.redis.hset(self.percolation_prefix+mode_name, percentage, value)

            self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

        # ADVANCED PERCOLATION MODES
        # advanced percolation modes take mode_name and n as input and return a ###
        # dictionary with groups of percolation modes (e.g. metrics, countries) ###
        # as keys and dictionaries of percentages (removed: deteriorated) as values
        for mode_name in self.advanced_percolation_modes:
            start_time = dt.datetime.now()
            # initialise exclusion vertex property map
            self.exclusion_map.a = 1
            # read method from config file
            mode_method = self.advanced_percolation_modes[mode_name]
            # execute method
            results = mode_method(self,mode_name,n)

            # store values
            #print 'Storing percolation percentages'
            #for group in results:
            #    # index percolation modes
            #    self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
            #    for percentage in results[group]:
            #        value = results[group][percentage]
            #        #store in hash set
            #        self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)

            self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time

        self.durations['percolation_total'] = dt.datetime.now() - start_time_total


    def visualize_graph(self):

        for layout_name in self.visualization_layouts:
            start_time = dt.datetime.now()
            print 'Creating visualisation with '+layout_name+' layout'

            layout_method = self.visualization_layouts[layout_name]
            self.g.vp[layout_name] = layout_method(self)
            gt.graph_draw(self.glc, pos=self.g.vp[layout_name], output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")

            self.redis.sadd(self.layout_index_key, layout_name)
            self.durations['layout:'+layout_name] = dt.datetime.now() - start_time
            print self.durations['layout:'+layout_name]

    def evaluate_durations(self):
        #print out and write the times taken per calculation step
        print 'times taken:'
        output = open(str(self.graph_name)+"_duration_test_2.txt","w")
        output.write("Graph Name:\t"+str(self.graph_name)+"\n")
        output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
        output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
        output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str,self.percentages.values()))+"\n\n")
        output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
        for key in self.durations:
            self.durations_in_seconds[key] = self.durations[key].total_seconds()
            self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0

            print str(key)+'\t'+str(self.durations_in_percent[key])
            output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')
        output.close() # close the file handle so the report is flushed to disk
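Reviewer note (not part of the commit): the commit does not show how
MetricCalculator is instantiated. A hypothetical driver, assuming the graph is
available as a comma-separated edge list; the file name as_graph.csv and the
loader choices are illustrative only:

import networkx as nx
import graph_tool.all as gt
from ru_metric_calculator import MetricCalculator

# hypothetical input: one "source,target" pair per line
graph = nx.read_edgelist('as_graph.csv', delimiter=',')
g = gt.load_graph_from_csv('as_graph.csv', directed=False)

# the constructor reads the graph_tool graph and its label property map from
# these two keys; load_graph_from_csv stores the labels in g.vp.name
calc = MetricCalculator(graph, {'graph_gt': g, 'graph_gt_labels': g.vp.name})
calc.start()
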
File visualization.py added (mode: 100644) (index 0000000..39e8f08)

import graph_tool.all as gt
import numpy as np

# these methods give as output a property map of positions (i.e. the layout for the visualization)

def random(self):
    # creating visualisation with Random layout
    pos = gt.random_layout(self.glc)
    coloured_drawing(self, pos, 'Random_Advanced', 'heptagon')
    # gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_RANDOM.png")
    return pos

def frucht(self):
    # creating visualisation with Fruchterman-Reingold layout
    pos = gt.fruchterman_reingold_layout(self.glc, r=1.8, n_iter=36)
    coloured_drawing(self, pos, 'Fruchterman_Reingold_Advanced', 'hexagon')
    # gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_FRUCHT.png")
    return pos

def arf(self):
    # creating visualisation with ARF layout
    pos = gt.arf_layout(self.glc, max_iter=1000)
    # gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_ARF.png")
    return pos

def radial(self):
    # creating visualisation with Radial Tree layout
    if not hasattr(self.g.vp, 'betweenness'):
        for i in range(0,self.g.num_vertices()):
            max_asn = self.redis.zrange(self.metric_prefix+'betweenness_centrality_(gt)',-i-1,-i-1,withscores=False,score_cast_func=float)[0]
            max_vertex = gt.find_vertex(self.glc, self.label_map, max_asn)
            #test whether the vertex exists and is in the largest component
            #(find_vertex returns a list, so check for a non-empty result rather than comparing against None)
            if max_vertex:
                break #end loop
        pos = gt.radial_tree_layout(self.glc, root=max_vertex[0], weighted=True)
    else:
        if hasattr(self.g.vp, 'eigenvector'):
            temp = self.glc.vp.eigenvector
        else:
            temp = self.glc.vp.betweenness
        max_vertex = gt.find_vertex(self.glc, self.glc.vp.betweenness, np.max(self.glc.vp.betweenness.a))
        pos = gt.radial_tree_layout(self.glc, root=max_vertex[0], rel_order=temp, weighted=True, node_weight=temp)

    #gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_RADIAL.png")
    coloured_drawing(self, pos, 'Radial_Advanced_2')
    return pos
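
# Annotation (added in review, not part of the commit): radial() roots the tree
# at the vertex with the highest betweenness centrality, reading it from the
# Redis sorted set when no internal betweenness map exists and walking down the
# ranking until a vertex of the largest component is found.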

def sfdp(self):
    # creating visualisation with SFDP layout
    if not hasattr(self.g.vp, 'sfdp'):
        self.g.vp.sfdp = gt.sfdp_layout(self.glc)

    pos = self.g.vp.sfdp

    # test colouring of graph
    coloured_drawing(self, pos, 'SFDP_Advanced', 'octagon')
    return pos

def coloured_drawing(self, pos, name, shape="circle"):
    if not hasattr(self, 'eigc'):
        # caching to reduce calculation time overall
        # define metric property maps for colouring
        eigc = self.g.vp.eigenvector.copy()
        ebwc = self.g.ep.betweenness.copy()

        # right-size property maps
        eigc.a = np.sqrt(eigc.a)
        eigc = gt.prop_to_size(eigc)
        ebwc = gt.prop_to_size(ebwc)
        #vsize = eigc.copy() # obtain property map for size of vertices
        eigc.a /= eigc.a.max() # normalization to 0-1
        ebwc.a /= ebwc.a.max() # normalization to 0-1
        # obtain maps for edges
        eorder = ebwc.copy()
        eorder.a *= -1
        econtrol = self.g.new_edge_property("vector<double>")
        for e in self.glc.edges():
            d = np.sqrt(sum((pos[e.source()].a - pos[e.target()].a) ** 2)) / 3
            econtrol[e] = [0.3,d,0.7,d]
        # storing for later access
        self.eigc = eigc
        self.ebwc = ebwc
        self.eorder = eorder
        self.econtrol= econtrol

    if not hasattr(self, 'vcolour'):
        vcolour = self.g.new_vertex_property("vector<double>") # obtain colour map
        for v in self.glc.vertices():
            vcolour[v] = [self.eigc[v],self.eigc[v]/4.0,self.eigc[v]/6.0,(1.0+2.0*self.eigc[v])/3.0]
            #vsize[v] = int(vsize[v])

        # obtain maps for edges
        ecolour = self.g.new_edge_property("vector<double>")
        for e in self.glc.edges():
            ecolour[e] = [self.ebwc[e]/8.0,self.ebwc[e]/2.0,self.ebwc[e],(1.0+2.0*self.ebwc[e])/4.0]

        # storing for later access
        self.vcolour = vcolour
        self.ecolour = ecolour

    gt.graph_draw(self.glc, pos=pos, vertex_shape=shape, vertex_fill_color=self.vcolour, vorder=self.eigc, edge_color=self.ecolour, eorder=self.eorder, edge_control_points=self.econtrol, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+name+".png")

    self.redis.sadd(self.layout_index_key, name)
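
# Annotation (added in review, not part of the commit): the vector<double>
# colour properties are RGBA quadruples in [0, 1]; vertices are tinted towards
# red/orange by eigenvector centrality and edges towards blue by edge
# betweenness, with the alpha channel also scaled by the respective metric.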

def draw_deterioration(self, pos, name, shape="circle"):
    excl = self.exclusion_map.copy()
    if not hasattr(self, 'eigc'):
        # caching to reduce calculation time overall
        # define metric property maps for colouring
        eigc = self.g.vp.eigenvector.copy()
        ebwc = self.g.ep.betweenness.copy()
        # right-size property maps
        eigc.a = np.sqrt(eigc.a)
        eigc = gt.prop_to_size(eigc)
        ebwc = gt.prop_to_size(ebwc)
        #vsize = eigc.copy() # obtain property map for size of vertices
        eigc.a /= eigc.a.max() # normalization to 0-1
        ebwc.a /= ebwc.a.max() # normalization to 0-1
        # obtain maps for edges
        eorder = ebwc.copy()
        eorder.a *= -1

        econtrol = self.g.new_edge_property("vector<double>")
        for e in self.glc.edges():
            d = np.sqrt(sum((pos[e.source()].a - pos[e.target()].a) ** 2)) / 3
            econtrol[e] = [0.3,d,0.7,d]
        # storing for later access
        self.eigc = eigc
        self.ebwc = ebwc
        self.eorder = eorder
        self.econtrol= econtrol

    # graph without the excluded vertices (i.e. those that have value 0 in the exclusion map)
    f = gt.GraphView(self.g, vfilt = excl)
    # largest component of graph f
    l = gt.label_largest_component(f)
    vfcolour = self.g.new_vertex_property("vector<double>") # obtain colour map
    vcolour = vfcolour.copy()
    for v in self.g.vertices():
        excl[v] *= l[v]
        if excl[v] != 1:
            vfcolour[v] = [0.0,0.0,0.0,0.01]
            vcolour[v] = [0.0,0.0,0.0,0.05]
        else:
            vfcolour[v] = [self.eigc[v],self.eigc[v]/4.0,self.eigc[v]/6.0,(1.0+2.0*self.eigc[v])/3.0]
            vcolour[v] = [self.eigc[v]/2.0,self.eigc[v]/3.0,self.eigc[v]/4.0,(2.0+1.0*self.eigc[v])/3.0]
        #vsize[v] = int(vsize[v])

    ecolour = self.g.new_edge_property("vector<double>")
    for e in self.g.edges():
        if excl[e.target()] != 1 or excl[e.source()] != 1:
            ecolour[e] = [0.0,0.0,0.0,0.05]
        else:
            ecolour[e] = [self.ebwc[e]/8.0,self.ebwc[e]/2.0,self.ebwc[e],(1.0+2.0*self.ebwc[e])/4.0]

    gt.graph_draw(self.g, pos=pos, vertex_shape=shape, vertex_color=vcolour, vertex_fill_color=vfcolour, vorder=self.eigc, edge_color=ecolour, eorder=self.eorder, edge_control_points=self.econtrol, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+name+".png")

    #self.redis.sadd(self.layout_index_key,name)
    #self.redis.hset(self.percolation_prefix+self.layout_index_key,name,pct)
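
# Annotation (added in review, not part of the commit): draw_deterioration
# fades out every vertex that is excluded or that fell out of the surviving
# largest component (excl[v] *= l[v]) together with its incident edges, so the
# rendered image shows the intact core against a ghost of the removed network.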