import networkx as nx
import graph_tool.all as gt
import redis as rd
import numpy as np
import indexing
import statistics
import normalizations
import config
import percolation
import visualization
import datetime as dt
class MetricCalculator(object):
    def __init__(self, graph, graph_gt):
        # class constructor:
        # set up the graphs to work on, the redis connection, the list of nodes
        # and all configuration variables used by the calculation steps
print ('Starting metric_calculator!')
# for code evaluation
self.start_time = dt.datetime.now()
self.durations = {}
self.durations_in_seconds = {}
self.durations_in_percent = {}
self.graph = graph
self.graph_gt = graph_gt
# alternate name for graph tool graph
self.g = self.graph_gt['graph_gt']
# alternate name for graph tool labels
self.g.vp.label_map = self.graph_gt['graph_gt_labels']
self.label_map = self.g.vp.label_map
# vertex property map for percolation calculations
self.g.vp.exmap = self.g.new_vertex_property("bool") #internalizes map
self.exclusion_map = self.g.vp.exmap
self.exclusion_map.a = 1 #initialise filter map
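        # Note (assumption): the exclusion map presumably acts as a vertex filter in
        # percolation.py, where 1 keeps a vertex and 0 marks it as removed for a run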
#find largest component of graph tool graph for percolation calculations
# percolation = 1 - largest_component(percolated_graph)/largest_component(intact_graph)
self.glc = gt.GraphView(self.g, vfilt = gt.label_largest_component(self.g))
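        # Illustrative example of the deterioration measure above: if the intact largest
        # component has 1000 vertices and only 600 remain connected after node removal,
        # percolation = 1 - 600/1000 = 0.4 (i.e. 40% deterioration)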
self.redis = rd.StrictRedis(host=config.REDIS_HOST, port=config.REDIS_PORT, db=1)
self.nodes = nx.nodes(graph)
# configuration variables are read from the config file and are also saved to class variables for easy access
self.graph_index_key = config.graph_index_key
        # default graph name; the prompt below is only reached if no default is set here
        self.graph_name = 'coria-graph'
        while (self.graph_name == ''):
            self.graph_name = raw_input("Please enter the name of the graph. It will be used for storing results.\n")
self.info_index_key = self.graph_name+':'+config.info_index_key
self.node_index_key = self.graph_name+':'+config.node_index_key
self.metric_index_key = self.graph_name+':'+config.metric_index_key
self.score_index_key = self.graph_name+':'+config.score_index_key
self.percolation_index_key = self.graph_name+':'+config.percolation_index_key
self.layout_index_key = self.graph_name+':'+config.layout_index_key
self.node_neighbors_prefix = self.graph_name+':'+config.node_neighbors_prefix
self.node_prefix = self.graph_name+':'+config.node_prefix
self.metric_prefix = self.graph_name+':'+config.metric_prefix
self.score_prefix = self.graph_name+':'+config.score_prefix
self.statistics_prefix = self.graph_name+':'+config.statistics_prefix
self.percolation_prefix = self.graph_name+':'+config.percolation_prefix
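        # Hypothetical example of the resulting key scheme (the actual prefix strings are
        # defined in config.py): if node_prefix were 'node:', the hash of node 42 would be
        # stored under 'coria-graph:node:42'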
self.normalization_suffix = config.normalization_suffix
self.base_metrics = config.base_metrics
self.advanced_metrics = config.advanced_metrics
self.normalization_methods = config.normalization_methods
self.scores = config.scores
self.advanced_scores = config.advanced_scores
self.visualization_layouts = config.visualization_layouts
self.percolation_modes = config.percolation_modes
self.advanced_percolation_modes = config.advanced_percolation_modes
##############################################################################
###### start describes the entire calculation in a high level overview #######
##############################################################################
def start(self):
start_time_calculation = dt.datetime.now()
#preliminary calculations
self.flush_database()
self.obtain_percentages()
self.create_info()
self.create_standard_layout()
self.save_graph_data('raw')
#index creation
self.create_indexes()
#main calculations
self.calculate_metrics()
self.calculate_advanced_metrics()
self.normalize_metrics()
self.calculate_scores()
self.calculate_advanced_scores()
#statistics
self.calculate_statistics()
#dynamic metrics / percolation
self.calculate_percolation()
#visualization
self.visualize_graph()
#save final graph
self.save_graph_data('full')
#evaluation
self.duration_total = dt.datetime.now() - start_time_calculation
self.evaluate_durations()
###################
## PRELIMINARIES ##
###################
def flush_database(self):
# Check if FLUSH_REDIS_DB is set to True and flush the DB
if config.FLUSH_REDIS_DB:
self.redis.flushdb()
def obtain_percentages(self):
        # obtain percentages for the calculation of deterioration
        # and calculate the corresponding numbers of nodes to remove from the graph
percentages = config.PERCOLATION_PERCENTAGES # initialise
while (percentages == ''):
percentages = raw_input("Please enter percentages of nodes to remove for the calculation of percolation. (10 is interpreted as 10%. If multiple percentages are given they must be separated by whitespace, e.g. \"1 2 5 10\".)\n")
        percentages = sorted([float(pct) for pct in percentages.split()])
        # number of nodes to remove per percentage, rounded to the nearest integer
        numbers = [int(float(self.glc.num_vertices())*(pct/100.0)+0.5) for pct in percentages]
# create a dictionary of percentages and corresponding numbers of nodes
self.percentages = dict(zip(numbers,percentages))
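        # Illustrative example (assuming a largest component of 1000 vertices and the
        # input "1 2 5 10"): numbers = [10, 20, 50, 100] and
        # self.percentages = {10: 1.0, 20: 2.0, 50: 5.0, 100: 10.0}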
# storing values in redis DB
self.redis.hset(self.info_index_key, 'percentages', ';'.join([str(pct) for pct in percentages]))
def create_info(self):
#store general info about graph
self.redis.hset(self.info_index_key, 'number_of_nodes', self.g.num_vertices())
self.redis.hset(self.info_index_key, 'number_of_edges', self.g.num_edges())
def create_standard_layout(self):
# create a standard layout
start_time = dt.datetime.now()
        print('Creating standard layout for graph visualization.')
if not hasattr(self.g.vp, 'sfdp'):
self.sfdp = gt.sfdp_layout(self.g, C=0.5)
self.g.vp['sfdp'] = self.sfdp
else:
self.sfdp = self.g.vp['sfdp']
self.durations['SFDP_layout'] = dt.datetime.now() - start_time
def save_graph_data(self,name):
# save graph
start_time = dt.datetime.now()
        print('Saving graph data: '+name)
self.g.save(self.graph_name+'_'+name+'.gt.gz')
self.durations['saving_graph'+name] = dt.datetime.now() - start_time
##################
#### INDEXING ####
##################
def create_indexes(self):
start_time = dt.datetime.now()
#call methods defined in indexing.py
indexing.index_graph(self)
indexing.index_nodes(self)
indexing.index_neighbors(self)
indexing.index_metrics(self)
indexing.index_scores(self)
#indexing.index_percolation(self)
self.durations['indexing'] = dt.datetime.now() - start_time
###########################
#### CALCULATION LOOPS ####
###########################
def calculate_metrics(self):
start_time_total = dt.datetime.now()
# loop through all defined metrics and call specified calculation method for each node
print ('Starting calculate_metrics')
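        # base_metrics is defined in config.py and maps metric names to calculation
        # functions; e.g. (hypothetical entry) 'clustering_coefficient' mapped to a
        # function that takes (self, node) and returns a float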
for metric_name in self.base_metrics:
start_time = dt.datetime.now()
metric_method = self.base_metrics[metric_name]
# loop through all nodes
for node in self.nodes:
# call calculation method of supplied metric for current node
node = int(node)
value = float(metric_method(self,node))
#store result in node values
self.redis.hset(self.node_prefix+str(node), metric_name, value)
#also store result to metric set
self.redis.zadd(self.metric_prefix+metric_name, value, str(node))
self.durations['metrics:'+metric_name] = dt.datetime.now() - start_time
self.durations['metrics_total'] = dt.datetime.now() - start_time_total
def calculate_advanced_metrics(self):
start_time_total = dt.datetime.now()
        # loop through all defined advanced metrics and call the specified calculation method
print ('Starting calculate_advanced_metrics')
for advanced_metric_name in self.advanced_metrics:
start_time = dt.datetime.now()
metric_method = self.advanced_metrics[advanced_metric_name]
# loop through all nodes
for node in self.nodes:
node = int(node)
value = float(metric_method(self,node))
#store result in node values
self.redis.hset(self.node_prefix+str(node), advanced_metric_name, value)
#also store result to metric set
self.redis.zadd(self.metric_prefix+advanced_metric_name, value, str(node))
self.durations['adv_metrics:'+advanced_metric_name] = dt.datetime.now() - start_time
self.durations['adv_metrics_total'] = dt.datetime.now() - start_time_total
    # loop through all metrics and call the respective normalization method
    # metrics not listed in the "normalization_methods" hash fall back to min-max normalization
def normalize_metrics(self):
start_time = dt.datetime.now()
#fallback normalization: min-max
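        # The min-max fallback (normalizations.min_max) presumably applies the usual
        # rescaling to the unit interval: value_norm = (value - min) / (max - min)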
print ('Starting normalize_metrics')
        all_metrics = dict(self.base_metrics)
        all_metrics.update(self.advanced_metrics)
for metric_name in all_metrics:
            if metric_name in self.normalization_methods:
normalization_method = self.normalization_methods[metric_name]
else:
#fallback normalization is min-max
normalization_method = normalizations.min_max
normalization_method(self,metric_name)
self.durations['normalizing'] = dt.datetime.now() - start_time
def calculate_scores(self):
start_time = dt.datetime.now()
print ('Starting calculate_scores')
for score_name in self.scores:
metrics_with_weights = self.scores[score_name]
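            # Each scores entry maps metric names to weights; a node's score is the weighted
            # sum of its normalized metric values, e.g. (hypothetical weights)
            # {'degree': 0.5, 'betweenness_centrality': 0.5} gives 0.5*degree_norm + 0.5*betweenness_norm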
for node in self.nodes:
score_value = 0.0
# get normalized values
for metric in metrics_with_weights:
weight = self.scores[score_name][metric]
value = float(self.redis.hget(self.node_prefix+str(node),metric+self.normalization_suffix))
score_value += weight * value
                # store the score in the node's hash and in the score sorted set
self.redis.hset(self.node_prefix+str(node),score_name, score_value)
self.redis.zadd(self.score_prefix+score_name, score_value, str(node))
self.durations['scores'] = dt.datetime.now() - start_time
def calculate_advanced_scores(self):
start_time = dt.datetime.now()
print ('Starting calculate_advanced_scores')
for advanced_score in self.advanced_scores:
self.advanced_scores[advanced_score](self)
self.durations['adv_scores'] = dt.datetime.now() - start_time
#############
# statistics
#############
def calculate_statistics(self):
start_time = dt.datetime.now()
print ('Starting calculate_statistics')
for metric in self.base_metrics:
#absolute and normalized
statistics.calculate_statistics(self, metric, self.metric_prefix+metric)
statistics.calculate_statistics(self, metric+self.normalization_suffix, self.metric_prefix+metric+self.normalization_suffix)
for advanced_metric in self.advanced_metrics:
#absolute and normalized
statistics.calculate_statistics(self, advanced_metric, self.metric_prefix+advanced_metric)
statistics.calculate_statistics(self, advanced_metric+self.normalization_suffix, self.metric_prefix+advanced_metric+self.normalization_suffix)
for score in self.scores:
statistics.calculate_statistics(self, score, self.score_prefix+score)
for advanced_score in self.advanced_scores:
statistics.calculate_statistics(self, advanced_score, self.score_prefix+advanced_score)
self.durations['statistics:stats'] = dt.datetime.now() - start_time
start_time = dt.datetime.now()
statistics.calculate_correlations(self)
self.durations['statistics:corr'] = dt.datetime.now() - start_time
###################
# dynamic metrics #
###################
def calculate_percolation(self):
start_time_total = dt.datetime.now()
print ('Starting percolation calculation')
        # short alias for the dictionary mapping numbers of nodes to remove to their percentages
n = self.percentages
# BASIC PERCOLATION MODES
# basic percolation modes take mode_name and n as input and return a #
# dictionary with percentage of nodes removed as key and percentage ##
# of deterioration as value
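        # Hypothetical shape of a returned results dict: {1.0: d1, 5.0: d2, 10.0: d3},
        # keyed by the percentage of nodes removed; the deterioration values are computed
        # by the mode methods in percolation.py (see the formula noted in __init__)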
for mode_name in self.percolation_modes:
start_time = dt.datetime.now()
            # initialise exclusion vertex property map
self.exclusion_map.a = 1
# read method from config file
mode_method = self.percolation_modes[mode_name]
# execute method
results = mode_method(self,mode_name,n)
# index percolation mode
self.redis.sadd(self.percolation_index_key, mode_name)
# store values
            print('Storing percolation percentages')
for percentage in results:
value = results[percentage]
#store in hash set
self.redis.hset(self.percolation_prefix+mode_name, percentage, value)
self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time
# ADVANCED PERCOLATION MODES
# advanced percolation modes take mode_name and n as input and return a ###
# dictionary with groups of percolation modes (e.g. metrics, countries) ###
# as keys and dictionaries of percentages (removed: deteriorated) as values
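        # Hypothetical shape for advanced modes: {'group_a': {1.0: d1, 5.0: d2}, 'group_b': {...}},
        # i.e. one inner dictionary per group (metric, country, ...), each keyed like the basic modes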
for mode_name in self.advanced_percolation_modes:
start_time = dt.datetime.now()
            # initialise exclusion vertex property map
self.exclusion_map.a = 1
# read method from config file
mode_method = self.advanced_percolation_modes[mode_name]
# execute method
results = mode_method(self,mode_name,n)
# store values
            print('Storing percolation percentages')
for group in results:
# index percolation modes
self.redis.sadd(self.percolation_index_key, mode_name+':'+group)
for percentage in results[group]:
value = results[group][percentage]
#store in hash set
self.redis.hset(self.percolation_prefix+mode_name+':'+group, percentage, value)
self.durations['percolation:'+mode_name] = dt.datetime.now() - start_time
self.durations['percolation_total'] = dt.datetime.now() - start_time_total
def visualize_graph(self):
for layout_name in self.visualization_layouts:
start_time = dt.datetime.now()
            print('Creating visualisation with '+layout_name+' layout')
layout_method = self.visualization_layouts[layout_name]
pos = layout_method(self)
gt.graph_draw(self.glc, pos=pos, output="/CORIA/coria-frontend/pics/"+self.graph_name+"_"+layout_name+".png")
self.redis.sadd(self.layout_index_key, layout_name)
self.durations['layout:'+layout_name] = dt.datetime.now() - start_time
def evaluate_durations(self):
#print out times taken
        print('times taken:')
output = open(str(self.graph_name)+"_duration_test.txt","w")
output.write("Graph Name:\t"+str(self.graph_name)+"\n")
output.write("Number of Vertices:\t"+str(self.g.num_vertices())+"\n")
output.write("Number of Edges:\t"+str(self.g.num_edges())+"\n")
output.write("Percentages of deleted vertices:\t"+'\t'.join(map(str,self.percentages.values()))+"\n\n")
output.write("Calculation Step:\tDuration\tDuration in Seconds\tDuration in Percent\n")
        for key in self.durations:
            self.durations_in_seconds[key] = self.durations[key].total_seconds()
            self.durations_in_percent[key] = float(self.durations_in_seconds[key])/float(self.duration_total.total_seconds())*100.0
            print(str(key)+'\t'+str(self.durations_in_percent[key]))
            output.write(str(key)+'\t'+str(self.durations[key])+'\t'+str(self.durations_in_seconds[key])+'\t'+str(self.durations_in_percent[key])+'\n')
        output.close()