# -*- coding: utf-8 -*-
"""
Created on Fri Oct 4 11:58:08 2019
@author: iam
"""
import numpy as np
import numpy.ma as ma
import scipy.stats as stats
from scipy.spatial import cKDTree
from scipy import interpolate
import collections # for defaultdict
from .IS_stat import IS
from .candybox import CandyBox
# mizí národní slovička :(
# gradient musí být funkcí!
def progress(estimations, sampled_plan_R, Z, gradient, f_i, nis=500, budget=20000, L2_metric=False):
failsi = Z < 0
if not 'L2_Voronoi_cKDTree_failure_rate' in estimations:
estimations['L2_Voronoi_cKDTree_failure_rate'] = [[0], [0]]
for nsim in range(max(estimations['L2_Voronoi_cKDTree_failure_rate'][0])+1, len(sampled_plan_R)+1):
print("L2_Voronoi_cKDTree_failure_rate, krok: ", nsim)
if nsim<1:
continue
estimations['L2_Voronoi_cKDTree_failure_rate'][0].append(nsim)
estimations['L2_Voronoi_cKDTree_failure_rate'][1].append(L2_Voronoi_cKDTree_failure_rate(sampled_plan_R[:nsim], failsi[:nsim], gradient, f_i))
if not 'Rbf_approximation' in estimations:
estimations['Rbf_approximation'] = [[0], [0]]
estimations['L2_Voronoi_cKDTree_test'] = [[0], [0]]
for nsim in range(max(estimations['Rbf_approximation'][0])+1, len(sampled_plan_R)+1):
if nsim<=1:
continue
print("Rbf_approximation, krok: ", nsim)
Rbf_approximation, L2_Voronoi_cKDTree_test = L2_Voronoi_cKDTree(sampled_plan_R[:nsim], Z[:nsim], f_i, nis=50000)
estimations['Rbf_approximation'][0].append(nsim)
estimations['Rbf_approximation'][1].append(Rbf_approximation)
estimations['L2_Voronoi_cKDTree_test'][0].append(nsim)
estimations['L2_Voronoi_cKDTree_test'][1].append(L2_Voronoi_cKDTree_test)
# za indikatorovou statistiku můžeme třeba považovat 'L1_Voronoi_failure_rate'
if not 'L1_Voronoi_failure_rate' in estimations:
estimations['L1_Voronoi_failure_rate'] = [[0], [0]]
for nsim in range(max(estimations['L1_Voronoi_failure_rate'][0])+1, len(sampled_plan_R)+1):
if nsim<=2:
continue
print("2 point voronoi, krok: ", nsim)
L1_Voronoi_2_point_estimators = Voronoi_2_point_estimations(sampled_plan_R[:nsim], failsi[:nsim], gradient, f_i, budget=20000, L2_metric=False)
for statistic, value in L1_Voronoi_2_point_estimators.items():
if not statistic in estimations:
estimations[statistic] = [[0], [0]]
estimations[statistic][0].append(nsim)
estimations[statistic][1].append(value)
return estimations
#
# l2 Voronoi cKDTree IS localization
#
# Rbf_estimation so far
def L2_Voronoi_cKDTree(sample_box, nis=50000):
nsim, nvar = np.shape(sampled_plan_R)
# Estimation of pf given a list of red and green points (Voronoi)
sampled_plan_R_ma = np.ma.asarray(sampled_plan_R)
# tady bych nedaval Rd
#grad = np.abs(gradient([0 for j in range(nvar)]))
tree = cKDTree(sampled_plan_R)
rbf_func = interpolate.Rbf(sampled_plan_R[:,0],sampled_plan_R[:,1],Z)
Rbf_estimation = 0 # inicializace
L2_Voronoi_cKDTree_test = 0
#failure_points_indexes = np.argwhere(failsi)
points_probabilities = np.empty(nsim) # probabilities of partial (failure) event
# loop over points (need to integrate red regions)
for i in range(nsim): # loop over failing points only
sampled_plan_R_ma.mask = ma.nomask
sampled_plan_R_ma[i] = ma.masked
# neosvědčílo se
#==============================================================================
# delta_Rd_matrix = sampled_plan_Rd_ma - sampled_plan_Rd[i]
# mindist = [np.min(np.abs(np.where(delta_Rd_matrix[:,j] < 0,delta_Rd_matrix[:,j], f_i[j].std() ))) + np.min(np.abs(np.where(delta_Rd_matrix[:,j] > 0,delta_Rd_matrix[:,j], f_i[j].std() ))) for j in range(nvar)]
#
# # set the minimum distance as the standard deviation of IS densisty
# h_i = [stats.norm(sampled_plan_Rd[i,j], mindist[j] ) for j in range(nvar)] #! dosadit standard deviation pddle chutí
#==============================================================================
# find distance to the nearest sampling point (from all points)
mindist = np.min(np.sum(np.square(sampled_plan_R_ma - sampled_plan_R[i]), axis=1))**0.5
# set the minimum distance as the standard deviation of IS densisty
h_i = [stats.norm(sampled_plan_R[i,j], 2*mindist ) for j in range(nvar)] #! dosadit standard deviation pddle chutí
# use IS sampling density with center equal to the current "red" point
# select nis = 100 points from IS density and
# if the point has its nearest neighbor any red point from the sampled_plan,
h_plan = np.zeros((nis, nvar))
for j in range(nvar):
h_plan[:, j] = h_i[j].ppf(np.random.random(nis)) # realizace váhové funkce náhodné veličiny
# Rozptyl corrected IS
weights_sim = np.prod([f_i[j].pdf(h_plan[:, j]) / h_i[j].pdf(h_plan[:, j]) for j in range(nvar)], axis=0) # [f1/h1, ..., fn/hn]
dd, ii = tree.query(h_plan)
Vor_mask = np.where(ii==i, True, False)
zm = rbf_func(h_plan[:, 0][Vor_mask], h_plan[:, 1][Vor_mask])
points_probabilities[i] = np.sum(weights_sim[Vor_mask][zm<0]) / nis
L2_Voronoi_cKDTree_test += np.sum(weights_sim[Vor_mask]) / nis
Rbf_estimation = np.sum(points_probabilities)
return Rbf_estimation, L2_Voronoi_cKDTree_test
#
# l2 Voronoi estimation
#
def L2_Voronoi_cKDTree_failure_rate(sample_box, space='Rn', nis=50000):
# Estimation of pf given a list of red and green points (Voronoi)
#
sampled_plan_given_space = getattr(sample_box, space)
sampled_plan_ma = np.ma.asarray(sampled_plan_given_space)
# tady bych dal Rd
tree = cKDTree(sampled_plan_given_space)
#nis = 500
L2_Voronoi_failure_rate = 0 # inicializace
points_probabilities = [] # probabilities of partial (failure) event
# loop over points (need to integrate red regions)
for i in sample_box.failure_points: # loop over failing points only
sampled_plan_ma.mask = ma.nomask
sampled_plan_ma[i] = ma.masked
# neosvědčílo se
#==============================================================================
# delta_Rd_matrix = sampled_plan_Rd_ma - sampled_plan_Rd[i]
# mindist = [np.min(np.abs(np.where(delta_Rd_matrix[:,j] < 0,delta_Rd_matrix[:,j], f_i[j].std() ))) + np.min(np.abs(np.where(delta_Rd_matrix[:,j] > 0,delta_Rd_matrix[:,j], f_i[j].std() ))) for j in range(nvar)]
#
# # set the minimum distance as the standard deviation of IS densisty
# h_i = [stats.norm(sampled_plan_Rd[i,j], mindist[j] ) for j in range(nvar)] #! dosadit standard deviation pddle chutí
#==============================================================================
# find distance to the nearest sampling point (from all points)
mindist = np.min(np.sum(np.square(sampled_plan_ma - sampled_plan_given_space[i]), axis=1))**0.5
# set the minimum distance as the standard deviation of IS densisty
h_i = [stats.norm(sampled_plan_given_space[i,j], 2*mindist ) for j in range(sample_box.nvar)] #! dosadit standard deviation pddle chutí
# use IS sampling density with center equal to the current "red" point
# select nis = 100 points from IS density and
# if the point has its nearest neighbor any red point from the sampled_plan,
h_plan = np.zeros((nis, sample_box.nvar))
for j in range(sample_box.nvar):
h_plan[:, j] = h_i[j].ppf(np.random.random(nis)) # realizace váhové funkce náhodné veličiny
# Rozptyl corrected IS
weights_sim = np.prod([f_i[j].pdf(h_plan[:, j]) / h_i[j].pdf(h_plan[:, j]) for j in range(nvar)], axis=0) # [f1/h1, ..., fn/hn]
dd, ii = tree.query(h_plan)
Vor_mask = np.where(ii==i, True, False)
points_probabilities.append(np.sum(weights_sim[Vor_mask]) / nis)
L2_Voronoi_failure_rate = np.sum(points_probabilities)
return L2_Voronoi_failure_rate
#
# Voronoi 2_point estimation
#
def Voronoi_2_point_estimation(sample_box, model_space='Rn', sampling_space=None, p_norm=1, gradient=None, budget=20000, callback=None):
"""
Voronoi_2_point estimations
budget=20000
"""
# tak, na rovinu
# нет ножек - нет мультиков
if gradient is None:
return Voronoi_2_point_cKDTree(sample_box, model_space=model_space,sampling_space=sampling_space,\
p_norm=p_norm, budget=budget, callback=callback)
if callback is None:
callback = lambda *_, **__: None
# jsou to informace pro callback
estimation={'method': "Voronoi_2_point_estimation", 'p_norm':p_norm, 'nis':nis}
estimation['model_space'] = model_space
estimation['sampling_space'] = sampling_space
nsim = sample_box.nsim
nis = max(round(budget/nsim), 100)
# vytahneme ze sample_boxu rozdělení
f = sample_box.sampled_plan
# já vím, že sample box pokážde failsi přepočítavá
failsi = sample_box.failsi
PDF = sample_box.pdf(model_space)
# zde provadím rozdělení na prostor, ve kterém vzorkujem
# a prostor "modelu", vô ktôrom, v podstatě, měříme vzdaleností
sampled_plan_model = getattr(sample_box, model_space)
if sampling_space is None:
sampling_space = model_space
# sing like sampling
sampled_plan_sing = sampled_plan_model
else:
sampled_plan_sing = getattr(sample_box, sampling_space)
sampled_plan_sing_ma = np.ma.asarray(sampled_plan_sing)
if sampling_space is None:
sampling_space = model_space
# sing like sampling
sampled_plan_sing = sampled_plan_model
tree = cKDTree(sampled_plan_model)
global_stats = collections.defaultdict(int)
# zde postupně v cyklu prochazíme všemi body (tj. vzorky)
# a omezujeme se pouse nejbližšími bodíkama
# tynhlenstím zajišťujeme disjunktnost
# a môžeme všechny nasbírané pravděpodobnosti jednoduše sčítat
for i in range(nsim): # loop over all points, not failing points only
sampled_plan_sing_ma.mask = ma.nomask
sampled_plan_sing_ma[i] = ma.masked
# zde nepouživám KDTree protože bych pokažde sestavovat strom z rouškováneho pole
# ale otázky p-normy a směrodatné odchylky zde jsou k velké diskuzi.
# find distance to the nearest sampling point (from all points)
mindist_sing = np.min(np.sum(np.square(sampled_plan_sing_ma - sampled_plan_sing[i]), axis=1))**0.5
# use IS sampling density with center equal to the current "red" point
# set the minimum distance as the standard deviation of IS densisty
h_i = [stats.norm(sampled_plan_sing[i,j], 2*mindist_sing) for j in range(nvar)] #! dosadit standard deviation podle chutí
h = f_models.UnCorD(h_i)
# select nis = 100 points from IS density
# sice to má nazev h_plan, ale nese rozdělení a hustoty v f-ku
h_plan = IS(f, h, space_from_h='R', space_to_f=sampling_space, Nsim=nis)
# součet váh nemá cenu kontrolovat, jedná tam nebude, nevyjde
"""
# dd - The distances to the nearest neighbors
# ii - The locations of the neighbors in self.data
# k - The list of k-th nearest neighbors to return.
# If k is an integer it is treated as a list of [1, … k] (range(1, k+1)).
# Note that the counting starts from 1
# p - Which Minkowski p-norm to use.
# 1 is the sum-of-absolute-values “Manhattan” distance 2 is the usual Euclidean distance
# infinity is the maximum-coordinate-difference distance
"""
h_plan_model = getattr(h_plan, model_space)
dd, ii = tree.query(h_plan_model, k=1, p=p_norm)
# nechám s velkým písmenkem
Vor_mask = np.where(ii==i, True, False)
h_plan_model_ma = h_plan_model[Vor_mask]
# kolik bodíků jsou nejbližší k mému vzorečkovi
# np.empty() implicitně má dtype=float
# tyhle blbosti ponechám jen kvůli callbackovi
node_pf_estimations = np.empty(len(h_plan_model_ma))
node_pf_pure_estimations = np.empty(len(h_plan_model_ma))# pure distance estimation
node_failsi = np.empty(len(h_plan_model_ma), dtype=np.bool) # for L1 Voronoi
# projdeme přes každej bodíček
for node_idx in range(len(h_plan_model_ma)):
# KDTree byl použit jen k rozdělení na disjunktní úseky, veškerej děj se odehravá tu
# a to je všechno kvůli gradientu
node = h_plan_model_ma[node_idx]
# axis=1 - sčítá všechy směry dohromady, vysledkem je 1D pole rozměru nsim
inode2points_model_matrix = np.sum(np.abs(((sampled_plan_model - node) * gradient(node))**p_norm), axis=1)
#print(inode2points_Rd_matrix)
"""
partition -
Creates a copy of the array with its elements rearranged in such a way that
the value of the element in k-th position is in the position it would be in a sorted array.
All elements smaller than the k-th element are moved before this element
and all equal or greater are moved behind it. The ordering of the elements in the two partitions is undefined.
"""
idx = np.argpartition(inode2points_model_matrix, 2)
node_failsi[node_idx] = failsi[idx[0]]
points_weight = PDF[idx[0]] / inode2points_model_matrix[idx[0]] + PDF[idx[1]] / inode2points_model_matrix[idx[1]]
points_distances = 1 / inode2points_model_matrix[idx[0]] + 1 / inode2points_model_matrix[idx[1]]
failure_weight = int(failsi[idx[0]]) * PDF[idx[0]] / inode2points_model_matrix[idx[0]]
failure_weight += int(failsi[idx[1]]) * PDF[idx[1]] / inode2points_model_matrix[idx[1]]
failure_distance = int(failsi[idx[0]]) / inode2points_model_matrix[idx[0]] + int(failsi[idx[1]]) / inode2points_model_matrix[idx[1]]
node_pf_estimations[node_idx] = failure_weight/points_weight
node_pf_pure_estimations[node_idx] = failure_distance/points_distances
cell_stats = dict()
# musí sa (na konci) rovnat jedne
# opravdu dělíme nis'em, jako v normálním IS
# nikoliv počtem příjatých bodíků,
# protože průměrná vaha o hodně mene významná metrika
cell_stats['cell_probability'] = np.sum(h_plan.w[Vor_mask]) / nis
cell_stats['Voronoi_2_point_upper_bound'] = np.sum(h_plan.w[Vor_mask]*np.ceil(node_pf_estimations)) / nis
cell_stats['Voronoi_2_point_failure_rate'] = np.sum(h_plan.w[Vor_mask]*node_pf_estimations) / nis
cell_stats['Voronoi_2_point_pure_failure_rate'] = np.sum(h_plan.w[Vor_mask]*node_pf_pure_estimations) / nis
cell_stats['Voronoi_2_point_lower_bound'] = np.sum(h_plan.w[Vor_mask]*np.floor(node_pf_estimations)) / nis
cell_stats['Voronoi_failure_rate'] = np.sum(weights_sim[Vor_mask]*node_failsi) / nis
for key, value in cell_stats:
global_stats[key] += value
nodes=CandyBox(h_plan[Vor_mask], w=h_plan.w[Vor_mask], node_pf_estimations=node_pf_estimations,\
node_pf_pure_estimations=node_pf_pure_estimations, node_failsi=node_failsi)
# praská
callback(estimation=estimation, nodes=nodes, cell_stats=cell_stats)
return global_stats
# dedictvi
#for k in range(len(ii)):
# points_weigths[ii[k]] = points_weigths[ii[k]] + weights_sim[k] / nis
# near_neighbors[ii[k]] = near_neighbors[ii[k]] + 1
# Vor_mask[k] = failsi[ii[k]]