File wellmet/voronoi.py copied from file wellmet/estimation.py (similarity 53%) (mode: 100644) (index d27b8a7..fe19747) |
... |
... |
from . import sball |
21 |
21 |
|
|
22 |
22 |
|
|
23 |
23 |
|
|
|
24 |
|
#оӵ кык точкаен Вороной |
|
25 |
|
# (garbled machine translation of the Udmurt line above, which reads "two-point Voronoi")
|
26 |
|
#č hlavní myšlenka-pointa třídy je, |
|
27 |
|
#č že místo integrování buňka po buňce |
|
28 |
|
#č a pofiderního rozhodování |
|
29 |
|
#č kdy a jak odhady aktualizovat |
|
30 |
|
#č integrujeme zde všechny možné pary vzorků, |
|
31 |
|
#č kontakt po kontaktu, |
|
32 |
|
#č tj. oblastí ve kterých nejblížšimi jsou pár určitých vzorků |
|
33 |
|
#č Vyhody: |
|
34 |
|
#č + Získáváme strukturální informace o problému |
|
35 |
|
#č + Nemusíme integrovat všechno. Na zelený-zelený kontakty kašleme |
|
36 |
|
#č + V rámci kontaktu intropie je neměnná, vysledný potenciál je předvidatelný |
|
37 |
|
#č + Důraz integrace je právě na oblastéch zájmu, ne bůhví na čem u buňek. |
|
38 |
|
#č + Nejsou-li body příslyšné kontaktu, tak na ten pár vzorků můžeme vykašlat. |
|
39 |
|
#č U buňek člověk nikdy neví... |
|
40 |
|
#č |
|
41 |
|
#č Nevyhody: |
|
42 |
|
#č - Kvadrát. Kontaktů je nsim-krát víc jak buňek. |
|
43 |
|
#č Ale i tak počet je konečný. Zvladnutelný. A nezavisí na dimenzi prostoru. |
|
44 |
|
|
|
45 |
|
class ContactVoronoi:
    """Contact-based Voronoi failure-probability integrator.

    Per the original author's notes: instead of integrating the probability
    content cell by cell (with fiddly decisions about when and how to refresh
    the estimates), integrate over *contacts* -- regions in which a particular
    pair of samples are the two nearest points.  Contacts are disjoint, so the
    collected probabilities can simply be summed, and green-green
    (success-success) contacts can be skipped entirely.

    NOTE(review): this class appears to be an unfinished port of the
    standalone ``Voronoi_2_point_estimation`` function.  In the original
    source ``__init__`` was missing its colon, ``update`` referenced
    ``model_space``/``tree_sampling`` without ``self.``, ``update_cell`` was a
    stub called with the wrong arity, and ``integrate_cell`` was a verbatim
    copy of the function body referencing a dozen undefined names.  Those
    defects are fixed below; places where the intent cannot be inferred are
    marked with TODOs.
    """

    def __init__(self, sample_box, hull, model_space='G', sampling_space=None,
                 p_norm=2, p_base=0.5, ns=1000, auto_update=True,
                 on_cell_update=None, on_neighbours_update=None):
        """Store the configuration and run the initial update.

        Parameters
        ----------
        sample_box : project sample container (provides .nsim, .nvar,
            .failsi, .failure_points, .pdf(), .sampled_plan and per-space
            coordinate attributes) -- TODO confirm the exact contract
        hull : hull object; stored only, the visible code does not use it
        model_space : name of the space in which distances are measured
        sampling_space : name of the space in which IS sampling is done;
            ``None`` means "same as model_space"
        p_norm : Minkowski norm for the KD-tree nearest-neighbour queries
        p_base : target fraction of IS nodes falling into a cell during
            subsequent iterations; only used to derive ``base_r``
        ns : number of IS nodes drawn per integration batch
        auto_update : flag consulted by the ``_update`` hull helper
        on_cell_update, on_neighbours_update : optional callbacks
        """
        self.sample_box = sample_box
        self.hull = hull
        self.model_space = model_space
        self.sampling_space = sampling_space
        self.p_norm = p_norm

        self.ns = ns
        self.auto_update = auto_update
        self.on_cell_update = on_cell_update
        self.on_neighbours_update = on_neighbours_update

        # During further iterations roughly p_base of the IS nodes should
        # fall inside the cell; base_r converts that probability into a
        # radius, later used to scale the IS standard deviation.
        self.base_r = sball.Sball(sample_box.nvar).get_r(1 - p_base)

        # Per-contact bookkeeping: (i, j) -> integration nodes;
        # None marks a green-green contact we decided to ignore.
        self.nodes = {}
        self.direct_contacts = {}
        # cell index -> dict of per-cell statistics filled by integrate_cell()
        self.cell_stats = {}

        self.update()

    @staticmethod
    def _update(hull):
        # Refresh the hull only when it opted into automatic updates.
        # (Made a @staticmethod: the original bare ``def _update(hull)``
        # would have received the instance as ``hull`` if called through
        # an instance.)
        if hull.auto_update:
            hull.update()

    # The class is meant to be foolproof: it can be created on an empty
    # sample box and will not pester the user with errors.
    def update(self):
        """(Re)build the KD-trees and walk over all sample pairs.

        Returns None immediately when fewer than two samples exist,
        i.e. when not a single contact can be formed.
        """
        nsim = self.sample_box.nsim
        if nsim < 2:
            return None

        # The sample box recomputes failsi on every access, so cache it
        # once per update.
        self.failsi = self.sample_box.failsi

        self.PDF = self.sample_box.pdf(self.model_space)

        # Split between the space we sample in and the "model" space,
        # in which, essentially, distances are measured.
        sampled_plan_model = getattr(self.sample_box, self.model_space)
        self.sampled_plan_model = sampled_plan_model
        self.tree = cKDTree(sampled_plan_model)

        if self.sampling_space is None:
            # sing like sampling
            sampled_plan_sing = sampled_plan_model
            self.tree_sampling = self.tree
        else:
            sampled_plan_sing = getattr(self.sample_box, self.sampling_space)
            # Quick fix; does not help much -- after roughly 16 samples
            # cKDTree computes by brute force anyway.
            self.tree_sampling = cKDTree(sampled_plan_sing,
                                         compact_nodes=False,
                                         balanced_tree=False)
        self.sampled_plan_sing = sampled_plan_sing

        # Find the distance to the nearest sampling point (for every point).
        # For sampling always use the Euclidean norm (p=2).
        dd2, ii2 = self.tree_sampling.query(sampled_plan_sing, k=[2], p=2)
        self.mindist_sing = dd2.flatten()

        failure_points = self.sample_box.failure_points

        # Walk over all contact pairs, restricting each one to the nearest
        # samples.  Disjointness is preserved throughout, so all collected
        # probabilities can simply be summed.
        for i in range(nsim):
            for j in range(i):
                # "missing" does not mean "never existed"
                if (i, j) in self.nodes:
                    # None means the contact is of no interest to us
                    if self.nodes[(i, j)] is not None:
                        self.update_cell(i, j)

                # (i, j)? first time we see these numbers
                elif i in failure_points:
                    # TODO(review): both branches below were identical in
                    # the original; red (failure-failure) and yellow
                    # (mixed) contacts presumably deserve different
                    # treatment -- confirm the intended behaviour.
                    if j in failure_points:
                        # red contact
                        self.integrate_cell(i)
                    else:
                        # yellow contact
                        self.integrate_cell(i)
                else:
                    # green (success-success) contact -- not needed
                    self.nodes[(i, j)] = None

    def update_cell(self, cell_id, j=None):
        # TODO(review): stub in the original.  The internal caller passes a
        # pair (i, j), so an optional second index is accepted to avoid a
        # TypeError; callers using the original one-argument form still work.
        pass

    def integrate_cell(self, cell_id):
        """Importance-sampling integration of the Voronoi cell of one sample.

        Draws batches of ``self.ns`` IS nodes centred on sample ``cell_id``,
        keeps only nodes whose nearest sample is ``cell_id``, and grows the
        IS density from the cell's weighted barycenter/covariance until the
        covariance trace stops increasing.  Two-point failure-rate estimates
        are stored in ``self.cell_stats[cell_id]`` and the assembled nodes
        are returned together with the statistics.

        NOTE(review): adapted from the standalone function; the gradient
        branch of the original was dropped because this class has no
        gradient concept.  Calls ``self.on_cell_update`` (if set) with the
        nodes and statistics.
        """
        i = cell_id
        sample_box = self.sample_box
        f = sample_box.sampled_plan
        nis = self.ns
        failsi = self.failsi
        PDF = self.PDF
        if self.sampling_space is None:
            sampling_space = self.model_space
        else:
            sampling_space = self.sampling_space

        # Use an IS density centred at the current point and take the
        # minimum distance as its standard deviation: mindist is halved
        # (the cell boundary is halfway to the neighbour) and divided by
        # base_r to obtain sigma.  stats.norm always takes the standard
        # deviation.  (Empirical choice, per the original author.)
        h_i = [stats.norm(self.sampled_plan_sing[i, j],
                          self.mindist_sing[i] / 2 / self.base_r)
               for j in range(sample_box.nvar)]
        h = f_models.UnCorD(h_i)

        # Select nis points from the IS density.  Despite the name, h_plan
        # carries the distribution and densities of f as well.
        h_plan = IS(f, h, space_from_h='R', space_to_f=sampling_space, Nsim=nis)

        h_plan_model = getattr(h_plan, self.model_space)
        dd, ii = self.tree.query(h_plan_model, k=1, p=self.p_norm)

        # keep the capital letter, as in the original
        Vor_mask = ii == i
        h_plan_model_ma = h_plan_model[Vor_mask]
        weights_sim = h_plan.w[Vor_mask]
        # dd1 are the distances of the nodes to the Voronoi cell's centre
        dd1 = dd[Vor_mask]

        trace = 0
        h_plan_sing_ma = getattr(h_plan, sampling_space)[Vor_mask]
        S_bc = np.cov(h_plan_sing_ma, rowvar=False, bias=True,
                      aweights=weights_sim)
        barycenter = np.average(h_plan_sing_ma, axis=0, weights=weights_sim)
        # effective nis
        nis_eff = nis

        # Keep adding IS batches while the weighted covariance trace grows,
        # i.e. while new nodes still discover unexplored parts of the cell.
        while trace < np.trace(S_bc):
            trace = np.trace(S_bc)

            w, v = np.linalg.eig(S_bc)

            # IS density centred at the cell's barycenter; sqrt of the
            # eigenvalues divided by base_r serves as the standard deviation.
            sigmas = np.sqrt(w) / self.base_r
            h_i = [stats.norm(0, sigmas[j]) for j in range(sample_box.nvar)]
            # distribution in the eigenbasis; draw nis points from it
            h_L = f_models.UnCorD(h_i)(nis)

            # Rotate back from the eigenbasis and shift to the barycenter.
            h_plan_bc = (v @ h_L.R.T).T
            h_plan_sing = h_plan_bc + barycenter

            h_plan_part = f.new_sample(h_plan_sing, sampling_space)

            # IS weights of the new batch
            weights_sim_part = h_plan_part.pdf(sampling_space) / h_L.pdf('R')
            h_plan.add_sample(CandyBox(h_plan_part, w=weights_sim_part))

            # Filter the batch down to nodes that belong to cell i.
            h_plan_model_part = getattr(h_plan_part, self.model_space)
            dd, ii = self.tree.query(h_plan_model_part, k=1, p=self.p_norm)

            Vor_mask_part = ii == i
            weights_sim_part = weights_sim_part[Vor_mask_part]

            # Maintaining Vor_mask is somewhat redundant; it is only kept
            # for out_nodes, which is not used anywhere yet.
            Vor_mask = np.append(Vor_mask, Vor_mask_part)

            h_plan_model_ma = np.vstack((h_plan_model_ma,
                                         h_plan_model_part[Vor_mask_part]))
            weights_sim = np.append(weights_sim, weights_sim_part)
            # dd1 are the distances of the nodes to the cell's centre
            dd1 = np.append(dd1, dd[Vor_mask_part])

            h_plan_sing_ma = np.vstack((h_plan_sing_ma,
                                        h_plan_sing[Vor_mask_part]))
            S_bc = np.cov(h_plan_sing_ma, rowvar=False, bias=True,
                          aweights=weights_sim)
            barycenter = np.average(h_plan_sing_ma, axis=0,
                                    weights=weights_sim)
            nis_eff += nis

        cell_stats = dict()
        # Must sum (over all cells) to one.  We really divide by nis_eff,
        # as in plain IS, not by the number of accepted nodes -- the
        # average weight is a far less meaningful metric.
        cell_stats['cell_probability'] = np.sum(weights_sim) / nis_eff

        # Two-point estimates: for every accepted node combine its own cell
        # centre i with the second-nearest sample.
        dd2, ii2 = self.tree.query(h_plan_model_ma, k=[2], p=self.p_norm)
        dd2 = dd2.flatten()
        ii2 = ii2.flatten()

        # this index play is rather expensive for numpy
        failsii_2 = failsi[ii2]
        # One sample (including its density PDF[i]) is always known.
        if failsi[i]:
            # failure cell
            points_1 = PDF[i] * dd2
            node_pf_estimations = points_1 / (points_1 + PDF[ii2] * dd1)
            node_pf_estimations = np.where(failsii_2, 1, node_pf_estimations)
            node_pf_pure_estimations = dd2 / (dd1 + dd2)
            node_pf_pure_estimations = np.where(failsii_2, 1,
                                                node_pf_pure_estimations)

            cell_stats['Voronoi_2_point_upper_bound'] = cell_stats['cell_probability']
            cell_stats['Voronoi_2_point_failure_rate'] = np.sum(weights_sim * node_pf_estimations) / nis_eff
            cell_stats['Voronoi_2_point_pure_failure_rate'] = np.sum(weights_sim * node_pf_pure_estimations) / nis_eff
            cell_stats['Voronoi_2_point_lower_bound'] = np.sum(weights_sim[failsii_2]) / nis_eff
            cell_stats['Voronoi_failure_rate'] = cell_stats['cell_probability']
            cell_stats['event'] = 'failure'
            nodes = CandyBox(h_plan.sampling_plan[Vor_mask], w=weights_sim,
                             node_pf_estimations=node_pf_estimations,
                             node_pf_pure_estimations=node_pf_pure_estimations,
                             dd1=dd1, dd2=dd2, ii2=ii2)
        else:
            # success cell: only nodes whose second neighbour failed
            # contribute to the failure rate
            dd1 = dd1[failsii_2]
            dd2 = dd2[failsii_2]
            points_1 = PDF[i] * dd2
            points_2 = PDF[ii2[failsii_2]] * dd1

            node_pf_estimations = points_2 / (points_1 + points_2)
            node_pf_pure_estimations = dd1 / (dd1 + dd2)

            cell_stats['Voronoi_2_point_upper_bound'] = np.sum(weights_sim[failsii_2]) / nis_eff
            cell_stats['Voronoi_2_point_failure_rate'] = np.sum(weights_sim[failsii_2] * node_pf_estimations) / nis_eff
            cell_stats['Voronoi_2_point_pure_failure_rate'] = np.sum(weights_sim[failsii_2] * node_pf_pure_estimations) / nis_eff
            cell_stats['Voronoi_2_point_lower_bound'] = 0
            cell_stats['Voronoi_failure_rate'] = 0
            cell_stats['event'] = 'success'
            nodes = CandyBox(h_plan.sampling_plan[Vor_mask][failsii_2],
                             w=weights_sim[failsii_2],
                             node_pf_estimations=node_pf_estimations,
                             node_pf_pure_estimations=node_pf_pure_estimations,
                             dd1=dd1, dd2=dd2, ii2=ii2[failsii_2])

            # take something with corresponding length
            zeros = np.zeros(len(weights_sim) - len(dd2))
            # add remaining (non-contributing) nodes
            nodes.add_sample(CandyBox(h_plan.sampling_plan[Vor_mask][~failsii_2],
                                      w=weights_sim[~failsii_2],
                                      node_pf_estimations=zeros,
                                      node_pf_pure_estimations=zeros,
                                      ii2=ii2[~failsii_2]))

        self.cell_stats[cell_id] = cell_stats
        # callback ↓
        if self.on_cell_update is not None:
            self.on_cell_update(nodes=nodes, cell_stats=cell_stats,
                                out_nodes=h_plan[~Vor_mask])
        return nodes, cell_stats
|
def Voronoi_2_point_estimation(sample_box, model_space='Rn', sampling_space=None,
                               p_norm=1, budget=20000, callback=None,
                               gradient=None):
    """Two-point Voronoi importance-sampling estimate of failure probability.

    For every sample an IS density is centred on it, nodes falling into its
    Voronoi cell are collected (batches are added while the weighted
    covariance trace of the cell keeps growing), and each node's failure
    probability is estimated from its two nearest samples.

    Parameters
    ----------
    sample_box : project sample container (provides .nsim, .nvar, .failsi,
        .pdf(), .sampled_plan and per-space coordinates) -- TODO confirm
    model_space : space in which distances are measured
    sampling_space : space in which IS sampling is performed
        (``None`` means "same as model_space")
    p_norm : Minkowski norm for nearest-neighbour queries
    budget : rough total number of IS nodes to spend over all cells
    callback : optional; called once per cell with nodes and statistics
    gradient : optional callable ``gradient(node) -> array``; when given,
        distances are gradient-weighted per node instead of KD-tree based.
        (NOTE(review): the original referenced ``gradient`` without ever
        defining it -- a guaranteed NameError; it is now an explicit
        keyword argument defaulting to None, which preserves the only
        branch that could previously run.)

    Returns
    -------
    defaultdict(int)
        Accumulated statistics over all cells ('cell_probability',
        'Voronoi_2_point_failure_rate', bounds, ...).  (The original
        accumulated these but never returned them.)
    """
    if callback is None:
        callback = lambda *_, **__: None

    nsim = sample_box.nsim
    # very, very modest budgeting:
    # p_base is the fraction of nodes that should surely land in the cell
    # during the first iteration
    p_base = 0.3
    nis_base = int((sample_box.nvar + 1) / p_base)
    nis = int(max(budget / nsim,
                  nis_base / stats.binom.sf(int(sample_box.nvar + 1), nis_base, p_base)))

    # pull the distribution out of the sample box
    f = sample_box.sampled_plan

    # the sample box recomputes failsi on every access, so cache it
    failsi = sample_box.failsi

    PDF = sample_box.pdf(model_space)

    # split between the space we sample in and the "model" space,
    # in which, essentially, distances are measured
    sampled_plan_model = getattr(sample_box, model_space)
    tree = cKDTree(sampled_plan_model)

    if sampling_space is None:
        sampling_space = model_space
        # sing like sampling
        sampled_plan_sing = sampled_plan_model
        tree_sampling = tree
    else:
        sampled_plan_sing = getattr(sample_box, sampling_space)
        # quick fix; does not help much -- after ~16 samples cKDTree
        # computes by brute force anyway
        tree_sampling = cKDTree(sampled_plan_sing,
                                compact_nodes=False, balanced_tree=False)

    # find the distance to the nearest sampling point (for all points)
    dd2, ii2 = tree_sampling.query(sampled_plan_sing, k=[2], p=p_norm)
    mindist_sing = dd2.flatten()

    # during the first iteration p_base of the nodes should surely land in
    # the cell; base_r converts that probability into a radius
    base_r = sball.Sball(sample_box.nvar).get_r(1 - p_base)
    # sanity check idea: np.sum(abs(stats.norm(0, 10).rvs(1000)) < sb.get_r(.5)*10)

    global_stats = collections.defaultdict(int)

    # walk over all points (i.e. samples), restricting each one to the
    # nearest nodes; this guarantees disjointness, so all collected
    # probabilities can simply be summed
    for i in range(nsim):  # loop over all points, not failing points only

        # use an IS density centred at the current point and take the
        # minimum distance as its standard deviation: mindist is halved
        # (the cell boundary is halfway to the neighbour) and divided by
        # base_r to obtain sigma (stats.norm takes the std. deviation)
        h_i = [stats.norm(sampled_plan_sing[i, j], mindist_sing[i] / 2 / base_r)
               for j in range(sample_box.nvar)]
        h = f_models.UnCorD(h_i)

        # select nis points from the IS density; despite the name, h_plan
        # carries the distribution and densities of f as well
        h_plan = IS(f, h, space_from_h='R', space_to_f=sampling_space, Nsim=nis)

        h_plan_model = getattr(h_plan, model_space)
        dd, ii = tree.query(h_plan_model, k=1, p=p_norm)

        # keep the capital letter
        Vor_mask = ii == i
        h_plan_model_ma = h_plan_model[Vor_mask]
        weights_sim = h_plan.w[Vor_mask]
        # dd1 are the distances of the nodes to the Voronoi cell's centre
        dd1 = dd[Vor_mask]

        trace = 0
        nis_ma = len(weights_sim)

        h_plan_sing_ma = getattr(h_plan, sampling_space)[Vor_mask]
        S_bc = np.cov(h_plan_sing_ma, rowvar=False, bias=True, aweights=weights_sim)
        barycenter = np.average(h_plan_sing_ma, axis=0, weights=weights_sim)
        # effective nis
        nis_eff = nis

        # keep adding IS batches while the weighted covariance trace grows,
        # i.e. while new nodes still discover unexplored parts of the cell
        while trace < np.trace(S_bc):
            # number of nodes inside the cell matters too
            trace = np.trace(S_bc)  # * nis_ma

            w, v = np.linalg.eig(S_bc)

            # IS density centred at the cell's barycenter; sqrt of the
            # eigenvalues divided by base_r serves as the std. deviation
            sigmas = np.sqrt(w) / base_r  # (sample_box.nvar+2) #! tune to taste
            h_i = [stats.norm(0, sigmas[j]) for j in range(sample_box.nvar)]
            # distribution in the eigenbasis; select nis points from it
            h_L = f_models.UnCorD(h_i)(nis)

            # rotate back from the eigenbasis and shift to the barycenter
            h_plan_bc = (v @ h_L.R.T).T
            h_plan_sing = h_plan_bc + barycenter

            # despite the name, h_plan carries f's densities too
            h_plan_part = f.new_sample(h_plan_sing, sampling_space)

            # IS weights of the new batch; masking on samples is supported
            # but expensive, so weights are computed on the full batch
            weights_sim_part = h_plan_part.pdf(sampling_space) / h_L.pdf('R')
            h_plan.add_sample(CandyBox(h_plan_part, w=weights_sim_part))

            # filter the batch down to nodes belonging to cell i
            h_plan_model_part = getattr(h_plan_part, model_space)
            dd, ii = tree.query(h_plan_model_part, k=1, p=p_norm)

            Vor_mask_part = ii == i
            weights_sim_part = weights_sim_part[Vor_mask_part]

            nis_ma = len(weights_sim_part)

            # maintaining Vor_mask is somewhat redundant; it is only kept
            # for out_nodes, which is not used anywhere yet
            Vor_mask = np.append(Vor_mask, Vor_mask_part)

            h_plan_model_ma = np.vstack((h_plan_model_ma,
                                         h_plan_model_part[Vor_mask_part]))
            weights_sim = np.append(weights_sim, weights_sim_part)
            # dd1 are the distances of the nodes to the cell's centre
            dd1 = np.append(dd1, dd[Vor_mask_part])

            h_plan_sing_ma = np.vstack((h_plan_sing_ma,
                                        h_plan_sing[Vor_mask_part]))
            S_bc = np.cov(h_plan_sing_ma, rowvar=False, bias=True,
                          aweights=weights_sim)
            barycenter = np.average(h_plan_sing_ma, axis=0, weights=weights_sim)
            nis_eff += nis

        cell_stats = dict()
        # must sum (in the end) to one; we really divide by nis_eff as in
        # plain IS, not by the number of accepted nodes -- the average
        # weight is a far less meaningful metric
        cell_stats['cell_probability'] = np.sum(weights_sim) / nis_eff

        # careful here!  This is the (slightly optimised) computation used
        # when we do not bother with a gradient.
        if gradient is None:
            # the ii indices are of little interest;
            # the distances, however, are usable
            dd2, ii2 = tree.query(h_plan_model_ma, k=[2], p=p_norm)
            dd2 = dd2.flatten()
            ii2 = ii2.flatten()

            # this index play is rather expensive for numpy
            failsii_2 = failsi[ii2]
            # one sample (including its density PDF[i]) is always known
            if failsi[i]:
                # failure cell
                points_1 = PDF[i] * dd2
                node_pf_estimations = points_1 / (points_1 + PDF[ii2] * dd1)
                node_pf_estimations = np.where(failsii_2, 1, node_pf_estimations)
                node_pf_pure_estimations = dd2 / (dd1 + dd2)
                node_pf_pure_estimations = np.where(failsii_2, 1,
                                                    node_pf_pure_estimations)

                cell_stats['Voronoi_2_point_upper_bound'] = cell_stats['cell_probability']
                cell_stats['Voronoi_2_point_failure_rate'] = np.sum(weights_sim * node_pf_estimations) / nis_eff
                cell_stats['Voronoi_2_point_pure_failure_rate'] = np.sum(weights_sim * node_pf_pure_estimations) / nis_eff
                cell_stats['Voronoi_2_point_lower_bound'] = np.sum(weights_sim[failsii_2]) / nis_eff
                cell_stats['Voronoi_failure_rate'] = cell_stats['cell_probability']
                nodes = CandyBox(h_plan.sampling_plan[Vor_mask], w=weights_sim,
                                 node_pf_estimations=node_pf_estimations,
                                 node_pf_pure_estimations=node_pf_pure_estimations,
                                 dd1=dd1, dd2=dd2, ii2=ii2)

            else:
                # success cell: only nodes whose second neighbour failed
                # contribute to the failure rate
                dd1 = dd1[failsii_2]
                dd2 = dd2[failsii_2]
                points_1 = PDF[i] * dd2
                points_2 = PDF[ii2[failsii_2]] * dd1

                node_pf_estimations = points_2 / (points_1 + points_2)
                node_pf_pure_estimations = dd1 / (dd1 + dd2)

                cell_stats['Voronoi_2_point_upper_bound'] = np.sum(weights_sim[failsii_2]) / nis_eff
                cell_stats['Voronoi_2_point_failure_rate'] = np.sum(weights_sim[failsii_2] * node_pf_estimations) / nis_eff
                cell_stats['Voronoi_2_point_pure_failure_rate'] = np.sum(weights_sim[failsii_2] * node_pf_pure_estimations) / nis_eff
                cell_stats['Voronoi_2_point_lower_bound'] = 0
                cell_stats['Voronoi_failure_rate'] = 0
                nodes = CandyBox(h_plan.sampling_plan[Vor_mask][failsii_2],
                                 w=weights_sim[failsii_2],
                                 node_pf_estimations=node_pf_estimations,
                                 node_pf_pure_estimations=node_pf_pure_estimations,
                                 dd1=dd1, dd2=dd2, ii2=ii2[failsii_2])

                # take something with corresponding length
                zeros = np.zeros(len(weights_sim) - len(dd2))
                # add remaining nodes
                nodes.add_sample(CandyBox(h_plan.sampling_plan[Vor_mask][~failsii_2],
                                          w=weights_sim[~failsii_2],
                                          node_pf_estimations=zeros,
                                          node_pf_pure_estimations=zeros,
                                          ii2=ii2[~failsii_2]))

        # this is how it goes when we do use the gradient;
        # still needs optimisation
        else:
            # np.empty() defaults to dtype=float;
            # kept mainly for the callback's sake
            node_pf_estimations = np.empty(len(h_plan_model_ma))
            node_pf_pure_estimations = np.empty(len(h_plan_model_ma))  # pure distance estimation
            # (np.bool was removed from NumPy; the builtin bool is correct)
            node_failsi = np.empty(len(h_plan_model_ma), dtype=bool)

            # go through every single node; the KD-tree was used only to
            # split into disjoint chunks -- everything happens right here,
            # and all of it because of the gradient
            for node_idx in range(len(h_plan_model_ma)):
                node = h_plan_model_ma[node_idx]
                # axis=1 sums all directions together; the result is a 1-D
                # array of length nsim with gradient-weighted distances
                inode2points_model_matrix = np.sum(
                    np.abs(((sampled_plan_model - node) * gradient(node)) ** p_norm),
                    axis=1)

                # argpartition with kth=1 places the two smallest distances
                # at positions 0 and 1 (numpy guarantees the k-th element
                # lands in its sorted position)
                idx = np.argpartition(inode2points_model_matrix, 1)
                node_failsi[node_idx] = failsi[idx[0]]

                points_weight = (PDF[idx[0]] / inode2points_model_matrix[idx[0]]
                                 + PDF[idx[1]] / inode2points_model_matrix[idx[1]])
                points_distances = (1 / inode2points_model_matrix[idx[0]]
                                    + 1 / inode2points_model_matrix[idx[1]])

                failure_weight = int(failsi[idx[0]]) * PDF[idx[0]] / inode2points_model_matrix[idx[0]]
                failure_weight += int(failsi[idx[1]]) * PDF[idx[1]] / inode2points_model_matrix[idx[1]]

                failure_distance = (int(failsi[idx[0]]) / inode2points_model_matrix[idx[0]]
                                    + int(failsi[idx[1]]) / inode2points_model_matrix[idx[1]])

                node_pf_estimations[node_idx] = failure_weight / points_weight
                node_pf_pure_estimations[node_idx] = failure_distance / points_distances

            cell_stats['Voronoi_2_point_upper_bound'] = np.sum(h_plan.w[Vor_mask] * np.ceil(node_pf_estimations)) / nis_eff
            cell_stats['Voronoi_2_point_failure_rate'] = np.sum(h_plan.w[Vor_mask] * node_pf_estimations) / nis_eff
            cell_stats['Voronoi_2_point_pure_failure_rate'] = np.sum(h_plan.w[Vor_mask] * node_pf_pure_estimations) / nis_eff
            cell_stats['Voronoi_2_point_lower_bound'] = np.sum(h_plan.w[Vor_mask] * np.floor(node_pf_estimations)) / nis_eff
            cell_stats['Voronoi_failure_rate'] = np.sum(h_plan.w[Vor_mask] * node_failsi) / nis_eff

            nodes = CandyBox(h_plan.sampling_plan[Vor_mask], w=h_plan.w[Vor_mask],
                             node_pf_estimations=node_pf_estimations,
                             node_pf_pure_estimations=node_pf_pure_estimations,
                             node_failsi=node_failsi)

        # accumulate before tagging the event, so 'event' never enters the
        # numeric totals (as in the original ordering)
        for key, value in cell_stats.items():
            global_stats[key] += value

        # callback ↓
        if failsi[i]:
            cell_stats['event'] = 'failure'
        else:
            cell_stats['event'] = 'success'
        # NOTE(review): the original passed an undefined name ``estimation``
        # here; the running totals are the closest grounded substitute
        callback(estimation=global_stats, nodes=nodes, cell_stats=cell_stats,
                 out_nodes=h_plan[~Vor_mask])

    return global_stats
|
696 |
|
|
|
697 |
|
|
|
698 |
|
# dedictvi |
|
699 |
|
#for k in range(len(ii)): |
|
700 |
|
# points_weigths[ii[k]] = points_weigths[ii[k]] + weights_sim[k] / nis |
|
701 |
|
# near_neighbors[ii[k]] = near_neighbors[ii[k]] + 1 |
|
702 |
|
# Vor_mask[k] = failsi[ii[k]] |
|
703 |
|
|
|
704 |
|
|
|
705 |
|
|
|
706 |
|
|
|
707 |
|
|
|
708 |
|
|
|
709 |
|
|
|
710 |
|
|
|
711 |
|
|
|
712 |
|
|
|
713 |
|
|
|
714 |
|
|
|
715 |
|
|
|
716 |
|
|
|
717 |
|
|
|
718 |
|
|
|
719 |
|
|
24 |
720 |
# |
# |
25 |
721 |
# l2 Voronoi cKDTree IS localization |
# l2 Voronoi cKDTree IS localization |
26 |
722 |
# |
# |
27 |
|
|
|
|
723 |
|
|
28 |
724 |
# Rbf_estimation so far |
# Rbf_estimation so far |
29 |
725 |
def L2_Voronoi_cKDTree(sample_box, nis=50000): |
def L2_Voronoi_cKDTree(sample_box, nis=50000): |
30 |
726 |
nsim, nvar = np.shape(sampled_plan_R) |
nsim, nvar = np.shape(sampled_plan_R) |