File estimation.py changed (mode: 100644) (index 1a4e4f3..e120ed4) |
... |
... |
import scipy.stats as stats |
11 |
11 |
|
|
12 |
12 |
from scipy.spatial import cKDTree |
from scipy.spatial import cKDTree |
13 |
13 |
from scipy.spatial import Delaunay |
from scipy.spatial import Delaunay |
|
14 |
|
from scipy import spatial |
14 |
15 |
from scipy import interpolate |
from scipy import interpolate |
15 |
16 |
|
|
16 |
17 |
import collections # for defaultdict |
import collections # for defaultdict |
17 |
18 |
|
|
18 |
19 |
from . import f_models |
from . import f_models |
19 |
20 |
from . import simplex as six |
from . import simplex as six |
|
21 |
|
from . import IS_stat |
20 |
22 |
from .IS_stat import IS |
from .IS_stat import IS |
21 |
23 |
from .candybox import CandyBox |
from .candybox import CandyBox |
22 |
24 |
from . import sball |
from . import sball |
|
... |
... |
def Voronoi_2_point_estimation(sample_box, model_space='Rn', sampling_space=None |
546 |
548 |
|
|
547 |
549 |
|
|
548 |
550 |
|
|
|
551 |
|
|
|
552 |
|
|
|
553 |
|
|
|
554 |
|
def _issi_estimate_outside(f, estimation, callback): |
|
555 |
|
#č zde f-ko musí taky obsahovat vzorky! |
|
556 |
|
|
|
557 |
|
# -1 = 'out' |
|
558 |
|
## -2 = 'inside' # no, actually |
|
559 |
|
#č založme ISSI |
|
560 |
|
oiss = IS_stat.ISSI() |
|
561 |
|
|
|
562 |
|
# current outside probability estimation |
|
563 |
|
p_out_implicit = 0.5 |
|
564 |
|
s_ball = sball.Sball(f.nvar) |
|
565 |
|
base_r = s_ball.get_r(0.5) |
|
566 |
|
budget = estimation['outside_budget'] |
|
567 |
|
|
|
568 |
|
model_space = estimation['model_space'] |
|
569 |
|
sampling_space = estimation['sampling_space'] |
|
570 |
|
sampled_plan_model = getattr(f, model_space) |
|
571 |
|
convex_hull = spatial.ConvexHull(sampled_plan_model) |
|
572 |
|
|
|
573 |
|
# first step |
|
574 |
|
nodes = f(budget) |
|
575 |
|
mask = six.is_outside(convex_hull, getattr(nodes, model_space)) |
|
576 |
|
|
|
577 |
|
|
|
578 |
|
# here both out and outside means 'outside of sampled domain' |
|
579 |
|
out_nodes = nodes[mask] |
|
580 |
|
outside_nodes = CandyBox(out_nodes, w=np.full(len(out_nodes), 1, dtype=np.int8)) |
|
581 |
|
|
|
582 |
|
#čs necháme ISSI trapit sa pravděpodobnostma |
|
583 |
|
if len(outside_nodes) > 0: |
|
584 |
|
oiss.add_single_event_data(outside_nodes.w, event=-1, nis=budget) |
549 |
585 |
|
|
|
586 |
|
while len(outside_nodes) < budget/2: |
|
587 |
|
p_out_implicit = p_out_implicit/2 |
|
588 |
|
|
|
589 |
|
sampling_r, p_out_implicit = s_ball.get_r_iteration(p_out_implicit) |
|
590 |
|
# asi tam bylo sampling_r/bx.base_r, že? |
|
591 |
|
# u stats.norm zadáváme směrodatnou odchylku, je to asi správné |
|
592 |
|
h = f_models.UnCorD([stats.norm(0, sampling_r/base_r) for i in range(f.nvar)]) |
|
593 |
|
nodes = IS_stat.IS(f, h, space_from_h='R', space_to_f=sampling_space, Nsim=budget) |
|
594 |
|
|
|
595 |
|
mask = six.is_outside(convex_hull, getattr(nodes, model_space)) |
|
596 |
|
|
|
597 |
|
|
|
598 |
|
# here both out and outside means 'outside of sampled domain' |
|
599 |
|
out_nodes = nodes[mask] |
|
600 |
|
#č podle mého názoru není nutné zde se trapit odhady |
|
601 |
|
#č je třeba jenom sehnat dostatečně bodíků a utikat |
|
602 |
|
if len(out_nodes) > 0: |
|
603 |
|
oiss.add_single_event_data(out_nodes.w, event=-1, nis=budget) |
|
604 |
|
outside_nodes.add_sample(out_nodes) |
|
605 |
|
|
|
606 |
|
|
|
607 |
|
weighted_mean, *__ = oiss.get_means() |
|
608 |
|
|
|
609 |
|
if callback is not None: |
|
610 |
|
cell_stats = dict() |
|
611 |
|
#č tohle je opravdu jen pro formu |
|
612 |
|
cell_stats['cell_probability'] = weighted_mean[0] |
|
613 |
|
cell_stats['vertex_estimation'] = 0 |
|
614 |
|
cell_stats['weighted_vertex_estimation'] = 0 |
|
615 |
|
cell_stats['event'] = 'outside' #č "outside" se mi libí víc než prostě "out" |
|
616 |
|
|
|
617 |
|
|
|
618 |
|
#:) kolbek ↓ |
|
619 |
|
#nodes = h_plan[mask] |
|
620 |
|
# out_nodes here meant "not needed" |
|
621 |
|
#out_nodes = h_plan[~mask] #č možná je čas se zbavit toho, co se stejně nepouživá? |
|
622 |
|
callback(estimation=estimation, nodes=outside_nodes, cell_stats=cell_stats) #, out_nodes=out_nodes) |
|
623 |
|
|
|
624 |
|
return oiss, weighted_mean[0] |
|
625 |
|
|
|
626 |
|
|
|
627 |
|
|
|
628 |
|
def simplex_estimation(sample_box, model_space='Rn', sampling_space=None, weighting_space=None,\ |
|
629 |
|
outside_budget=1000, simplex_budget=100, callback=None): |
|
630 |
|
""" |
|
631 |
|
Delaunay triangulation |
|
632 |
|
budget=20000 |
|
633 |
|
""" |
|
634 |
|
|
|
635 |
|
nsim = sample_box.nsim |
|
636 |
|
nvar = sample_box.nvar |
|
637 |
|
|
|
638 |
|
|
|
639 |
|
#č vytahneme ze sample_boxu rozdělení |
|
640 |
|
f = sample_box.f_model |
|
641 |
|
|
|
642 |
|
|
|
643 |
|
#č zde provadím rozdělení na prostor, ve kterém vzorkujem |
|
644 |
|
#čs a prostor "modelu", vô ktôrom, v podstatě, měříme vzdaleností |
|
645 |
|
sampled_plan_model = getattr(sample_box, model_space) |
|
646 |
|
if sampling_space is None: |
|
647 |
|
sampling_space = model_space |
|
648 |
|
|
|
649 |
|
if weighting_space is None: |
|
650 |
|
weighting_space = model_space |
|
651 |
|
|
|
652 |
|
|
|
653 |
|
#č jsou to informace pro callback |
|
654 |
|
estimation={'method': "simplex_estimation", 'nsim':nsim} |
|
655 |
|
estimation['model_space'] = model_space |
|
656 |
|
estimation['sampling_space'] = sampling_space |
|
657 |
|
estimation['weighting_space'] = weighting_space |
|
658 |
|
estimation['outside_budget'] = outside_budget |
|
659 |
|
estimation['simplex_budget'] = simplex_budget |
|
660 |
|
|
|
661 |
|
|
|
662 |
|
siss, outside = _issi_estimate_outside(f, estimation, callback) |
|
663 |
|
global_stats = {'failure':0, 'success':0, 'mix':0, 'outside':outside} |
|
664 |
|
global_stats['vertex_estimation'] = 0 |
|
665 |
|
global_stats['weighted_vertex_estimation'] = 0 |
|
666 |
|
|
|
667 |
|
|
|
668 |
|
#č žádné chyby nechytám |
|
669 |
|
#čs když se tringulace nepovede tak nemáme čo robiť |
|
670 |
|
tri = Delaunay(sampled_plan_model) |
|
671 |
|
#č koplanar neřeším (ale vrátím spolu s ostatníma věcma na konci) |
|
672 |
|
|
|
673 |
|
|
|
674 |
|
#č Metoda musí simplexům přiřazovat jev |
|
675 |
|
# 0=success, 1=failure, 2=mix |
|
676 |
|
event_ids = six.get_events(sample_box, tri.simplices) |
|
677 |
|
|
|
678 |
|
|
|
679 |
|
#č zde postupně v cyklu prochazíme všemi simplexy |
|
680 |
|
#č tynhlenstím zajišťujeme disjunktnost |
|
681 |
|
#čs a môžeme všechny nasbírané pravděpodobnosti jednoduše sčítat |
|
682 |
|
for simplex_id, simplex, event_id in zip(range(tri.nsimplex), tri.simplices, event_ids): |
|
683 |
|
|
|
684 |
|
#оӵ чылкыт f_model |
|
685 |
|
vertices = f[simplex] |
|
686 |
|
|
|
687 |
|
|
|
688 |
|
# already divided by nsim in variance formule |
|
689 |
|
# divide by /(nvar+1)/(nvar+2) from simplex inertia tensor solution |
|
690 |
|
# multiply by simplex_volume, but it looks like it shouldn't be here |
|
691 |
|
# for simplex: d = nvar+2 |
|
692 |
|
#č sice to má nazev h_plan, ale nese rozdělení a hustoty v f-ku |
|
693 |
|
h_plan = IS_stat.IS_like(vertices, sampling_space=sampling_space, nis=simplex_budget, d=nvar+2) |
|
694 |
|
|
|
695 |
|
|
|
696 |
|
h_plan_model = getattr(h_plan, model_space) |
|
697 |
|
vertices_model = getattr(vertices, model_space) |
|
698 |
|
|
|
699 |
|
#č budeme pokažde sestavovat triangulaci z jedného simplexu |
|
700 |
|
#č a rešit jen zda naši bodíky "inside or outside" |
|
701 |
|
#č (s narustajícím nsim tohle brzy se stavá rychlejším než bežný dotaz) |
|
702 |
|
found_simplices = spatial.Delaunay(vertices_model).find_simplex(h_plan_model) |
|
703 |
|
|
|
704 |
|
#č ten simplex nic neznamená, asi nebudu jej použivat |
|
705 |
|
h_plan.simplex = found_simplices |
|
706 |
|
|
|
707 |
|
#č uvnitř simplexu - mel by tam bejt pouze jeden, "nulový" simplex |
|
708 |
|
mask = found_simplices == 0 |
|
709 |
|
|
|
710 |
|
|
|
711 |
|
#čs necháme ISSI trapit sa pravděpodobnostma |
|
712 |
|
siss.add_single_event_data(h_plan.w[mask], event=simplex_id, nis=simplex_budget) |
|
713 |
|
|
|
714 |
|
|
|
715 |
|
|
|
716 |
|
|
|
717 |
|
|
|
718 |
|
|
|
719 |
|
|
|
720 |
|
# -1 = 'outside', 0=success, 1=failure, 2=mix |
|
721 |
|
if event_id == 0: |
|
722 |
|
event = 'success' |
|
723 |
|
elif event_id == 1: |
|
724 |
|
event = 'failure' |
|
725 |
|
else: #č simplex -1 mít nemůže |
|
726 |
|
event = 'mix' |
|
727 |
|
|
|
728 |
|
|
|
729 |
|
#č součet tady nemusí sa (na konci) rovnat jedne |
|
730 |
|
#č opravdu dělíme nis'em, jako v normálním IS |
|
731 |
|
#č nikoliv počtem příjatých bodíků, |
|
732 |
|
#č protože průměrná vaha je o hodně mene významná metrika |
|
733 |
|
simplex_probability = np.sum(h_plan.w[mask]) / simplex_budget |
|
734 |
|
|
|
735 |
|
|
|
736 |
|
|
|
737 |
|
# -1 = 'outside', 0=success, 1=failure, 2=mix |
|
738 |
|
if event_id == 0: |
|
739 |
|
vertex_estimation = 0 |
|
740 |
|
weighted_vertex_estimation = 0 |
|
741 |
|
elif event_id == 1: |
|
742 |
|
vertex_estimation = simplex_probability |
|
743 |
|
weighted_vertex_estimation = simplex_probability |
|
744 |
|
else: # mix #č simplex -1 mít nemůže |
|
745 |
|
# fp like a failure points. Number of failure points |
|
746 |
|
# intersect 2 times faster than setdiff (in my tests) |
|
747 |
|
fp = len(np.intersect1d(simplex, sample_box.failure_points, assume_unique=True)) |
|
748 |
|
vertex_estimation = simplex_probability * fp / (nvar + 1) |
|
749 |
|
|
|
750 |
|
fp = np.sum(sample_box[simplex].failure_samples.pdf(weighting_space)) |
|
751 |
|
p = np.sum(vertices.pdf(weighting_space)) |
|
752 |
|
weighted_vertex_estimation = simplex_probability * fp / p |
|
753 |
|
|
|
754 |
|
|
|
755 |
|
global_stats[event] += simplex_probability |
|
756 |
|
global_stats['vertex_estimation'] += vertex_estimation |
|
757 |
|
global_stats['weighted_vertex_estimation'] += weighted_vertex_estimation |
|
758 |
|
|
|
759 |
|
if callback is not None: |
|
760 |
|
cell_stats = dict() |
|
761 |
|
|
|
762 |
|
cell_stats['cell_probability'] = simplex_probability |
|
763 |
|
cell_stats['vertex_estimation'] = vertex_estimation |
|
764 |
|
cell_stats['weighted_vertex_estimation'] = weighted_vertex_estimation |
|
765 |
|
cell_stats['event'] = event |
|
766 |
|
|
|
767 |
|
|
|
768 |
|
# kolbek ↓ |
|
769 |
|
nodes = h_plan[mask] |
|
770 |
|
out_nodes = h_plan[~mask] |
|
771 |
|
callback(estimation=estimation, simplex=vertices, nodes=nodes, cell_stats=cell_stats, out_nodes=out_nodes) |
|
772 |
|
|
|
773 |
|
|
|
774 |
|
|
|
775 |
|
|
|
776 |
|
|
|
777 |
|
|
|
778 |
|
|
|
779 |
|
# TRI-compatible estimation |
|
780 |
|
# -1 = 'outside', 0=success, 1=failure, 2=mix |
|
781 |
|
tri_estimation = six.get_TRI_estimation(siss, event_ids) |
|
782 |
|
|
|
783 |
|
result = {**estimation, **global_stats, 'TRI_estimation': tri_estimation, 'coplanar':tri.coplanar} |
|
784 |
|
# try to put it to the BlackBox |
|
785 |
|
try: |
|
786 |
|
sample_box.estimations.append(result) |
|
787 |
|
except: #č asi ani nebudu neuspěch hlasit |
|
788 |
|
pass |
|
789 |
|
|
|
790 |
|
return result |
|
791 |
|
|
|
792 |
|
|
|
793 |
|
|
|
794 |
|
|
|
795 |
|
|
|
796 |
|
|
|
797 |
|
|
|
798 |
|
def _estimate_outside(f, estimation, callback): |
|
799 |
|
#č zde f-ko musí taky obsahovat vzorky! |
|
800 |
|
|
|
801 |
|
# -1 = 'out', 0=success, 1=failure, 2=mix |
|
802 |
|
# current outside probability estimation |
|
803 |
|
p_out_implicit = 0.5 |
|
804 |
|
budget = estimation['outside_budget'] |
|
805 |
|
|
|
806 |
|
model_space = estimation['model_space'] |
|
807 |
|
sampled_plan_model = getattr(f, model_space) |
|
808 |
|
convex_hull = spatial.ConvexHull(sampled_plan_model) |
|
809 |
|
|
|
810 |
|
# first step |
|
811 |
|
nodes = f(budget) |
|
812 |
|
mask = six.is_outside(convex_hull, getattr(nodes, model_space)) |
|
813 |
|
|
|
814 |
|
|
|
815 |
|
# here outside means 'outside of sampled domain' |
|
816 |
|
outside_nodes = nodes[mask] |
|
817 |
|
outside_nodes = CandyBox(outside_nodes, w=np.full(len(outside_nodes), 1, dtype=np.int8)) |
|
818 |
|
|
|
819 |
|
|
|
820 |
|
while len(outside_nodes) < budget: |
|
821 |
|
p_out = p_out/2 |
|
822 |
|
|
|
823 |
|
sampling_r, __ = bx.sball.get_r_iteration(p_out) |
|
824 |
|
# asi tam bylo sampling_r/bx.base_r, že? |
|
825 |
|
# u stats.norm zadáváme směrodatnou odchylku, je to asi správné |
|
826 |
|
h = f_models.UnCorD([stats.norm(0, sampling_r/bx.base_r) for i in range(bx.nvar)]) |
|
827 |
|
candidates = IS_stat.IS(bx.f, h, space_from_h='R', space_to_f=bx.sampling_space, Nsim=bx.budget) |
|
828 |
|
|
|
829 |
|
is_outside(convex_hull, node_coordinates) |
|
830 |
|
candidates = candidates[candidates.is_outside] |
|
831 |
|
|
|
832 |
|
|
|
833 |
|
|
|
834 |
|
#č necháme ISSI trapit sa pravděpodobnostma |
|
835 |
|
bx.siss.add_single_event_data(candidates.w, event=-1, nis=bx.budget) |
|
836 |
|
|
|
837 |
|
|
|
838 |
|
if callback is not None: |
|
839 |
|
cell_stats = dict() |
|
840 |
|
|
|
841 |
|
cell_stats['cell_probability'] = simplex_probability |
|
842 |
|
cell_stats['vertex_estimation'] = 0 |
|
843 |
|
cell_stats['weighted_vertex_estimation'] = 0 |
|
844 |
|
cell_stats['event'] = 'out' |
|
845 |
|
|
|
846 |
|
|
|
847 |
|
# kolbek ↓ |
|
848 |
|
nodes = h_plan[mask] |
|
849 |
|
#out_nodes = h_plan[~mask] #č možná je čas se zbavit toho, co se stejně nepouživá? |
|
850 |
|
callback(estimation=estimation, nodes=nodes, cell_stats=cell_stats) #, out_nodes=out_nodes) |
|
851 |
|
|
|
852 |
|
|
550 |
853 |
|
|
551 |
854 |
def simple_simplex_estimation(sample_box, model_space='Rn', sampling_space=None, nis=1000, callback=None): |
def simple_simplex_estimation(sample_box, model_space='Rn', sampling_space=None, nis=1000, callback=None): |
552 |
855 |
""" |
""" |
|
... |
... |
def simple_simplex_estimation(sample_box, model_space='Rn', sampling_space=None, |
557 |
860 |
nsim = sample_box.nsim |
nsim = sample_box.nsim |
558 |
861 |
nvar = sample_box.nvar |
nvar = sample_box.nvar |
559 |
862 |
|
|
560 |
|
# jsou to informace pro callback |
|
|
863 |
|
#č jsou to informace pro callback |
561 |
864 |
estimation={'method': "simple_simplex_estimation", 'nis':nis, 'nsim':nsim} |
estimation={'method': "simple_simplex_estimation", 'nis':nis, 'nsim':nsim} |
562 |
865 |
estimation['model_space'] = model_space |
estimation['model_space'] = model_space |
563 |
866 |
estimation['sampling_space'] = sampling_space |
estimation['sampling_space'] = sampling_space |
564 |
867 |
|
|
565 |
868 |
|
|
566 |
|
# vytahneme ze sample_boxu rozdělení |
|
|
869 |
|
#č vytahneme ze sample_boxu rozdělení |
567 |
870 |
f = sample_box.sampled_plan |
f = sample_box.sampled_plan |
568 |
871 |
|
|
569 |
872 |
|
|
570 |
|
# zde provadím rozdělení na prostor, ve kterém vzorkujem |
|
571 |
|
# a prostor "modelu", vô ktôrom, v podstatě, měříme vzdaleností |
|
|
873 |
|
#č zde provadím rozdělení na prostor, ve kterém vzorkujem |
|
874 |
|
#čs a prostor "modelu", vô ktôrom, v podstatě, měříme vzdaleností |
572 |
875 |
sampled_plan_model = getattr(sample_box, model_space) |
sampled_plan_model = getattr(sample_box, model_space) |
573 |
876 |
|
|
574 |
|
# žádné chyby nechytám |
|
575 |
|
# když se tringulace nepovede tak nemáme čo robiť |
|
|
877 |
|
#č žádné chyby nechytám |
|
878 |
|
#čs když se tringulace nepovede tak nemáme čo robiť |
576 |
879 |
tri = Delaunay(sampled_plan_model) |
tri = Delaunay(sampled_plan_model) |
577 |
880 |
# koplanar neřeším (ale vrátím spolu s ostatníma věcma na konci) |
# koplanar neřeším (ale vrátím spolu s ostatníma věcma na konci) |
578 |
|
# if len(tri.coplanar): |
|
579 |
|
# #print('triangulace v pořádku není') |
|
580 |
|
# bx._log('triangulation is coplanar') |
|
581 |
|
# else: |
|
582 |
|
# #print('triangulace je v pořádku') |
|
583 |
|
# bx._log('triangulation is OK') |
|
584 |
|
|
|
585 |
|
|
|
586 |
881 |
|
|
587 |
882 |
if sampling_space is None: |
if sampling_space is None: |
588 |
883 |
sampling_space = model_space |
sampling_space = model_space |
|
... |
... |
def simple_simplex_estimation(sample_box, model_space='Rn', sampling_space=None, |
600 |
895 |
global_stats = {'failure':0, 'success':0, 'mix':0} |
global_stats = {'failure':0, 'success':0, 'mix':0} |
601 |
896 |
|
|
602 |
897 |
|
|
603 |
|
# zde postupně v cyklu prochazíme všemi simplexy |
|
604 |
|
# tynhlenstím zajišťujeme disjunktnost |
|
605 |
|
# a môžeme všechny nasbírané pravděpodobnosti jednoduše sčítat |
|
|
898 |
|
#č zde postupně v cyklu prochazíme všemi simplexy |
|
899 |
|
#č tynhlenstím zajišťujeme disjunktnost |
|
900 |
|
#čs a môžeme všechny nasbírané pravděpodobnosti jednoduše sčítat |
606 |
901 |
for simplex in tri.simplices: |
for simplex in tri.simplices: |
607 |
902 |
|
|
608 |
903 |
vertices = sampled_plan_sing[simplex] |
vertices = sampled_plan_sing[simplex] |
|
... |
... |
def simple_simplex_estimation(sample_box, model_space='Rn', sampling_space=None, |
731 |
1026 |
pass |
pass |
732 |
1027 |
|
|
733 |
1028 |
return result |
return result |
734 |
|
|
|