Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
cisc/test_synthetic.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
627 lines (519 sloc)
21.2 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
"""Here, we test the accuracy of various causal inference techniques in identifying ANMs with different ranges of noise term. | |
""" | |
from __future__ import division | |
from functools import partial | |
import math | |
import random | |
import sys | |
import time | |
import numpy as np | |
from stat_tests import friedman_test, nemenyi_multitest, bonferroni_dunn_test | |
from cisc import cisc | |
from crisp import crisp | |
from crispe import crispe | |
from dc import dc | |
from dr import dr | |
from utils import progress, plot_multiline, reverse_argsort, dc_compat | |
__author__ = "Kailash Budhathoki" | |
__email__ = "kbudhath@mpi-inf.mpg.de" | |
__copyright__ = "Copyright (c) 2017" | |
__license__ = "MIT" | |
class InfRes(object): | |
def __init__(self): | |
self.ncorrect = 0 | |
self.nwrong = 0 | |
self.nundec = 0 | |
def to_str(self, nsample): | |
assert self.ncorrect + self.nwrong + self.nundec == nsample | |
return str.center("%d %d %d %.2f" % (self.ncorrect, self.nwrong, self.nundec, self.ncorrect / nsample), 20) | |
def map_randomly(Xd, fXd): | |
f = dict() | |
for x in Xd: | |
y = random.choice(fXd) | |
f[x] = y | |
# ensure that f is not a constant function | |
if len(set(f.values())) == 1: | |
f = map_randomly(Xd, fXd) | |
assert len(set(f.values())) != 1 | |
return f | |
def generate_X(src, size): | |
if src == "uniform": | |
maxX = random.randint(2, 10) | |
X = [random.randint(1, maxX) for i in xrange(size)] | |
elif src == "multinomial": | |
p_nums = [random.randint(1, 10) for i in xrange(random.randint(2, 11))] | |
p_vals = [v / sum(p_nums) for v in p_nums] | |
X = np.random.multinomial(size, p_vals, size=1)[0].tolist() | |
X = [[i + 1] * f for i, f in enumerate(X)] | |
X = [j for sublist in X for j in sublist] | |
elif src == "binomial": | |
n = random.randint(1, 40) | |
p = random.uniform(0.1, 0.9) | |
X = np.random.binomial(n, p, size).tolist() | |
elif src == "geometric": | |
p = random.uniform(0.1, 0.9) | |
X = np.random.geometric(p, size).tolist() | |
elif src == "hypergeometric": | |
ngood = random.randint(1, 40) | |
nbad = random.randint(1, 40) | |
nsample = random.randint(1, min(40, ngood + nbad - 1)) | |
X = np.random.hypergeometric(ngood, nbad, nsample, size).tolist() | |
elif src == "poisson": | |
lam = random.randint(1, 10) | |
X = np.random.poisson(lam, size).tolist() | |
elif src == "negativeBinomial": | |
n = random.randint(1, 40) | |
p = random.uniform(0.1, 0.9) | |
X = np.random.negative_binomial(n, p, size).tolist() | |
return X | |
def generate_additive_N(size): | |
t = random.randint(1, 7) | |
suppN = range(-t, t + 1) | |
N = [random.choice(suppN) for i in xrange(size)] | |
return N | |
def test_decision_rate(): | |
srcsX = ["uniform", "binomial", "negativeBinomial", | |
"geometric", "hypergeometric", "poisson", "multinomial"] | |
total = len(srcsX) | |
count = 0 | |
progress(count, total) | |
for srcX in srcsX: | |
count += 1 | |
_decision_rate(srcX) | |
progress(count, total) | |
def _decision_rate(srcX): | |
nsample = 1000 | |
size = 1000 | |
level = 0.05 | |
suppfX = range(-7, 8) | |
diff_cisc, diff_dc, diff_dr = [], [], [] | |
res_cisc, res_dc, res_dr = [], [], [] | |
for k in xrange(nsample): | |
X = generate_X(srcX, size) | |
suppX = list(set(X)) | |
f = map_randomly(suppX, suppfX) | |
N = generate_additive_N(size) | |
Y = [f[X[i]] + N[i] for i in xrange(size)] | |
cisc_score = cisc(X, Y) | |
dc_score = dc(dc_compat(X), dc_compat(Y)) | |
dr_score = dr(X, Y, level) | |
undecided_cisc = cisc_score[0] == cisc_score[1] | |
undecided_dc = dc_score[0] == dc_score[1] | |
undecided_dr = (dr_score[0] < level and dr_score[1] < level) or ( | |
dr_score[0] > level and dr_score[1] > level) | |
diff_cisc.append(abs(cisc_score[0] - cisc_score[1])) | |
diff_dc.append(abs(dc_score[0] - dc_score[1])) | |
diff_dr.append(abs(dr_score[0] - dr_score[1])) | |
if undecided_cisc: | |
res_cisc.append(random.choice([True, False])) | |
else: | |
res_cisc.append(cisc_score[0] < cisc_score[1]) | |
if undecided_dc: | |
res_dc.append(random.choice([True, False])) | |
else: | |
res_dc.append(dc_score[0] < dc_score[1]) | |
if undecided_dr: | |
res_dr.append(random.choice([True, False])) | |
else: | |
res_dr.append(dr_score[0] > level and dr_score[1] < level) | |
indices_cisc = reverse_argsort(diff_cisc) | |
indices_dc = reverse_argsort(diff_dc) | |
indices_dr = reverse_argsort(diff_dr) | |
diff_cisc = [diff_cisc[i] for i in indices_cisc] | |
diff_dc = [diff_dc[i] for i in indices_dc] | |
diff_dr = [diff_dr[i] for i in indices_dr] | |
res_cisc = [res_cisc[i] for i in indices_cisc] | |
res_dc = [res_dc[i] for i in indices_dc] | |
res_dr = [res_dr[i] for i in indices_dr] | |
dec_rate = np.arange(0.01, 1.01, 0.01) | |
acc_cisc, acc_dc, acc_dr = [], [], [] | |
for r in dec_rate: | |
dec_cisc_r = res_cisc[:int(r * nsample)] | |
dec_dc_r = res_dc[:int(r * nsample)] | |
dec_dr_r = res_dr[:int(r * nsample)] | |
acc_cisc_r = sum(dec_cisc_r) / len(dec_cisc_r) | |
acc_dc_r = sum(dec_dc_r) / len(dec_dc_r) | |
acc_dr_r = sum(dec_dr_r) / len(dec_dr_r) | |
acc_cisc.append(acc_cisc_r) | |
acc_dc.append(acc_dc_r) | |
acc_dr.append(acc_dr_r) | |
# print dec_rate | |
# print acc_dc | |
# print acc_dr | |
# print acc_cisc | |
with open("results/dec_rate_synth_%s.dat" % srcX, "w") as writer: | |
for i, r in enumerate(dec_rate): | |
writer.write("%.2f %.2f %.2f %.2f\n" % | |
(r, acc_dc[i], acc_dr[i], acc_cisc[i])) | |
# plot_multiline([acc_cisc, acc_dc, acc_dr], dec_rate, [ | |
# "CISC", "DC", "DR"], "decision rate", "accuracy", "decision rate versus accuracy", "dec_rate_%sX.png" % srcX) | |
def are_disjoint(sets): | |
disjoint = True | |
union = set() | |
for s in sets: | |
for x in s: | |
if x in union: | |
disjoint = False | |
break | |
union.add(x) | |
return disjoint | |
def test_accuracy(): | |
nsim = 1000 | |
size = 1000 | |
level = 0.01 | |
suppfX = range(-7, 8) | |
srcsX = ["uniform", "binomial", "negativeBinomial", | |
"geometric", "hypergeometric", "poisson", "multinomial"] | |
print "-" * 70 | |
print "%18s%10s%10s%10s%10s%10s" % ("TYPE_X", "DC", "DR", "CISC", "CRISPE", "CRISP") | |
print "-" * 70 | |
sys.stdout.flush() | |
fp = open("results/acc-dtype.dat", "w") | |
fp.write("%s\t%s\t%s\t%s\t%s\t%s\n" % | |
("dtype", "dc", "dr", "cisc", "crispe", "crisp")) | |
for srcX in srcsX: | |
nsamples = 0 | |
nc_dc, nc_dr, nc_cisc, nc_crispe, nc_crisp = 0, 0, 0, 0, 0 | |
while nsamples < nsim: | |
X = generate_X(srcX, size) | |
suppX = list(set(X)) | |
f = map_randomly(suppX, suppfX) | |
N = generate_additive_N(size) | |
Y = [f[X[i]] + N[i] for i in xrange(size)] | |
# check if f(x) + supp N are disjoint for x in domx | |
suppN = set(N) | |
decomps = [] | |
for x in suppX: | |
fx = f[x] | |
sum_fx_suppN = set([fx + n for n in suppN]) | |
decomps.append(sum_fx_suppN) | |
non_overlapping_noise = are_disjoint(decomps) | |
if non_overlapping_noise: | |
continue | |
nsamples += 1 | |
dc_score = dc(dc_compat(X), dc_compat(Y)) | |
dr_score = dr(X, Y, level) | |
cisc_score = cisc(X, Y) | |
crispe_score = crispe(X, Y) | |
crisp_score = crisp(X, Y) | |
nc_dc += int(dc_score[0] < dc_score[1]) | |
nc_dr += int(dr_score[0] > level and dr_score[1] < level) | |
nc_cisc += int(cisc_score[0] < cisc_score[1]) | |
nc_crispe += int(crispe_score[0] < crispe_score[1]) | |
nc_crisp += int(crisp_score[0] < crisp_score[1]) | |
assert nsamples == nsim | |
acc_dc = nc_dc * 100 / nsim | |
acc_dr = nc_dr * 100 / nsim | |
acc_cisc = nc_cisc * 100 / nsim | |
acc_crispe = nc_crispe * 100 / nsim | |
acc_crisp = nc_crisp * 100 / nsim | |
print "%18s%10.2f%10.2f%10.2f%10.2f%10.2f" % (srcX, acc_dc, acc_dr, acc_cisc, acc_crispe, acc_crisp) | |
sys.stdout.flush() | |
fp.write("%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n" % | |
(srcX, acc_dc, acc_dr, acc_cisc, acc_crispe, acc_crisp)) | |
print "-" * 70 | |
sys.stdout.flush() | |
fp.close() | |
# todo: use crisp | |
def test_accuracy_at_model_space(): | |
nsim = 1000 | |
size = 1000 | |
level = 0.05 | |
suppfX = range(-7, 8) | |
S = range(1, 7) | |
srcsX = ["uniform", "binomial", "negativeBinomial", | |
"geometric", "hypergeometric", "poisson", "multinomial"] | |
print "%s | %s | %s | %s" % ("typeX".center(18), "sc_space".center(62), "dr_space".center(62), "full_space".center(62)) | |
print "%18s | %s %s %s | %s %s %s | %s %s %s" % ("", "sc".center(20), "dc".center(20), "dr".center(20), "sc".center(20), "dc".center(20), "dr".center(20), "sc".center(20), "dc".center(20), "dr".center(20)) | |
for srcX in srcsX: | |
nsample_sc, nsample_anm, nsample_all = 0, 0, 0 | |
rsc_sc, rsc_dc, rsc_dr = InfRes(), InfRes(), InfRes() | |
ranm_sc, ranm_dc, ranm_dr = InfRes(), InfRes(), InfRes() | |
rall_sc, rall_dc, rall_dr = InfRes(), InfRes(), InfRes() | |
for k in xrange(nsim): | |
X = generate_X(srcX, size) | |
suppX = list(set(X)) | |
f = map_randomly(suppX, suppfX) | |
N = generate_additive_N(size) | |
Y = [f[X[i]] + N[i] for i in xrange(size)] | |
cisc_score = cisc(X, Y) | |
dc_score = dc(dc_compat(X), dc_compat(Y)) | |
dr_score = dr(X, Y, level) | |
undecided_dr = (dr_score[0] < level and dr_score[1] < level) or ( | |
dr_score[0] > level and dr_score[1] > level) | |
if cisc_score[0] != cisc_score[1]: | |
nsample_sc += 1 | |
rsc_sc.ncorrect += int(cisc_score[0] < cisc_score[1]) | |
rsc_sc.nwrong += int(cisc_score[0] > cisc_score[1]) | |
rsc_sc.nundec += int(cisc_score[0] == cisc_score[1]) | |
rsc_dc.ncorrect += int(dc_score[0] < dc_score[1]) | |
rsc_dc.nwrong += int(dc_score[0] > dc_score[1]) | |
rsc_dc.nundec += int(dc_score[0] == dc_score[1]) | |
rsc_dr.ncorrect += int(dr_score[0] | |
> level and dr_score[1] < level) | |
rsc_dr.nwrong += int(dr_score[0] | |
< level and dr_score[1] > level) | |
rsc_dr.nundec += int(undecided_dr) | |
if not undecided_dr: | |
nsample_anm += 1 | |
ranm_sc.ncorrect += int(cisc_score[0] < cisc_score[1]) | |
ranm_sc.nwrong += int(cisc_score[0] > cisc_score[1]) | |
ranm_sc.nundec += int(cisc_score[0] == cisc_score[1]) | |
ranm_dc.ncorrect += int(dc_score[0] < dc_score[1]) | |
ranm_dc.nwrong += int(dc_score[0] > dc_score[1]) | |
ranm_dc.nundec += int(dc_score[0] == dc_score[1]) | |
ranm_dr.ncorrect += int(dr_score[0] | |
> level and dr_score[1] < level) | |
ranm_dr.nwrong += int(dr_score[0] | |
< level and dr_score[1] > level) | |
ranm_dr.nundec += int(undecided_dr) | |
nsample_all += 1 | |
rall_sc.ncorrect += int(cisc_score[0] < cisc_score[1]) | |
rall_sc.nwrong += int(cisc_score[0] > cisc_score[1]) | |
rall_sc.nundec += int(cisc_score[0] == cisc_score[1]) | |
rall_dc.ncorrect += int(dc_score[0] < dc_score[1]) | |
rall_dc.nwrong += int(dc_score[0] > dc_score[1]) | |
rall_dc.nundec += int(dc_score[0] == dc_score[1]) | |
rall_dr.ncorrect += int(dr_score[0] | |
> level and dr_score[1] < level) | |
rall_dr.nwrong += int(dr_score[0] < level and dr_score[1] > level) | |
rall_dr.nundec += int(undecided_dr) | |
print "%s | %s %s %s | %s %s %s | %s %s %s" % (srcX.center(18), rsc_sc.to_str(nsample_sc), rsc_dc.to_str(nsample_sc), rsc_dr.to_str(nsample_sc), ranm_sc.to_str(nsample_anm), ranm_dc.to_str(nsample_anm), ranm_dr.to_str(nsample_anm), rall_sc.to_str(nsample_all), rall_dc.to_str(nsample_all), rall_dr.to_str(nsample_all)) | |
def execute_algorithms(X, Y, nloop): | |
dcX, dcY = dc_compat(X), dc_compat(Y) | |
dc_t, dr_t, cisc_t, crisp_t = 0, 0, 0, 0 | |
for i in xrange(nloop): | |
tstart = time.time() | |
dc(dcX, dcY) | |
tend = time.time() | |
dc_t += tend - tstart | |
tstart = time.time() | |
dr(X, Y, 0.05) | |
tend = time.time() | |
dr_t += tend - tstart | |
tstart = time.time() | |
cisc(X, Y) | |
tend = time.time() | |
cisc_t += tend - tstart | |
tstart = time.time() | |
crisp(X, Y) | |
tend = time.time() | |
crisp_t += tend - tstart | |
return dc_t / nloop, dr_t / nloop, cisc_t / nloop, crisp_t / nloop | |
def test_size_runtime(): | |
nloop = 5 | |
suppX = range(20) | |
suppY = range(20) | |
sizes = range(1000000, 11000000, 1000000) | |
fp = open("results/time-size.dat", "w") | |
fp.write("size\tdc\tdr\tcisc\tcrisp\n") | |
for size in sizes: | |
X = [random.choice(suppX) for i in xrange(size)] | |
Y = [random.choice(suppY) for i in xrange(size)] | |
dc_t, dr_t, cisc_t, crisp_t = execute_algorithms(X, Y, nloop) | |
print size, dc_t, dr_t, cisc_t, crisp_t | |
sys.stdout.flush() | |
fp.write("%d %f %f %f %f\n" % (size, dc_t, dr_t, cisc_t, crisp_t)) | |
fp.close() | |
def test_domain_runtime(): | |
nloop = 5 | |
size = 10000 | |
domains = range(100, 1100, 100) | |
fp = open("results/time-domain.dat", "w") | |
fp.write("domain\tdc\tdr\tcisc\tcrisp\n") | |
for domain in domains: | |
suppX = range(domain) | |
suppY = range(domain) | |
X = [random.choice(suppX) for i in xrange(size)] | |
Y = [random.choice(suppY) for i in xrange(size)] | |
dc_t, dr_t, cisc_t, crisp_t = execute_algorithms(X, Y, nloop) | |
print domain, dc_t, dr_t, cisc_t, crisp_t | |
sys.stdout.flush() | |
fp.write("%d %f %f %f %f\n" % (domain, dc_t, dr_t, cisc_t, crisp_t)) | |
fp.close() | |
def test_power(): | |
sizes = [1000, 2000, 3000, 4000, 5000] | |
sizes = [1000] | |
level = 0.05 | |
srcX = "multinomial" | |
nsim_cutoff, nsim_power = 100, 100 | |
suppfX = range(-7, 8) | |
for size in sizes: | |
diffs_dc, diffs_dr, diffs_cisc = [], [], [] | |
for i in xrange(nsim_cutoff): | |
X = generate_X(srcX, size) | |
suppX = list(set(X)) | |
f = map_randomly(suppX, suppfX) | |
N = generate_additive_N(size) | |
# Y = [f[X[i]] + N[i] for i in xrange(size)] | |
# Y = np.random.permutation(Y).tolist() | |
Y = generate_X(srcX, size) | |
dc_score = dc(dc_compat(X), dc_compat(Y)) | |
dr_score = dr(X, Y, level) | |
cisc_score = cisc(X, Y) | |
diffs_dc.append(abs(dc_score[0] - dc_score[1])) | |
diffs_dr.append(abs(dr_score[0] - dr_score[1])) | |
diffs_cisc.append(abs(cisc_score[0] - cisc_score[1])) | |
cutoff_dc = np.percentile(diffs_dc, 5) | |
cutoff_dr = np.percentile(diffs_dr, 5) | |
cutoff_cisc = np.percentile(diffs_cisc, 5) | |
print cutoff_dc, cutoff_dr, cutoff_cisc | |
diffs_dc, diffs_dr, diffs_cisc = [], [], [] | |
for i in xrange(nsim_power): | |
X = generate_X(srcX, size) | |
suppX = list(set(X)) | |
f = map_randomly(suppX, suppfX) | |
N = generate_additive_N(size) | |
Y = [f[X[i]] + N[i] for i in xrange(size)] | |
dc_score = dc(dc_compat(X), dc_compat(Y)) | |
dr_score = dr(X, Y, level) | |
cisc_score = cisc(X, Y) | |
diffs_dc.append(abs(dc_score[0] - dc_score[1])) | |
diffs_dr.append(abs(dr_score[0] - dr_score[1])) | |
diffs_cisc.append(abs(cisc_score[0] - cisc_score[1])) | |
power_dc = sum(diffs_dc > cutoff_dc) / nsim_power | |
power_dr = sum(diffs_dr > cutoff_dr) / nsim_power | |
power_cisc = sum(diffs_cisc > cutoff_cisc) / nsim_power | |
print size, power_dc, power_dr, power_cisc | |
def test_significance(): | |
"""Perform permutation testing just as with swap randomisation""" | |
size = 1000 | |
nperm = 50000 | |
srcX = "uniform" | |
suppfX = range(-7, 8) | |
X = generate_X(srcX, size) | |
suppX = list(set(X)) | |
f = map_randomly(suppX, suppfX) | |
N = generate_additive_N(size) | |
Y = [f[X[i]] + N[i] for i in xrange(size)] | |
cisc_score = cisc(X, Y) | |
diff_orig = abs(cisc_score[0] - cisc_score[1]) | |
diff_perms = [] | |
for i in xrange(nperm): | |
Y_perm = np.random.permutation(Y) | |
cisc_score = cisc(X, Y_perm) | |
diff_perm = abs(cisc_score[0] - cisc_score[1]) | |
diff_perms.append(diff_perm) | |
p_value = sum(diff_orig < diff_perm for diff_perm in diff_perms) / nperm | |
print diff_orig, sum(diff_orig < diff_perm for diff_perm in diff_perms), p_value | |
def test_hypercompression(): | |
m = 100 | |
size = 100 | |
alpha = 0.01 | |
suppfX = range(-7, 8) | |
srcX = "geometric" | |
fp = open("results/no-hypercompression.dat", "w") | |
diffs = [] | |
decisions = [] # 1=correct, -1=incorrect, 0=wrong | |
for i in xrange(m): | |
X = generate_X(srcX, size) | |
suppX = list(set(X)) | |
f = map_randomly(suppX, suppfX) | |
N = generate_additive_N(size) | |
Y = [f[X[i]] + N[i] for i in xrange(size)] | |
crisp_score = crisp(X, Y) | |
diff = abs(crisp_score[0] - crisp_score[1]) | |
if crisp_score[0] < crisp_score[1]: | |
decision = 1 | |
elif crisp_score[0] > crisp_score[1]: | |
decision = -1 | |
else: | |
continue | |
diffs.append(int(diff)) | |
decisions.append(decision) | |
sorted_diffs_indices = reverse_argsort(diffs) | |
diffs = [diffs[idx] for idx in sorted_diffs_indices] | |
decisions = [decisions[idx] for idx in sorted_diffs_indices] | |
# flags for coloring | |
# correct, significant = 1 | |
# correct, insignificant = 2 | |
# incorrect, significant = 3 | |
# incorrect, insignificant = 4 | |
fp.write("sn\tdiff\tsig\tdec\tcolor\n") # header | |
for k, diff in enumerate(diffs, 1): | |
log_p_value = -diff | |
bh_stat = k * alpha / m | |
log_bh_stat = math.log(bh_stat, 2) | |
if log_bh_stat < log_p_value: | |
significant = 0 | |
if decisions[k - 1] == 1: | |
color = 2 | |
else: | |
color = 4 | |
else: | |
significant = 1 | |
if decisions[k - 1] == 1: | |
color = 1 | |
elif decisions[k - 1] == -1: | |
color = 3 | |
fp.write("%i\t%d\t%d\t%d\t%d\n" % | |
(k, diff, significant, decisions[k - 1], color)) | |
# if log_bh_stat < log_p_value: | |
# # reject: not significant | |
# fp.write("%i\t%d\t%d\t%d\n" % (k, diff, 0, decisions[k - 1])) | |
# print k, diff, log_bh_stat, log_p_value, 0, decisions[k - 1] | |
# else: | |
# fp.write("%i\t%d\t%d\t%d\n" % | |
# (k, diff, 1, decisions[k - 1])) # accept: significant | |
# print k, diff, 1, decisions[k - 1] | |
# fp.write("%i\t%d\n" % (k, diff)) | |
fp.close() | |
def test_sample_size(): | |
nsim = 1000 | |
level = 0.05 | |
sizes = [100, 400, 700, 1000, 1300, 1600, 2000] | |
suppfX = range(-7, 8) | |
srcX = "geometric" | |
fp = open("results/acc-size.dat", "w") | |
diffs = [] | |
fp.write("%s\t%s\t%s\t%s\t%s\t%s\n" % | |
("size", "dc", "dr", "cisc", "crispe", "crisp")) | |
print "%s\t%s\t%s\t%s\t%s\t%s" % ("size", "dc", "dr", "cisc", "crispe", "crisp") | |
sys.stdout.flush() | |
# progress(0, len(sizes)) | |
for k, size in enumerate(sizes): | |
nsamples = 0 | |
nc_dc, nc_dr, nc_cisc, nc_crispe, nc_crisp = 0, 0, 0, 0, 0 | |
while nsamples < nsim: | |
X = generate_X(srcX, size) | |
suppX = list(set(X)) | |
f = map_randomly(suppX, suppfX) | |
N = generate_additive_N(size) | |
Y = [f[X[i]] + N[i] for i in xrange(size)] | |
# check if f(x) + supp N are disjoint for x in domx | |
suppN = set(N) | |
decomps = [] | |
for x in suppX: | |
fx = f[x] | |
sum_fx_suppN = set([fx + n for n in suppN]) | |
decomps.append(sum_fx_suppN) | |
non_overlapping_noise = are_disjoint(decomps) | |
if non_overlapping_noise: | |
continue | |
nsamples += 1 | |
dc_score = dc(dc_compat(X), dc_compat(Y)) | |
dr_score = dr(X, Y, level) | |
cisc_score = cisc(X, Y) | |
crispe_score = crispe(X, Y) | |
crisp_score = crisp(X, Y) | |
nc_dc += int(dc_score[0] < dc_score[1]) | |
nc_dr += int(dr_score[0] > level and dr_score[1] < level) | |
nc_cisc += int(cisc_score[0] < cisc_score[1]) | |
nc_crispe += int(crispe_score[0] < crispe_score[1]) | |
nc_crisp += int(crisp_score[0] < crisp_score[1]) | |
acc_dc = nc_dc * 100 / nsim | |
acc_dr = nc_dr * 100 / nsim | |
acc_cisc = nc_cisc * 100 / nsim | |
acc_crispe = nc_crispe * 100 / nsim | |
acc_crisp = nc_crisp * 100 / nsim | |
print "%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f" % (size, acc_dc, acc_dr, acc_cisc, acc_crispe, acc_crisp) | |
sys.stdout.flush() | |
fp.write("%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n" % | |
(size, acc_dc, acc_dr, acc_cisc, acc_crispe, acc_crisp)) | |
# progress(k + 1, len(sizes)) | |
fp.close() | |
if __name__ == "__main__": | |
test_sample_size() | |
# test_accuracy() |