def are_disjoint(sets):
    """Return True if no element occurs in more than one of the given sets.

    Args:
        sets: an iterable of collections of hashable elements. It is
            consumed lazily and only as far as needed to decide the answer.

    Returns:
        True when the collections are pairwise disjoint (including when
        ``sets`` is empty), False as soon as an element is seen twice.

    Note:
        Elements are compared against everything seen so far, so a
        collection that itself contains a duplicate (e.g. a list such as
        ``[1, 1]``) is reported as not disjoint.
    """
    seen = set()
    for s in sets:
        for x in s:
            if x in seen:
                # A single shared element settles the answer. Returning here
                # stops both loops; a bare `break` would only leave the inner
                # one and keep scanning the remaining sets.
                return False
            seen.add(x)
    return True
+= int(crisp_score[0] < crisp_score[1]) + assert nsamples == nsim + acc_dc = nc_dc * 100 / nsim acc_dr = nc_dr * 100 / nsim acc_cisc = nc_cisc * 100 / nsim @@ -226,7 +258,7 @@ def test_accuracy(): sys.stdout.flush() fp.write("%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n" % (srcX, acc_dc, acc_dr, acc_cisc, acc_crispe, acc_crisp)) - print "-" * 58 + print "-" * 70 sys.stdout.flush() fp.close() @@ -529,9 +561,9 @@ def test_hypercompression(): def test_sample_size(): - nsim = 5000 + nsim = 500 level = 0.05 - sizes = [50, 100, 500, 1000, 2500, 5000] + sizes = [100, 500, 1000, 2500, 5000] suppfX = range(-7, 8) srcX = "geometric" @@ -539,8 +571,7 @@ def test_sample_size(): diffs = [] fp.write("%s\t%s\t%s\t%s\t%s\t%s\n" % ("size", "dc", "dr", "cisc", "crispe", "crisp")) - print "%s\t%s\t%s\t%s\t%s\t%s\n" % - ("size", "dc", "dr", "cisc", "crispe", "crisp") + print "%s\t%s\t%s\t%s\t%s\t%s" % ("size", "dc", "dr", "cisc", "crispe", "crisp") sys.stdout.flush() # progress(0, len(sizes)) for k, size in enumerate(sizes):