Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
size of domain of target variable passed to the stochastic complexity …
…method
  • Loading branch information
kbudhath committed Sep 16, 2017
1 parent 6dbb7eb commit 6bd12ba
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 25 deletions.
5 changes: 3 additions & 2 deletions crisp.py
Expand Up @@ -34,6 +34,7 @@ def regress(X, Y):
# target Y, feature X
max_iterations = 10000
scx = stochastic_complexity(X)
len_dom_y = len(set(Y))
# print scx,
f = map_to_majority(X, Y)

Expand All @@ -42,7 +43,7 @@ def regress(X, Y):

pair = zip(X, Y)
res = [y - f[x] for x, y in pair]
cur_res_codelen = stochastic_complexity(res)
cur_res_codelen = stochastic_complexity(res, len_dom_y)

j = 0
minimized = True
Expand All @@ -60,7 +61,7 @@ def regress(X, Y):

res = [y - f[x] if x != x_to_map else y -
cand_y for x, y in pair]
res_codelen = stochastic_complexity(res)
res_codelen = stochastic_complexity(res, len_dom_y)

if res_codelen < best_res_codelen:
best_res_codelen = res_codelen
Expand Down
33 changes: 26 additions & 7 deletions test_real.py
Expand Up @@ -9,8 +9,8 @@

import numpy as np

# from cisc import cisc
from anms import cisc
from cisc import cisc
from crisp import crisp
from dr import dr
from dc import dc
from utils import dc_compat
Expand Down Expand Up @@ -163,10 +163,11 @@ def test_abalone():
for i in xrange(1, ncols):
Y = data[:, i]
cisc_score = cisc(sex, Y)
crisp_score = crisp(sex, Y)
dr_score = dr(sex.tolist(), Y.tolist(), level)
dc_score = dc(dc_compat(sex), dc_compat(Y))

print "CISC::", abs(cisc_score[0] - cisc_score[1]), cisc_score,
print "CISC::",
if cisc_score[0] < cisc_score[1]:
print "%s ⇒ %s" % ("Sex", colnames[i]),
elif cisc_score[0] > cisc_score[1]:
Expand All @@ -193,6 +194,14 @@ def test_abalone():
print "%s ~ %s" % ("Sex", colnames[i]),
print

print "CRISP::",
if crisp_score[0] < crisp_score[1]:
print "%s ⇒ %s" % ("Sex", colnames[i]),
elif crisp_score[0] > crisp_score[1]:
print "%s ⇐ %s" % ("Sex", colnames[i]),
else:
print "%s ~ %s" % ("Sex", colnames[i]),
print
print


Expand All @@ -206,10 +215,11 @@ def test_nlschools():
status = data[:, 1]

cisc_score = cisc(score, status)
crisp_score = cisc(score, status)
dr_score = dr(score.tolist(), status.tolist(), level)
dc_score = dc(dc_compat(score), dc_compat(status))

print "CISC::", abs(cisc_score[0] - cisc_score[1]), cisc_score,
print "CISC::",
if cisc_score[0] < cisc_score[1]:
print "%s ⇒ %s" % ("score", "status"),
elif cisc_score[0] > cisc_score[1]:
Expand All @@ -236,6 +246,15 @@ def test_nlschools():
print "%s ~ %s" % ("score", "status"),
print

print "CRISP::",
if crisp_score[0] < crisp_score[1]:
print "%s ⇒ %s" % ("score", "status"),
elif crisp_score[0] > crisp_score[1]:
print "%s ⇐ %s" % ("score", "status"),
else:
print "%s ~ %s" % ("score", "status"),
print


def test_acute():
print "testing cisc on acute inflammation dataset"
Expand Down Expand Up @@ -304,10 +323,10 @@ def test_faces():


if __name__ == "__main__":
test_faces()
# test_faces()
# test_car()
# test_abalone()
# test_nlschools()
test_abalone()
test_nlschools()

# test_acute()
# test_nursery()
Expand Down
68 changes: 52 additions & 16 deletions test_synthetic.py
Expand Up @@ -183,7 +183,7 @@ def _decision_rate(srcX):
def test_accuracy():
nsim = 5000
size = 5000
level = 0.05
level = 0.01
suppfX = range(-7, 8)
srcsX = ["uniform", "binomial", "negativeBinomial",
"geometric", "hypergeometric", "poisson", "multinomial"]
Expand Down Expand Up @@ -453,14 +453,15 @@ def test_significance():


def test_hypercompression():
m = 500
size = 2000
alpha = 0.001
m = 100
size = 100
alpha = 0.01
suppfX = range(-7, 8)
srcX = "uniform"
srcX = "geometric"

# fp = open("results/no-hypercompression.dat", "w")
fp = open("results/no-hypercompression.dat", "w")
diffs = []
decisions = [] # 1=correct, -1=incorrect, 0=wrong
for i in xrange(m):
X = generate_X(srcX, size)
suppX = list(set(X))
Expand All @@ -469,24 +470,58 @@ def test_hypercompression():
Y = [f[X[i]] + N[i] for i in xrange(size)]
crisp_score = crisp(X, Y)
diff = abs(crisp_score[0] - crisp_score[1])
if crisp_score[0] > crisp_score[1]:
print "wrong inf", diff

if crisp_score[0] < crisp_score[1]:
decision = 1
elif crisp_score[0] > crisp_score[1]:
decision = -1
else:
continue

diffs.append(int(diff))
diffs = sorted(diffs, reverse=True)
# fp.write("sn\tdiff\tsig\n") # header
decisions.append(decision)
sorted_diffs_indices = reverse_argsort(diffs)
diffs = [diffs[idx] for idx in sorted_diffs_indices]
decisions = [decisions[idx] for idx in sorted_diffs_indices]

# flags for coloring
# correct, significant = 1
# correct, insignificant = 2
# incorrect, significant = 3
# incorrect, insignificant = 4

fp.write("sn\tdiff\tsig\tdec\tcolor\n") # header
for k, diff in enumerate(diffs, 1):
log_p_value = -diff
bh_stat = k * alpha / m
log_bh_stat = math.log(bh_stat, 2)

if log_bh_stat < log_p_value:
# fp.write("%i\t%d\t%d\n" % (k, diff, 0)) #reject: not significant
print k, diff, log_bh_stat, log_p_value, 0
significant = 0
if decisions[k - 1] == 1:
color = 2
else:
color = 4
else:
# fp.write("%i\t%d\t%d\n" % (k, diff, 1)) #accept: significant
print k, diff, log_bh_stat, log_p_value, 1
significant = 1
if decisions[k - 1] == 1:
color = 1
elif decisions[k - 1] == -1:
color = 3

fp.write("%i\t%d\t%d\t%d\t%d\n" %
(k, diff, significant, decisions[k - 1], color))
# if log_bh_stat < log_p_value:
# # reject: not significant
# fp.write("%i\t%d\t%d\t%d\n" % (k, diff, 0, decisions[k - 1]))
# print k, diff, log_bh_stat, log_p_value, 0, decisions[k - 1]
# else:
# fp.write("%i\t%d\t%d\t%d\n" %
# (k, diff, 1, decisions[k - 1])) # accept: significant
# print k, diff, 1, decisions[k - 1]

# fp.write("%i\t%d\n" % (k, diff))
# fp.close()
fp.close()


def test_sample_size():
Expand Down Expand Up @@ -532,5 +567,6 @@ def test_sample_size():


if __name__ == "__main__":
test_sample_size()
# test_hypercompression()
# test_sample_size()
test_accuracy()

0 comments on commit 6bd12ba

Please sign in to comment.