Skip to content
Permalink
6bd12bac64
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
333 lines (283 sloc) 10.3 KB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tests on real-world data.
"""
from __future__ import division
import csv
import os
import numpy as np
from cisc import cisc
from crisp import crisp
from dr import dr
from dc import dc
from utils import dc_compat
# Module metadata: authorship, contact address, copyright notice and license.
__author__ = "Kailash Budhathoki"
__email__ = "kbudhath@mpi-inf.mpg.de"
__copyright__ = "Copyright (c) 2017"
__license__ = "MIT"
def test_nursery():
    """Print the direction CISC reports for each nursery attribute.

    Loads ``data/nursery/nursery.dat`` (relative to this file) with
    ``np.loadtxt``. The last column is paired with every earlier column in
    turn and ``cisc`` is called on each pair. One line is printed per
    attribute: ``⇒`` when the first score is smaller than the second,
    ``⇐`` when it is larger, and ``~`` when the two are equal.
    """
    print("testing cisc on nursery dataset")
    dat_path = os.path.join(
        os.path.dirname(__file__), "data", "nursery", "nursery.dat")
    attr_names = [
        "parents", "has_nurs", "family_form", "children",
        "housing", "finance", "social", "health",
    ]
    target_name = "application_evaluation"

    table = np.loadtxt(dat_path)
    last_col = table.shape[1] - 1
    target = table[:, last_col]
    for col in xrange(last_col):
        scores = cisc(table[:, col], target)
        if scores[0] < scores[1]:
            template = "%s ⇒ %s"
        elif scores[0] > scores[1]:
            template = "%s ⇐ %s"
        else:
            template = "%s ~ %s"
        # One complete output line per attribute.
        print(template % (attr_names[col], target_name))
    # NOTE(review): the original indentation was not available; the final
    # blank line is assumed to be printed once, after the loop -- confirm.
    print("")
def test_car():
    """Print CISC, DC and DR verdicts for each attribute of the car dataset.

    Loads ``data/car/car.dat`` (relative to this file) with ``np.loadtxt``
    and pairs the last column with every earlier column. For each pair
    three lines are printed:

    * ``CISC::`` followed by the absolute score difference, the raw score
      and the verdict;
    * ``DC::`` followed by the verdict;
    * ``DR::`` followed by the verdict, decided against ``level`` (0.05).
    """
    print("testing car dataset")
    level = 0.05
    dat_path = os.path.join(
        os.path.dirname(__file__), "data", "car", "car.dat")
    feature_names = [
        "buying price", "maintenance", "#dorrs",
        "capacity", "luggage boot", "safety",
    ]
    target_name = "car acceptibility"

    def score_template(pair):
        """Return the template for a score pair: ⇒ if first < second."""
        if pair[0] < pair[1]:
            return "%s ⇒ %s"
        if pair[0] > pair[1]:
            return "%s ⇐ %s"
        return "%s ~ %s"

    def level_template(pair):
        """Return the DR template: ⇒ if first > level and second < level."""
        if pair[0] > level and pair[1] < level:
            return "%s ⇒ %s"
        if pair[0] < level and pair[1] > level:
            return "%s ⇐ %s"
        return "%s ~ %s"

    table = np.loadtxt(dat_path)
    last_col = table.shape[1] - 1
    target = table[:, last_col]
    for col in xrange(last_col):
        feature = table[:, col]
        # All three methods are evaluated before anything is printed.
        cisc_score = cisc(feature, target)
        dr_score = dr(feature.tolist(), target.tolist(), level)
        dc_score = dc(dc_compat(feature), dc_compat(target))

        gap = abs(cisc_score[0] - cisc_score[1])
        names = (feature_names[col], target_name)
        print(" ".join((
            "CISC::", str(gap), str(cisc_score),
            score_template(cisc_score) % names,
        )))
        print(" ".join(("DC::", score_template(dc_score) % names)))
        print(" ".join(("DR::", level_template(dr_score) % names)))
    # NOTE(review): the original indentation was not available; the final
    # blank line is assumed to be printed once, after the loop -- confirm.
    print("")
def test_weather():
    """Print the direction CISC reports for each X/Y pair of the weather data.

    Reads ``data/weather/weather.dat`` (relative to this file) as
    space-delimited rows of exactly eight fields. Fields 0-3 are collected
    as ``X_0`` .. ``X_3`` and fields 4-7 as ``Y_0`` .. ``Y_3``; ``cisc`` is
    then called on each ``(X_i, Y_i)`` pair and one verdict line is printed
    per pair, followed by a blank line.

    Raises:
        AssertionError: if a row does not have eight fields or the
            collected columns end up with different lengths.
    """
    print("testing cisc on weather dataset")
    dat_path = os.path.join(
        os.path.dirname(__file__), "data", "weather", "weather.dat")

    npairs = 4
    # Build four *independent* lists per side. ``[[]] * 4`` repeats a
    # reference to one single list, which made every column receive the
    # values of all columns and turned the length checks below into no-ops.
    Xs = [[] for _ in xrange(npairs)]
    Ys = [[] for _ in xrange(npairs)]

    with open(dat_path, "r") as csvfile:
        for row in csv.reader(csvfile, delimiter=" "):
            assert len(row) == 2 * npairs
            for i in xrange(npairs):
                Xs[i].append(row[i])
            for i in xrange(npairs):
                Ys[i].append(row[i + npairs])

    assert len(Xs[0]) == len(Xs[1]) == len(Xs[2]) == len(Xs[3])
    assert len(Ys[0]) == len(Ys[1]) == len(Ys[2]) == len(Ys[3])

    for i in xrange(npairs):
        cisc_score = cisc(Xs[i], Ys[i])
        if cisc_score[0] < cisc_score[1]:
            template = "X_%d ⇒ Y_%d"
        elif cisc_score[0] > cisc_score[1]:
            template = "Y_%d ⇒ X_%d"
        else:
            template = "X_%d ~ Y_%d"
        print(template % (i, i))
    print("")
def test_adult():
    """Print the direction CISC reports between each adult attribute and income.

    Loads ``data/adult/adult.dat`` (relative to this file) with
    ``np.loadtxt``, converts every column to a list of ``int`` and pairs the
    last column (labelled "income") with each earlier column. One line is
    printed per attribute: the verdict followed by the raw CISC score.
    """
    print("testing cisc on adult dataset")
    dat_path = os.path.join(
        os.path.dirname(__file__), "data", "adult", "adult.dat")
    table = np.loadtxt(dat_path)
    last_col = table.shape[1] - 1
    income = [int(value) for value in table[:, last_col]]
    feature_names = ["workclass", "education", "occupation"]

    for col in xrange(last_col):
        feature = [int(value) for value in table[:, col]]
        scores = cisc(feature, income)
        if scores[0] < scores[1]:
            template = "%s ⇒ %s"
        elif scores[0] > scores[1]:
            template = "%s ⇐ %s"
        else:
            template = "%s ~ %s"
        verdict = template % (feature_names[col], "income")
        # Verdict and raw score share one line, separated by a space.
        print(" ".join((verdict, str(scores))))
    print("")
def test_abalone():
    """Print CISC, DC, DR and CRISP verdicts between Sex and each other column.

    Loads ``data/abalone/abalone.dat`` (relative to this file) with
    ``np.loadtxt``. Column 0 ("Sex") is paired with every later column and
    four lines are printed per pair, tagged ``CISC::``, ``DC::``, ``DR::``
    and ``CRISP::``. The DR verdict is decided against ``level`` (0.05);
    the other three compare the two entries of the returned score.
    """
    # As in the DR paper, the variables are used without discretisation.
    print("testing cisc on abalone dataset")
    level = 0.05
    dat_path = os.path.join(
        os.path.dirname(__file__), "data", "abalone", "abalone.dat")
    table = np.loadtxt(dat_path)
    column_count = table.shape[1]
    sex = table[:, 0]
    column_names = ["Sex", "Length", "Diameter", "Height"]

    def score_template(pair):
        """Return the template for a score pair: ⇒ if first < second."""
        if pair[0] < pair[1]:
            return "%s ⇒ %s"
        if pair[0] > pair[1]:
            return "%s ⇐ %s"
        return "%s ~ %s"

    def level_template(pair):
        """Return the DR template: ⇒ if first > level and second < level."""
        if pair[0] > level and pair[1] < level:
            return "%s ⇒ %s"
        if pair[0] < level and pair[1] > level:
            return "%s ⇐ %s"
        return "%s ~ %s"

    for col in xrange(1, column_count):
        other = table[:, col]
        # Every method is evaluated before the first line is printed.
        cisc_score = cisc(sex, other)
        crisp_score = crisp(sex, other)
        dr_score = dr(sex.tolist(), other.tolist(), level)
        dc_score = dc(dc_compat(sex), dc_compat(other))

        report = (
            ("CISC::", score_template, cisc_score),
            ("DC::", score_template, dc_score),
            ("DR::", level_template, dr_score),
            ("CRISP::", score_template, crisp_score),
        )
        for tag, pick_template, pair in report:
            verdict = pick_template(pair) % ("Sex", column_names[col])
            print(" ".join((tag, verdict)))
    # NOTE(review): the original indentation was not available; the final
    # blank line is assumed to be printed once, after the loop -- confirm.
    print("")
def test_nlschools():
    """Print CISC, DC, DR and CRISP verdicts for score vs. status (nlschools).

    Loads ``data/nlschools/nlschools.dat`` (relative to this file) with
    ``np.loadtxt``; column 0 is labelled "score" and column 1 "status".
    Four lines are printed, tagged ``CISC::``, ``DC::``, ``DR::`` and
    ``CRISP::``. The DR verdict is decided against ``level`` (0.05); the
    other three compare the two entries of the returned score.
    """
    print("testing nlschools dataset")
    level = 0.05
    nlschools_dir = os.path.join(
        os.path.dirname(__file__), "data", "nlschools")
    nlschools_dat_path = os.path.join(nlschools_dir, "nlschools.dat")
    data = np.loadtxt(nlschools_dat_path)
    score = data[:, 0]
    status = data[:, 1]

    def score_template(pair):
        """Return the template for a score pair: ⇒ if first < second."""
        if pair[0] < pair[1]:
            return "%s ⇒ %s"
        if pair[0] > pair[1]:
            return "%s ⇐ %s"
        return "%s ~ %s"

    def level_template(pair):
        """Return the DR template: ⇒ if first > level and second < level."""
        if pair[0] > level and pair[1] < level:
            return "%s ⇒ %s"
        if pair[0] < level and pair[1] > level:
            return "%s ⇐ %s"
        return "%s ~ %s"

    cisc_score = cisc(score, status)
    # The CRISP line must come from ``crisp``; calling ``cisc`` here (as the
    # code previously did) merely repeated the CISC result under a CRISP tag.
    crisp_score = crisp(score, status)
    dr_score = dr(score.tolist(), status.tolist(), level)
    dc_score = dc(dc_compat(score), dc_compat(status))

    names = ("score", "status")
    print(" ".join(("CISC::", score_template(cisc_score) % names)))
    print(" ".join(("DC::", score_template(dc_score) % names)))
    print(" ".join(("DR::", level_template(dr_score) % names)))
    print(" ".join(("CRISP::", score_template(crisp_score) % names)))
def test_acute():
    """Print the direction CISC reports between each diagnosis and each symptom.

    Loads ``data/acute/acute.tsv`` (relative to this file) with
    ``np.loadtxt``. Columns 0-5 are the symptom columns and columns 6 and 7
    the two diagnosis columns. For every symptom, ``cisc`` is called once
    per diagnosis and a line with the verdict followed by the raw score is
    printed.
    """
    print("testing cisc on acute inflammation dataset")
    dat_path = os.path.join(
        os.path.dirname(__file__), "data", "acute", "acute.tsv")
    table = np.loadtxt(dat_path)
    symptom_names = ["temperature", "nausea", "lumber pain",
                     "urine pushing", "micturition pains",
                     "burning of urethra"]
    # Each diagnosis label is paired with its data column (6, then 7).
    diagnoses = (
        ("Inflammation of urinary bladder", table[:, 6]),
        ("Nephritis of renal pelvis origin ", table[:, 7]),
    )

    for col in xrange(6):
        symptom = table[:, col]
        for diagnosis_name, diagnosis in diagnoses:
            scores = cisc(diagnosis, symptom)
            if scores[0] < scores[1]:
                template = "%s ⇒ %s"
            elif scores[0] > scores[1]:
                template = "%s ⇐ %s"
            else:
                template = "%s ~ %s"
            verdict = template % (diagnosis_name, symptom_names[col])
            print(" ".join((verdict, str(scores))))
    # NOTE(review): the original indentation was not available; the final
    # blank line is assumed to be printed once, after the loop -- confirm.
    print("")
def test_faces():
    """Print CISC and DR verdicts for parameter vs. answer (faces dataset).

    Loads ``data/faces/faces.tsv`` (relative to this file) with
    ``np.loadtxt``; column 0 is labelled "parameter" and column 1 "answer".
    Output, in order: the raw CISC score with its absolute difference, the
    CISC verdict followed by the raw score, a blank line, the ``DR::``
    verdict (decided against ``level`` = 0.05), and a final blank line.
    """
    print("testing cisc on faces dataset")
    level = 0.05
    dat_path = os.path.join(
        os.path.dirname(__file__), "data", "faces", "faces.tsv")
    table = np.loadtxt(dat_path)
    parameter = table[:, 0]
    answer = table[:, 1]
    names = ("parameter", "answer")

    cisc_score = cisc(parameter, answer)
    gap = abs(cisc_score[0] - cisc_score[1])
    print(" ".join((str(cisc_score), str(gap))))
    if cisc_score[0] < cisc_score[1]:
        cisc_template = "%s ⇒ %s"
    elif cisc_score[0] > cisc_score[1]:
        cisc_template = "%s ⇐ %s"
    else:
        cisc_template = "%s ~ %s"
    print(" ".join((cisc_template % names, str(cisc_score))))
    print("")

    # DR is only evaluated after the CISC part has been reported.
    dr_score = dr(parameter.tolist(), answer.tolist(), level)
    if dr_score[0] > level and dr_score[1] < level:
        dr_template = "%s ⇒ %s"
    elif dr_score[0] < level and dr_score[1] > level:
        dr_template = "%s ⇐ %s"
    else:
        dr_template = "%s ~ %s"
    print(" ".join(("DR::", dr_template % names)))
    print("")
if __name__ == "__main__":
    # Experiments to run, in order, when the file is executed as a script.
    # Uncomment an entry to enable it.
    enabled_experiments = (
        # test_faces,
        # test_car,
        test_abalone,
        test_nlschools,
        # test_acute,
        # test_nursery,
        # test_adult,
    )
    for run_experiment in enabled_experiments:
        run_experiment()