Skip to content
Permalink
10943bf144
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
85 lines (63 sloc) 1.9 KB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from collections import Counter, defaultdict
from copy import copy
import random
import sys
import time
from entropy import entropy
def marginals(X, Y):
    """Group the values of ``Y`` by the value of ``X`` at the same index.

    Args:
        X: iterable of hashable values used as grouping keys.
        Y: indexable sequence; ``Y[i]`` is read for every position ``i``
            produced while iterating ``X``.

    Returns:
        A ``defaultdict(list)`` mapping each distinct ``x`` seen in ``X`` to
        the list of ``Y[i]`` at the positions where ``X[i] == x``, in order
        of appearance.
    """
    grouped = defaultdict(list)
    for position, key in enumerate(X):
        bucket = grouped[key]
        bucket.append(Y[position])
    return grouped
def map_to_majority(X, Y):
    """Map each distinct value of ``X`` to its most frequent paired ``Y`` value.

    Args:
        X: iterable of hashable values.
        Y: indexable sequence of hashable values; ``Y[i]`` is paired with the
            ``i``-th element of ``X``.

    Returns:
        A plain ``dict`` ``{x: y}`` where ``y`` is the value that occurs most
        often among the ``Y[i]`` paired with ``x``. Ties are resolved however
        ``Counter.most_common`` orders equal counts.
    """
    subgroups_y = defaultdict(list)
    for i, x in enumerate(X):
        subgroups_y[x].append(Y[i])

    f = dict()
    # ``items()`` is available on both Python 2 and Python 3, whereas
    # ``iteritems()`` only exists on Python 2 dictionaries.
    for x, subgroup_y in subgroups_y.items():
        freq_y, _ = Counter(subgroup_y).most_common(1)[0]
        f[x] = freq_y
    return f
def regress(X, Y):
    """Greedily fit a function ``f: X -> Y`` that minimises residual entropy.

    ``Y`` is the target and ``X`` the feature. ``f`` starts as the majority
    mapping from ``map_to_majority`` and is then refined by coordinate-wise
    search: for every ``x`` in the support of ``X``, each alternative
    ``y`` from the support of ``Y`` is tried as ``f[x]``, and the best one is
    kept if it lowers ``entropy`` of the residuals ``y - f(x)``.  Passes are
    repeated until one makes no improvement or ``max_iterations`` passes have
    run.

    Args:
        X: sequence of hashable feature values.
        Y: sequence of hashable target values supporting subtraction
            (residuals are computed as ``y - f[x]``).

    Returns:
        ``entropy(X)`` plus the lowest residual ``entropy`` value found.
        NOTE(review): ``entropy`` comes from the project-local ``entropy``
        module; its exact definition and units are not visible here.
    """
    # target Y, feature X
    max_iterations = 10000
    hx = entropy(X)

    f = map_to_majority(X, Y)

    supp_x = list(set(X))
    supp_y = list(set(Y))

    # Materialise the pairs: they are iterated once per candidate below, and
    # on Python 3 a bare ``zip`` object would be exhausted after the first use.
    pair = list(zip(X, Y))
    res = [y - f[x] for x, y in pair]
    cur_res_codelen = entropy(res)

    j = 0
    minimized = True
    while j < max_iterations and minimized:
        minimized = False

        for x_to_map in supp_x:
            best_res_codelen = sys.float_info.max
            best_cand_y = None

            for cand_y in supp_y:
                # The current assignment is already scored by cur_res_codelen.
                if cand_y == f[x_to_map]:
                    continue

                # Residuals if only f[x_to_map] were changed to cand_y.
                res = [y - f[x] if x != x_to_map else y - cand_y
                       for x, y in pair]
                res_codelen = entropy(res)

                if res_codelen < best_res_codelen:
                    best_res_codelen = res_codelen
                    best_cand_y = cand_y

            # Accept the best alternative only on strict improvement; when
            # there was no alternative, best_res_codelen is still the float
            # maximum and this test fails.
            if best_res_codelen < cur_res_codelen:
                cur_res_codelen = best_res_codelen
                f[x_to_map] = best_cand_y
                minimized = True
        j += 1

    return hx + cur_res_codelen
def acid(X, Y):
    """Score both regression directions between ``X`` and ``Y``.

    Returns:
        A 2-tuple ``(regress(X, Y), regress(Y, X))``: first the score with
        ``X`` as feature and ``Y`` as target, then the score for the reverse
        direction.
    """
    return regress(X, Y), regress(Y, X)
if __name__ == "__main__":
    # Script entry point: load benchmark pair 99 and print both direction
    # scores.  ``load_pair`` comes from the project-local ``test_benchmark``
    # module and is imported lazily so importing this module does not need it.
    from test_benchmark import load_pair

    X, Y = load_pair(99)
    # Call form of print: identical output on Python 2 for a single argument,
    # and keeps the module parseable on Python 3.
    print(acid(X, Y))