Skip to content
Permalink
62597337b8
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
122 lines (96 sloc) 3.06 KB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from collections import Counter, defaultdict
from copy import copy
import random
import sys
import time
from acid import map_to_majority, marginals
from sc import stochastic_complexity
__author__ = "Kailash Budhathoki"
__email__ = "kbudhath@mpi-inf.mpg.de"
__copyright__ = "Copyright (c) 2018"
__license__ = "MIT"
def regress(X, Y):
# target Y, feature X
max_iterations = 10000
scx = stochastic_complexity(X)
len_dom_y = len(set(Y))
# print scx,
f = map_to_majority(X, Y)
supp_x = list(set(X))
supp_y = list(set(Y))
pair = zip(X, Y)
res = [y - f[x] for x, y in pair]
cur_res_codelen = stochastic_complexity(res, len_dom_y)
j = 0
minimized = True
while j < max_iterations and minimized:
minimized = False
for x_to_map in supp_x:
best_res_codelen = sys.float_info.max
best_cand_y = None
for cand_y in supp_y:
if cand_y == f[x_to_map]:
continue
res = [y - f[x] if x != x_to_map else y -
cand_y for x, y in pair]
res_codelen = stochastic_complexity(res, len_dom_y)
if res_codelen < best_res_codelen:
best_res_codelen = res_codelen
best_cand_y = cand_y
if best_res_codelen < cur_res_codelen:
cur_res_codelen = best_res_codelen
f[x_to_map] = best_cand_y
minimized = True
j += 1
# print cur_res_codelen
return scx + cur_res_codelen
def grp(X, Y):
scx = stochastic_complexity(X)
mygx = marginals(X, Y)
ygrps = mygx.values()
sc_ygrps = [stochastic_complexity(Z) for Z in ygrps]
# print "{%.2f}" % (scx + sum(sc_ygrps)),
# print "(%.2f)" % sum(sc_ygrps),
while True:
merge = None
best_gain = 0
for i in range(len(ygrps)):
sci = sc_ygrps[i]
for j in range(i + 1, len(ygrps)):
scj = sc_ygrps[j]
scij = stochastic_complexity(ygrps[i] + ygrps[j])
gain = sci + scj - scij
if gain > best_gain:
merge = (i, j)
best_gain = gain
if not merge:
break
k, l = merge
ygrps[k] = ygrps[k] + ygrps[l]
sc_ygrps[k] = sc_ygrps[k] + sc_ygrps[l] - best_gain
del ygrps[l]
del sc_ygrps[l]
# assert sum(sc_ygrps) == sum(stochastic_complexity(ygrp)
# for ygrp in ygrps)
# print "%.2f" % sum(sc_ygrps),
return scx + sum(sc_ygrps)
def cisc_grp(X, Y):
sxtoy = grp(X, Y)
sytox = grp(Y, X)
return (sxtoy, sytox)
def crisp(X, Y):
# print 'regressing from x to y'
sxtoy = regress(X, Y)
# print 'regressing from y to x'
sytox = regress(Y, X)
return (sxtoy, sytox)
def test():
X = range(10000)
Y = range(10000)
zip(X, Y)
if __name__ == "__main__":
from test_benchmark import load_pair
X, Y = load_pair(99)
print crisp(X, Y)