Skip to content
Permalink
14ad1aa2e8
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
88 lines (73 sloc) 3.31 KB
import math
import pandas as pd
import numpy as np
from scipy.special import comb
# Constant whose base-2 logarithm is added to every value returned by
# quasi_uniform_code.
# NOTE(review): presumably the normalising constant of Rissanen's universal
# code for integers -- confirm against the reference paper.
QUASI_UNIFORM_CODE_INITIAL_NUMBER = 2.865064


def quasi_uniform_code(n):
    """
    Return the iterated base-2 logarithm code length of ``n``.

    Starting from ``n``, the value is repeatedly replaced by its base-2
    logarithm for as long as it is greater than 1, and every logarithm taken
    is accumulated. The base-2 logarithm of QUASI_UNIFORM_CODE_INITIAL_NUMBER
    is then added to the accumulated sum.

    :param n: number to encode; for ``n <= 1`` only the constant term is returned
    :return: accumulated iterated logarithms plus the constant term
    """
    accumulated = 0
    current = n
    while current > 1:
        current = math.log(current, 2)
        accumulated = accumulated + current
    return accumulated + math.log(QUASI_UNIFORM_CODE_INITIAL_NUMBER, 2)
def break_points_number(macro_bin, IDs, ID_threshold):
    '''
    Count the entries of ``IDs``, looked up at every index of ``macro_bin``
    except the last one, that are STRICTLY GREATER than ``ID_threshold``.

    :param macro_bin: sequence of indices; its last element is not looked up
    :param IDs: container indexed with the list ``macro_bin[:-1]`` (so it must
        accept a list of indices, e.g. a numpy array)
    :param ID_threshold: value every looked-up ID is compared against
    :return: number of looked-up IDs that are greater than ``ID_threshold``
    '''
    # todo old condition as in original IPD: ID > ID_threshold
    inner_indices = macro_bin[:-1]
    exceeding = 0
    for candidate in IDs[inner_indices]:
        if candidate > ID_threshold:
            exceeding += 1
    return exceeding
def compute_bin_cost(c, l, k, macro_bin, IDs, ID_threshold):
    '''
    Return the cost of adding ``macro_bin`` as the k-th macro bin.

    The result is the sum of six terms: the discretization term for ``c``
    micro bins and ``k`` macro bins, the size term of ``macro_bin``, the
    break-point term, the error term, and two negated "previous" terms that
    depend only on ``l``, ``c`` and ``k``.

    :param c: number of micro bins covered once ``macro_bin`` is included
    :param l: number of micro bins covered before ``macro_bin``
    :param k: number of macro bins, counting ``macro_bin``
    :param macro_bin: indices of the micro bins merged into this macro bin
    :param IDs: forwarded unchanged to ``break_points_number``
    :param ID_threshold: forwarded unchanged to ``break_points_number``
    :return: sum of the six cost terms
    :raises ValueError: if ``len(macro_bin)`` differs from ``c - l``
    '''
    macro_bin_size = len(macro_bin)
    if macro_bin_size != c - l:
        # Format the numbers explicitly: concatenating the integers with the
        # "!=" string raised TypeError instead of the intended ValueError.
        raise ValueError(
            "macro bin size {} != c - l ({} - {})".format(macro_bin_size, c, l)
        )

    macro_bin_size_code = quasi_uniform_code(macro_bin_size)
    break_points_size = break_points_number(macro_bin, IDs, ID_threshold)

    # todo old in the original ipd L_disc L_N is computed for (k-1)
    # L_disc = quasi_uniform_code(k) + math.log(comb(c - 1, k - 1), 2)
    L_disc = quasi_uniform_code(k - 1) + math.log(comb(c - 1, k - 1), 2)

    # todo old in the original ipd L_disc L_N is computed for (k-1)
    # L_disc_prev = - (quasi_uniform_code(k - 1) + math.log(comb(l - 1, k - 2), 2) if k > 1 else 0)
    # The conditional covers the whole sum: for k <= 1 the term is 0.
    L_disc_prev = -(
        (quasi_uniform_code(k - 2) + math.log(comb(l - 1, k - 2), 2)) if k > 1 else 0
    )

    L_disc_M_ind = macro_bin_size_code - math.log(macro_bin_size / c, 2) * (macro_bin_size + 1)
    # Only defined for l > 0 (log of l / c); otherwise the term is 0.
    L_disc_M_ind_prev = -((math.log(l / c, 2) * (k - 1 + l)) if l > 0 else 0)

    # Without any break point above the threshold this term is 0.
    if break_points_size > 0:
        L_disc_M_mh = (
            quasi_uniform_code(break_points_size)
            + math.log(macro_bin_size - 1, 2) * break_points_size
        )
    else:
        L_disc_M_mh = 0

    L_errors = math.log(macro_bin_size, 2) * macro_bin_size

    return L_disc + L_disc_M_ind + L_disc_M_mh + L_errors + L_disc_prev + L_disc_M_ind_prev
def dynamic_merging(ID_threshold, IDs, init_bins_count):
    '''
    Fill the dynamic-programming cost table for merging micro bins into macro bins.

    ``F[c_, k_]`` holds the lowest accumulated cost found for covering the
    first ``c_ + 1`` micro bins with ``k_ + 1`` macro bins, and
    ``discretizations[c_][k_]`` holds the matching list of macro bins (each a
    list of micro-bin indices). Entries of ``F`` with ``k_ > c_`` are never
    written and stay 0.

    :param ID_threshold: forwarded unchanged to ``compute_bin_cost``
    :param IDs: forwarded unchanged to ``compute_bin_cost``
    :param init_bins_count: number of initial (micro) bins
    :return: tuple ``(F, discretizations)``
    '''
    F = np.zeros([init_bins_count, init_bins_count])
    discretizations = []

    # compute when we merge first c initial dist_bins into 1 and #macro dist_bins k = 1
    for c_ in range(init_bins_count):
        c = c_ + 1
        micro_bins = list(range(c))
        F[c_, 0] = compute_bin_cost(c, 0, 1, micro_bins, IDs, ID_threshold)
        discretizations.append([[micro_bins]])

    for k_ in range(1, init_bins_count):
        k = k_ + 1
        for c_ in range(k_, init_bins_count):
            c = c_ + 1
            min_F = None
            first_l_micro_bins = None
            last_micro_bins = None
            # search for the best # of microbins in the first (k - 1) macrobins: l
            for l_ in range(k_ - 1, c_):
                l = l_ + 1
                micro_bins = list(range(l, c))
                temp_F = F[l_, k_ - 1] + compute_bin_cost(c, l, k, micro_bins, IDs, ID_threshold)
                # Compare against None explicitly: a truthiness test would
                # treat a running minimum of exactly 0 as "not set yet" and
                # let a larger candidate overwrite it.
                if min_F is None or temp_F < min_F:
                    min_F = temp_F
                    first_l_micro_bins = discretizations[l_][k_ - 1]
                    last_micro_bins = micro_bins
            F[c_, k_] = min_F
            # Shallow copy so the stored (k - 1)-bin solution is not extended in place.
            disc = first_l_micro_bins.copy()
            disc.append(last_micro_bins)
            discretizations[c_].append(disc)
    return F, discretizations