Skip to content
Permalink
eebe8d803f
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
60 lines (50 sloc) 2.19 KB
import math
import numpy as np
def computeIDs(bin_map, curr, data, dist_bins, dim_maxes):
    """Compute one score per pair of consecutive bins in ``dist_bins``.

    For every bin the rows of ``data`` (with column ``curr`` removed) selected
    by ``bin_map == bin_label`` are compared pairwise.  For a pair of points
    the per-dimension term is ``dim_maxes[dim] - max(x, y)`` and the terms of
    all remaining dimensions are multiplied together.

    * intra-bin measure: mean of that product over all point pairs of a bin;
    * inter-bin measure: twice the mean of that product over all pairs made
      of one point of the bin and one point of the preceding bin.

    The returned value for consecutive bins ``c`` and ``c + 1`` is
    ``intra[c] - inter[c, c + 1] + intra[c + 1]``.
    NOTE(review): this looks like the interaction distance of the IPD
    discretisation method -- confirm against the reference implementation.

    Args:
        bin_map: Row-wise bin labels; only ``bin_map == label`` is used, as a
            boolean row selector for ``data`` (presumably a Series aligned
            with ``data`` -- verify against the caller).
        curr: Label of the column of ``data`` that is excluded.
        data: pandas DataFrame holding one column per dimension.
        dist_bins: Ordered sequence of the bin labels to compare.
        dim_maxes: Mapping from column label to that dimension's maximum.

    Returns:
        1-D float ``numpy.ndarray`` with ``max(len(dist_bins) - 1, 0)``
        entries.  An empty bin yields ``nan`` for the measures it is part of.
    """
    # drop() builds the reduced frame directly instead of copying the whole
    # frame (including ``curr``) and popping the column afterwards.
    other_dims = data.drop(columns=curr)

    intra_bin_measures = []
    inter_bin_measures = []
    # Selection of the previous bin is carried over between iterations so
    # every bin is filtered out of the frame only once.
    prev_bin_data = None
    prev_points_count = None

    for binn in dist_bins:
        bin_data = other_dims.loc[bin_map == binn]
        points_count = bin_data.shape[0]
        has_prev = prev_bin_data is not None

        intra_prod_matrix = np.ones([points_count, points_count])
        inter_prod_matrix = (
            np.ones([points_count, prev_points_count]) if has_prev else None
        )

        # Multiply the per-dimension terms together.
        for dim in bin_data:
            # Plain ndarrays: ufunc.outer is not supported on pandas Series.
            values = np.asarray(bin_data[dim])
            dim_max = dim_maxes[dim]

            intra_elem = compute_ID_elem(values, values, dim_max)
            intra_prod_matrix = np.multiply(intra_prod_matrix, intra_elem)

            if has_prev:
                prev_values = np.asarray(prev_bin_data[dim])
                inter_elem = compute_ID_elem(values, prev_values, dim_max)
                inter_prod_matrix = np.multiply(inter_prod_matrix, inter_elem)

        intra_bin_measures.append(np.sum(intra_prod_matrix) / points_count ** 2)
        if has_prev:
            inter_bin_measures.append(
                2 * np.sum(inter_prod_matrix) / (points_count * prev_points_count)
            )

        prev_bin_data = bin_data
        prev_points_count = points_count

    intra = np.asarray(intra_bin_measures, dtype=float)
    inter = np.asarray(inter_bin_measures, dtype=float)
    # inter[c] belongs to the bin pair (c, c + 1).
    return intra[:-1] - inter + intra[1:]
def compute_ID_elem(bin1, bin2, dim_max):
    """Return the matrix ``dim_max - max(bin1[i], bin2[j])`` for all ``i, j``.

    Args:
        bin1: 1-D values of one dimension (ndarray, pandas Series or list).
        bin2: 1-D values of the same dimension for the second point set.
        dim_max: Scalar maximum of that dimension.

    Returns:
        float64 ``numpy.ndarray`` of shape ``(len(bin1), len(bin2))``.
    """
    # Convert up front: ufunc.outer is not supported on pandas Series, and
    # this also lets plain sequences through.
    first = np.asarray(bin1)
    second = np.asarray(bin2)

    # max(R^i_{j_1}, R^i_{j_2}) for every pair of points
    outer_max = np.maximum.outer(first, second)

    # max_i - max(R^i_{j_1}, R^i_{j_2}); the float64 array keeps the result
    # dtype independent of the input dtypes.
    max_array = np.full(outer_max.shape, dim_max, dtype=np.float64)
    return max_array - outer_max
def compute_ID_threshold(IDs):
    """Return the threshold taken from the lower third of the sorted ``IDs``.

    The values are sorted ascending (on a copy, the argument is not modified)
    and the element at index ``len(IDs) // 3 - 1`` is returned.  With fewer
    than three values the smallest one is returned.

    Args:
        IDs: Non-empty list or 1-D ``numpy.ndarray`` of comparable values.

    Returns:
        The selected element of ``IDs``.

    Raises:
        IndexError: If ``IDs`` is empty.
    """
    if len(IDs) == 0:
        raise IndexError("cannot compute a threshold from an empty sequence of IDs")

    ordered = IDs.copy()
    ordered.sort()

    # Floor division is kept on purpose (the original note says this mirrors
    # the reference IPD code).  The clamp stops the index from becoming -1
    # for fewer than three values, which would wrap around and silently pick
    # the LARGEST value instead of one from the lower third.
    index = max(len(ordered) // 3 - 1, 0)
    return ordered[index]
    # NOTE: a configurable-quantile variant (index = int(len(IDs) * quantile))
    # was considered in the original source but is not enabled.