Skip to content
Permalink
03a37c22ba
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
108 lines (98 sloc) 5 KB
import datetime
import numpy as np
import pandas as pd
import util
import os
import interaction_distance as id
from correlation_measures.binning import Binning
# def write(*args):
# log.write(' '.join([str(a) for a in args]))
# log.write('\n')
def average_id(bin1, bin2, dim_maxes, points_per_bin=141):
    """Average ``id.compute_ID`` over all sub-bin pairs of two row selections.

    Both selections are taken from the module-level ``data`` frame. Each
    selection is binned along column 0 with
    ``Binning.equal_frequency_binning``; column 0 is then removed and
    ``id.compute_ID`` is evaluated for every pair consisting of one sub-bin
    of the first selection and one sub-bin of the second.

    Args:
        bin1: Boolean row mask selecting the first group from ``data``.
        bin2: Boolean row mask selecting the second group from ``data``.
        dim_maxes: Passed through unchanged to ``id.compute_ID``.
        points_per_bin: Divisor applied to each selection's row count; the
            truncated quotient is handed to ``equal_frequency_binning`` as
            its second argument (presumably the number of bins - confirm
            against ``Binning``). Defaults to 141, the value that used to be
            hard-coded here.

    Returns:
        The sum of all pairwise ``compute_ID`` results divided by the number
        of sub-bin pairs.
    """
    # Boolean indexing yields independent copies, so dropping column 0 below
    # does not touch the shared ``data`` frame.
    first = data[bin1]
    second = data[bin2]

    first_map = Binning(first).equal_frequency_binning(
        0, int(first.shape[0] / points_per_bin))
    second_map = Binning(second).equal_frequency_binning(
        0, int(second.shape[0] / points_per_bin))

    # Distinct bin labels of each selection.
    first_labels = first_map.unique()
    second_labels = second_map.unique()

    # Column 0 was only needed for the binning; it is excluded from the
    # frames handed to compute_ID.
    first.pop(0)
    second.pop(0)

    # The second group's slices do not depend on the outer loop, so build
    # them once instead of once per first-group bin.
    second_parts = [second.loc[second_map == label] for label in second_labels]

    total = sum(
        id.compute_ID(first.loc[first_map == label], part, dim_maxes)
        for label in first_labels
        for part in second_parts
    )
    return total / (len(first_labels) * len(second_labels))
if __name__ == "__main__":
    data_file = 'synthetic_cases/synthetic_4d_parity_problem.csv'
    # Read the ';'-separated file (no header row); '?' entries become NaN.
    data = pd.read_csv(data_file, delimiter=";", header=None, na_values='?')
    # Drop every row that contains at least one NaN.
    data = data.dropna(axis=0, how='any')
    # Remove column 4; the masks below use columns 1-3 and average_id bins
    # on column 0.
    data.pop(4)

    # Disabled log-file setup, kept for reference:
    # defining prefix for the output files
    # file_name = util.get_file_name(data_file)
    # dir = 'logs/id_statistics_' + file_name + "_" + \
    # datetime.datetime.now().strftime("_%Y%m%d_%H%M%S") + "/"
    # os.makedirs(dir)
    # print('output files are:', dir + '*')
    # log_file = dir + "log.txt"
    # with open(log_file, 'w') as log:

    data_max = data.max(0)

    def _cell_mask(signs):
        """Return the row mask for a three-character sign pattern.

        Character i of ``signs`` ('+' or '-') constrains column i + 1 of
        ``data`` to be strictly positive or strictly negative.
        """
        if len(signs) != 3 or any(sign not in '+-' for sign in signs):
            raise ValueError('invalid sign pattern: %r' % (signs,))
        mask = None
        for column, sign in zip((1, 2, 3), signs):
            side = data[column] > 0 if sign == '+' else data[column] < 0
            mask = side if mask is None else np.logical_and(mask, side)
        return mask

    def _report(first, second):
        """Print the pair label, compute its average ID, print and return it.

        The label and both masks are derived from the same sign strings, so
        the printed label always matches the cells actually compared.
        """
        print(first + ' <-> ' + second)
        value = average_id(_cell_mask(first), _cell_mask(second), data_max)
        print(value)
        return value

    print('INSIDE NEGATIVE MACROBIN')
    bin1_1 = _report('+++', '+++')
    bin1_2 = _report('+--', '+--')
    # Fixed: the second mask used to select '---', contradicting the label
    # and duplicating the '+++ <-> ---' measurement made below.
    bin1_3 = _report('+++', '+--')
    bin1_4 = _report('+--', '-+-')
    # Fixed: bin1_2 was counted twice and bin1_4 was never used. The weights
    # 1 + 3 + 3 + 3 = 10 presumably count the cell pairs each measurement
    # stands for by symmetry - confirm with the experiment design.
    print('average ID in bin1: ',
          (bin1_1 * 1 + bin1_2 * 3 + bin1_3 * 3 + bin1_4 * 3) / 10)

    print('BETWEEN MACROBINS')
    bwn_1 = _report('+--', '---')
    bwn_2 = _report('+++', '---')
    bwn_3 = _report('+++', '-++')
    bwn_4 = _report('+--', '-++')
    bwn_5 = _report('-+-', '-++')
    print('average ID in bwn: ',
          (bwn_2 + bwn_1 * 3 + bwn_3 * 3 + bwn_4 * 3 + bwn_5 * 6) / 16)

    print('INSIDE POSITIVE MACROBIN')
    bin2_1 = _report('---', '---')
    bin2_2 = _report('-++', '-++')
    bin2_3 = _report('---', '-++')
    # Fixed: the first mask used to select '---', contradicting the label.
    bin2_4 = _report('-++', '+-+')
    # Fixed: bin2_2 was counted twice and bin2_4 was never used.
    print('average ID in bin2: ',
          (bin2_1 * 1 + bin2_2 * 3 + bin2_3 * 3 + bin2_4 * 3) / 10)