Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
cjs commit
  • Loading branch information
Tatiana Dembelova committed Aug 3, 2017
1 parent 14ad1aa commit 03a37c2
Show file tree
Hide file tree
Showing 38 changed files with 441,212 additions and 92 deletions.
84 changes: 84 additions & 0 deletions 3d_parity_statistics.py
@@ -0,0 +1,84 @@
import datetime
import numpy as np
import pandas as pd
import util
import os
import interaction_distance as id
from correlation_measures.binning import Binning


# def write(*args):
# log.write(' '.join([str(a) for a in args]))
# log.write('\n')


def average_id(bin1, bin2, dim_maxes):
    """Return the mean ``id.compute_ID`` value over all fine-bin pairs of two subsets.

    ``bin1`` and ``bin2`` are row selectors applied to the module-level
    ``data`` frame. Each selected subset is partitioned with
    ``Binning.equal_frequency_binning(0, rows / 141)`` (presumably an
    equal-frequency split along column 0 into ``rows / 141`` bins -- confirm
    against ``correlation_measures.binning``). Column 0 is then removed from
    both subsets, and ``id.compute_ID`` is evaluated for every combination of
    one bin from the first subset and one bin from the second.

    :param bin1: selector for the rows of the first subset of ``data``
    :param bin2: selector for the rows of the second subset of ``data``
    :param dim_maxes: forwarded unchanged to ``id.compute_ID``
    :return: sum of all pairwise results divided by the number of bin pairs
    """
    rows_a = data[bin1]
    rows_b = data[bin2]
    binner_a = Binning(rows_a)
    binner_b = Binning(rows_b)
    labels_a = binner_a.equal_frequency_binning(0, int(rows_a.shape[0] / 141))
    labels_b = binner_b.equal_frequency_binning(0, int(rows_b.shape[0] / 141))

    # distinct bin labels found in each subset
    groups_a = labels_a.unique()
    groups_b = labels_b.unique()

    # column 0 is discarded only after the bin labels have been computed
    rows_a.pop(0)
    rows_b.pop(0)

    total = 0
    for label_a in groups_a:
        for label_b in groups_b:
            total = total + id.compute_ID(rows_a.loc[labels_a == label_a],
                                          rows_b.loc[labels_b == label_b],
                                          dim_maxes)
    return total / (len(groups_a) * len(groups_b))


if __name__ == "__main__":
    # Prints the average interaction distance between sign quadrants of
    # columns 1 and 2, grouped into "inside negative macrobin",
    # "between macrobins" and "inside positive macrobin".
    data_file = 'synthetic_cases/synthetic_3d_parity_problem.csv'
    # ';'-separated file without a header row; '?' is read as NaN and every
    # row containing a NaN is discarded.
    data = pd.read_csv(data_file, delimiter=";", header=None, na_values='?')
    data = data.dropna(axis=0, how='any')
    # column 3 is removed before any statistics are computed
    # (presumably the class label -- confirm against the data generator)
    data.pop(3)

    # NOTE: results are only printed to stdout; the set-up of a timestamped
    # log directory and log file is disabled in this script.

    data_max = data.max(0)

    # One row mask per sign quadrant of (column 1, column 2).
    first_pos, first_neg = data[1] > 0, data[1] < 0
    second_pos, second_neg = data[2] > 0, data[2] < 0
    pos_neg = np.logical_and(first_pos, second_neg)
    neg_pos = np.logical_and(first_neg, second_pos)
    pos_pos = np.logical_and(first_pos, second_pos)
    neg_neg = np.logical_and(first_neg, second_neg)

    print('INSIDE NEGATIVE MACROBIN')
    print('+- <-> +-')
    bin1_1 = average_id(pos_neg, pos_neg, data_max)
    print(bin1_1)
    print('+- <-> -+')
    bin1_2 = average_id(pos_neg, neg_pos, data_max)
    print(bin1_2)
    # two same-quadrant pairs, one cross pair
    print('average ID in bin1: ', (bin1_1 * 2 + bin1_2) / 3)

    print('BETWEEN MACROBINS')
    print('+- <-> ++')
    bwn_1 = average_id(pos_neg, pos_pos, data_max)
    print(bwn_1)
    print('+- <-> --')
    bwn_2 = average_id(pos_neg, neg_neg, data_max)
    print(bwn_2)
    print('average ID in bwn: ', (bwn_2 * 2 + bwn_1 * 2) / 4)

    print('INSIDE POSITIVE MACROBIN')
    print('++ <-> ++')
    bin2_1 = average_id(pos_pos, pos_pos, data_max)
    print(bin2_1)
    print('-- <-> --')
    bin2_2 = average_id(neg_neg, neg_neg, data_max)
    print(bin2_2)
    print('++ <-> --')
    bin2_3 = average_id(pos_pos, neg_neg, data_max)
    print(bin2_3)
    print('average ID in bin2: ', (bin2_1 + bin2_2 + bin2_3) / 3)

108 changes: 108 additions & 0 deletions 4d_parity_statistics.py
@@ -0,0 +1,108 @@
import datetime
import numpy as np
import pandas as pd
import util
import os
import interaction_distance as id
from correlation_measures.binning import Binning


# def write(*args):
# log.write(' '.join([str(a) for a in args]))
# log.write('\n')


def average_id(bin1, bin2, dim_maxes):
    """Return the mean ``id.compute_ID`` value over all fine-bin pairs of two subsets.

    ``bin1`` and ``bin2`` are row selectors applied to the module-level
    ``data`` frame. Each selected subset is partitioned with
    ``Binning.equal_frequency_binning(0, rows / 141)`` (presumably an
    equal-frequency split along column 0 into ``rows / 141`` bins -- confirm
    against ``correlation_measures.binning``). Column 0 is then removed from
    both subsets, and ``id.compute_ID`` is evaluated for every combination of
    one bin from the first subset and one bin from the second.

    :param bin1: selector for the rows of the first subset of ``data``
    :param bin2: selector for the rows of the second subset of ``data``
    :param dim_maxes: forwarded unchanged to ``id.compute_ID``
    :return: sum of all pairwise results divided by the number of bin pairs
    """
    rows_a = data[bin1]
    rows_b = data[bin2]
    binner_a = Binning(rows_a)
    binner_b = Binning(rows_b)
    labels_a = binner_a.equal_frequency_binning(0, int(rows_a.shape[0] / 141))
    labels_b = binner_b.equal_frequency_binning(0, int(rows_b.shape[0] / 141))

    # distinct bin labels found in each subset
    groups_a = labels_a.unique()
    groups_b = labels_b.unique()

    # column 0 is discarded only after the bin labels have been computed
    rows_a.pop(0)
    rows_b.pop(0)

    total = 0
    for label_a in groups_a:
        for label_b in groups_b:
            total = total + id.compute_ID(rows_a.loc[labels_a == label_a],
                                          rows_b.loc[labels_b == label_b],
                                          dim_maxes)
    return total / (len(groups_a) * len(groups_b))


if __name__ == "__main__":
    # Prints the average interaction distance between sign octants of
    # columns 1, 2 and 3, grouped into "inside negative macrobin",
    # "between macrobins" and "inside positive macrobin".
    #
    # Each measured pair stands for every octant pair of the same kind, so
    # the group averages weight it by how many such pairs exist among the
    # four octants of a macrobin (1 + 3 + 3 + 3 = 10 unordered pairs inside,
    # 1 + 3 + 3 + 3 + 6 = 16 pairs between).
    data_file = 'synthetic_cases/synthetic_4d_parity_problem.csv'
    # ';'-separated file without a header row; '?' is read as NaN.
    data = pd.read_csv(data_file, delimiter=";", header=None, na_values='?')
    # drop a data point if it contains inconsistent data
    data = data.dropna(axis=0, how='any')
    # column 4 is removed before any statistics are computed
    # (presumably the class label -- confirm against the data generator)
    data.pop(4)

    # NOTE: results are only printed to stdout; the set-up of a timestamped
    # log directory and log file is disabled in this script.

    data_max = data.max(0)

    def octant(signs):
        """Return the row mask of the octant named by *signs*.

        *signs* is a three-character string of '+' / '-' giving the required
        sign of columns 1, 2 and 3, in that order (e.g. '+--').
        """
        mask = None
        for column, sign in enumerate(signs, start=1):
            condition = data[column] > 0 if sign == '+' else data[column] < 0
            mask = condition if mask is None else np.logical_and(mask, condition)
        return mask

    print('INSIDE NEGATIVE MACROBIN')
    print('+++ <-> +++')
    bin1_1 = average_id(octant('+++'), octant('+++'), data_max)
    print(bin1_1)
    print('+-- <-> +--')
    bin1_2 = average_id(octant('+--'), octant('+--'), data_max)
    print(bin1_2)
    print('+++ <-> +--')
    # Fixed: the second operand used to select '---', which measured the
    # between-macrobin pair '+++ <-> ---' instead of the labelled pair.
    bin1_3 = average_id(octant('+++'), octant('+--'), data_max)
    print(bin1_3)
    print('+-- <-> -+-')
    bin1_4 = average_id(octant('+--'), octant('-+-'), data_max)
    print(bin1_4)
    # Fixed: bin1_2 was counted twice and bin1_4 was never used.
    print('average ID in bin1: ', (bin1_1 * 1 + bin1_2 * 3 + bin1_3 * 3 + bin1_4 * 3) / 10)

    print('BETWEEN MACROBINS')
    print('+-- <-> ---')
    bwn_1 = average_id(octant('+--'), octant('---'), data_max)
    print(bwn_1)
    print('+++ <-> ---')
    bwn_2 = average_id(octant('+++'), octant('---'), data_max)
    print(bwn_2)
    print('+++ <-> -++')
    bwn_3 = average_id(octant('+++'), octant('-++'), data_max)
    print(bwn_3)
    print('+-- <-> -++')
    bwn_4 = average_id(octant('+--'), octant('-++'), data_max)
    print(bwn_4)
    print('-+- <-> -++')
    bwn_5 = average_id(octant('-+-'), octant('-++'), data_max)
    print(bwn_5)
    print('average ID in bwn: ', (bwn_2 + bwn_1 * 3 + bwn_3 * 3 + bwn_4 * 3 + bwn_5 * 6) / 16)

    print('INSIDE POSITIVE MACROBIN')
    print('--- <-> ---')
    bin2_1 = average_id(octant('---'), octant('---'), data_max)
    print(bin2_1)
    print('-++ <-> -++')
    bin2_2 = average_id(octant('-++'), octant('-++'), data_max)
    print(bin2_2)
    print('--- <-> -++')
    bin2_3 = average_id(octant('---'), octant('-++'), data_max)
    print(bin2_3)
    print('-++ <-> +-+')
    # Fixed: the first operand used to select '---' instead of the labelled
    # '-++' octant.
    bin2_4 = average_id(octant('-++'), octant('+-+'), data_max)
    print(bin2_4)
    # Fixed: bin2_2 was counted twice and bin2_4 was never used.
    print('average ID in bin2: ', (bin2_1 * 1 + bin2_2 * 3 + bin2_3 * 3 + bin2_4 * 3) / 10)

0 comments on commit 03a37c2

Please sign in to comment.