Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
added new baseline method, PREDEFINED_SUBSPACESETS_NAIVE
- Loading branch information
Tatiana Dembelova
committed
Oct 19, 2017
1 parent
1aa01ab
commit f9ee841
Showing
11 changed files
with
238 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import numpy as np | ||
import experiments_logging as log | ||
import pandas as pd | ||
import interaction_distance as id | ||
import data_generator as dg | ||
|
||
def _tube_column(n_dense=4000, n_background=4000):
    """Return one synthetic column with a dense region in [1, 2).

    The column stacks ``n_dense`` samples drawn uniformly from [1, 2) on top
    of ``n_background`` samples drawn uniformly from [0, 2), giving an array
    of shape ``(n_dense + n_background, 1)``.
    """
    dense = np.random.uniform(1, 2, (n_dense, 1))
    background = np.random.uniform(0, 2, (n_background, 1))
    return np.concatenate((dense, background), axis=0)


def _print_head_tail_ID(columns):
    """Print the ID between the head and the tail of column 1.

    ``columns`` is a sequence of two ``(n, 1)`` arrays. They are joined side
    by side, the rows are sorted by column 0, and the values of column 1 in
    the first rows (labels 0..100) are compared with those in the last rows
    (labels 7900 and above) via ``id.compute_ID``.
    """
    df = pd.DataFrame(np.concatenate(columns, axis=1))
    df = df.sort_values(by=0).reset_index(drop=True)
    # NOTE(review): [0, 2] is passed through unchanged; presumably the value
    # range of the compared dimension -- confirm against compute_ID.
    print(id.compute_ID(df.loc[:100, 1].to_frame(), df.loc[7900:8000, 1].to_frame(), [0, 2]))
    # For a visual check, plot `df` (or only its head and tail rows) with
    # log.plot_data_2d.


def evidence_ID():
    """Print the interaction distance for two synthetic 2-D data sets.

    1. No interaction: column 0 has a dense region in [1, 2) while column 1
       is uniform on [0, 2), i.e. a horizontal or vertical "tube".
    2. Cube interaction: both columns have their dense region in [1, 2) for
       the same rows.

    Plain ndarrays are used instead of ``np.matrix``; the random draws happen
    in the same order as before, so a seeded run prints the same values.
    """
    # no interaction
    _print_head_tail_ID((_tube_column(), np.random.uniform(0, 2, (8000, 1))))

    # cube interaction
    _print_head_tail_ID((_tube_column(), _tube_column()))
|
||
# Generate a synthetic data set with the cube generator and, for every pair of
# feature dimensions, print the average interaction distance (ID) between
# adjacent windows of one dimension after sorting the rows by the other.
cg = dg.produce_cube_generator(7, 0, 2, "i", ".csv")
data, filname = cg.build()  # the returned file name is not used here
print(cg.subspaces)
print(cg.perf_disc)

data = pd.DataFrame(data)
dim_count = data.shape[1]
# Iterate over the column labels of every column but the last. The previous
# `for curr in data[:-1]` dropped the last *row* and still visited the last
# column, which made the second `dims.remove` below raise ValueError.
# NOTE(review): the last column is presumably the class label -- confirm.
for curr in data.columns[:-1]:
    dims = data.columns.tolist()
    dims.remove(curr)
    dims.remove(dim_count - 1)
    curr_data = data.sort_values(by=curr).reset_index(drop=True).loc[:, dims]
    rows = curr_data.shape[0]
    print('curr dimension', curr)
    for dim in dims:
        # Slide over the sorted rows one step at a time and compare each
        # window with the one that follows it.
        # NOTE(review): .loc slicing includes both end labels, so each window
        # holds 141 rows and the two windows share row start + 140 -- confirm
        # this is intended.
        ids = [
            id.compute_ID(curr_data.loc[start:start + 140, dim].to_frame(),
                          curr_data.loc[start + 140:start + 280, dim].to_frame(),
                          [2] * dim_count)
            for start in range(rows - 280 + 1)
        ]
        # TODO: needs data normalization
        print('interaction with', dim, np.average(ids))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import constants as cst | ||
import subprocess as sp | ||
import re | ||
import os | ||
|
||
|
||
def run_random_forest1(base_dir_name, experiment_name):
    """Run Weka's RandomForest on one experiment and report its accuracy.

    The classifier is trained on
    ``cst.BASE + base_dir_name + "/" + experiment_name + "/out.arff"``.

    Returns:
        ``None`` if the ARFF file does not exist; otherwise the string
        ``"<experiment_name>,<accuracy>"``, where the accuracy is ``"?"``
        when Weka exceeded the 30 second timeout or its output contained no
        "Correctly Classified Instances" line.

    Raises:
        subprocess.CalledProcessError: if the java process exits with a
            non-zero status.
    """
    file_path = cst.BASE + base_dir_name + "/" + experiment_name + "/out.arff"
    if not os.path.exists(file_path):
        return None
    try:
        raw_output = sp.check_output(["java", "-cp", cst.WEKA_BIN,
                                      "weka.classifiers.trees.RandomForest", '-P', '100', '-I',
                                      '100', '-num-slots', '1', '-K', '0', '-M', '1.0', '-V', '0.001', '-S', '1',
                                      "-t", file_path], timeout=30)
    except sp.TimeoutExpired:
        print("timeout exceeded", experiment_name)
        return experiment_name + ",?"

    # Decode the bytes instead of matching against their repr.
    output = raw_output.decode(errors="replace")
    # The fractional part is optional: Weka prints whole-number percentages
    # without a decimal point (e.g. "100      %").
    # NOTE(review): re.search returns the first summary in the output; with
    # only -t given this may be the training-set evaluation rather than the
    # cross-validation one -- confirm which figure is wanted.
    match = re.search(r'Correctly Classified Instances\s+\d+\s+(\d+(?:\.\d+)?)\s+%', output)
    if match:
        return experiment_name + "," + match.group(1)
    return experiment_name + ",?"
|
||
|
||
def classify_experiments(base_dir_name):
    """Classify every "cubes" experiment found under ``cst.BASE + base_dir_name``.

    Directory entries whose name does not contain ``'cubes'`` are ignored, and
    so are experiments for which ``run_random_forest1`` returns ``None``;
    previously that ``None`` was put into the result and broke writing the
    lines to a file. Entries are processed in sorted order so the result does
    not depend on the file system's listing order.

    Returns:
        A flat list alternating a result line and ``"\\n"``, ready to be
        passed to ``file.writelines``.
    """
    results = []
    for experiment in sorted(os.listdir(cst.BASE + base_dir_name)):
        if 'cubes' not in experiment:
            continue
        classification = run_random_forest1(base_dir_name, experiment)
        if classification is None:
            continue
        results.append(classification)
        results.append("\n")
    return results
|
||
if __name__ == '__main__':
    # Classify the experiments of the test log directory and store the
    # returned lines in Classification.csv inside that directory.
    log_dir = "logs_test"
    rows = classify_experiments(log_dir)
    csv_path = cst.BASE + log_dir + "/Classification.csv"
    with open(csv_path, "w") as out_file:
        out_file.writelines(rows)
Oops, something went wrong.