Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
new synthetic data sets for cube in cube datasets (with added irrelev…
…ant features)
  • Loading branch information
Tatiana Dembelova committed Aug 12, 2017
1 parent 83cd901 commit a92a56e
Show file tree
Hide file tree
Showing 40 changed files with 700,167 additions and 19,973 deletions.
2 changes: 1 addition & 1 deletion constants.py
Expand Up @@ -20,7 +20,7 @@ class DistanceMeasure(Enum):
CJS = 2


ID_THRESHOLD_QUANTILE = 0.90
ID_THRESHOLD_QUANTILE = 0.80
ID_SLIDING_WINDOW = 40

NORMALIZATION_RADIUS = 1
Expand Down
34 changes: 20 additions & 14 deletions data_generation.py
Expand Up @@ -93,9 +93,13 @@ def synthetic_cube_in_cube(m, r, i, side, sigma=0.5):
h = int(m * sigma)
range = [-0.5, 0] if side == 'l' else [-0.25, 0.25] if side == 'm' else [0, 0.5]
contra_range = [0, 0.5] if side == 'l' else [-0.25, 0.25] if side == 'm' else [-0.5, 0]
r_dims = np.concatenate((np.concatenate(
(np.random.uniform(range[0], range[1], (h, 1)), np.random.uniform(contra_range[0], contra_range[1], (h, 1))),
axis=1), np.random.uniform(-0.5, 0.5, (m - h, r))), axis=0)
r_dims = np.concatenate((
# np.concatenate(
# (np.random.uniform(range[0], range[1], (h, 1)),
# np.random.uniform(contra_range[0], contra_range[1], (h, 1))),
# axis=1)
np.random.uniform(range[0], range[1], (h, r))
, np.random.uniform(-0.5, 0.5, (m - h, r))), axis=0)
i_dims = np.random.uniform(-0.5, 0.5, (m, i)) if i > 0 else np.empty((m, i))
data = np.concatenate((r_dims, i_dims), axis=1)

Expand All @@ -121,7 +125,7 @@ def append_irrelevant_features(file, n):

def generate():
# -------generating dataset
# data = synthetic_cube_in_cube(rows, rel_features, irrel_features, 'r')
# data = synthetic_cube_in_cube(rows, rel_features, irrel_features, 'l')
# data__ = synthetic_cjs()
#
# # add zeroes as default class
Expand All @@ -133,16 +137,18 @@ def generate():


if __name__ == '__main__':
for i in [1,2,3,4,5,10]:
file = 'synthetic_cases/synthetic_10d_parity_problem_' + str(i) + '.csv'
source = 'synthetic_cases/synthetic_10d_parity_problem.csv'
for j in [3,4,5,10]:
for i in [1,2,3,4,5,10]:
# file = 'synthetic_cases/synthetic_cube_in_cube_10.csv'
file = 'synthetic_cases/synthetic_cube_in_cube_' + str(j) + '_' + str(i) + '.csv'
source = 'synthetic_cases/synthetic_cube_in_cube_' + str(j) + '.csv'

if os.path.isfile(file):
raise ValueError(file + " already exists!")
if os.path.isfile(file):
raise ValueError(file + " already exists!")

# parameters
rows = 20000
rel_features = 2
irrel_features = 10
# parameters
rows = 20000
rel_features = 10
irrel_features = i

generate()
generate()
4 changes: 2 additions & 2 deletions experiments_logging.py
Expand Up @@ -65,5 +65,5 @@ def write_cut_file(name, disc_intervals):
if __name__ == '__main__':
# rows = 20000
# data = np.concatenate((synthetic_cube_in_cube(rows, 2, 0), np.zeros((rows, 1))), axis=1)
data = pd.read_csv("synthetic_cases/synthetic_right_bottom_cube_in_cube_2_0.csv", delimiter=";", header=None, na_values='?')
plot_data_2d(data)
data = pd.read_csv("synthetic_cases/synthetic_cube_in_cube_5.csv", delimiter=";", header=None, na_values='?')
plot_data_3d(data)
5 changes: 4 additions & 1 deletion main.py
Expand Up @@ -2,6 +2,9 @@
import sys

import datetime
# TODO: workaround for running on the server - select the non-interactive 'Agg' backend before pyplot is imported
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -248,7 +251,7 @@ def get_discretized_points(curr, data, discretizations, dist_bins, min_id, rank_

# defining prefix for the output files
data_file_name = util.get_file_name(data_file)
dir = 'logs2/' + datetime.datetime.now().strftime("%Y%m%d_%H%M%S") \
dir = 'logs3/' + datetime.datetime.now().strftime("%Y%m%d_%H%M%S") \
+ "_" + distance_measure.name \
+ ("_" + cor_measure.name if cor_measure else "") \
+ "_" + method.name \
Expand Down
185 changes: 185 additions & 0 deletions run.sh
@@ -0,0 +1,185 @@
#!/usr/bin/env bash
# Launches every synthetic-data experiment as a background `python main.py`
# job and then waits for all of them to finish.
# storing the output files into logs3 directory
#
# Exit status: 0 when every experiment exited successfully, 1 otherwise.
# (A bare `wait` with no operands always returns 0, which would hide
# experiments that crashed, so every job is waited on individually.)

# Dimensionalities for which base datasets exist.
dims=(2 3 4 5 10)
# Numbers of irrelevant features appended to each base dataset.
irrelevant=(1 2 3 4 5 10)

# Base paths (without the .csv suffix) of the original datasets:
# first all parity problems, then all cube-in-cube cases.
bases=()
for d in "${dims[@]}"; do
    bases+=("synthetic_cases/synthetic_${d}d_parity_problem")
done
for d in "${dims[@]}"; do
    bases+=("synthetic_cases/synthetic_cube_in_cube_${d}")
done

pids=()   # PIDs of the launched jobs, in launch order
cmds=()   # arguments passed to main.py for each job, parallel to pids

# Start one experiment in the background; all arguments go to main.py.
launch() {
    python main.py "$@" &
    pids+=("$!")
    cmds+=("$*")
}

# Run the original datasets followed by their irrelevant-feature variants.
# Any arguments (e.g. -dist=CJS) are appended to every main.py invocation.
run_suite() {
    local base i

    # ----original synthetic cases----
    for base in "${bases[@]}"; do
        launch -f="${base}.csv" "$@"
    done

    # ----with added irrelevant features----
    for base in "${bases[@]}"; do
        for i in "${irrelevant[@]}"; do
            launch -f="${base}_${i}.csv" "$@"
        done
    done
}

# -------------ID experiments-------------
run_suite

# ----with added irrelevant features and proposed algorithms----
launch -f=synthetic_cases/synthetic_2d_parity_problem_1.csv -m=greedy_topk -cor=uds
#todo add more commands

# -------------CJS experiments-------------
run_suite -dist=CJS

# Wait for each job by PID so that its exit status is not lost.
failed=0
for idx in "${!pids[@]}"; do
    if ! wait "${pids[$idx]}"; then
        echo "FAILED: python main.py ${cmds[$idx]}" >&2
        failed=$((failed + 1))
    fi
done

if (( failed > 0 )); then
    echo "${failed} of ${#pids[@]} experiments failed" >&2
    exit 1
fi

0 comments on commit a92a56e

Please sign in to comment.