Skip to content
Permalink
8b0e2f8e44
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
187 lines (155 sloc) 7.45 KB
import matplotlib.pyplot as plt
import pandas as pd
import discretization_quality_measure as dq
import data_generation as dg
from mpl_toolkits.mplot3d import Axes3D
import data_generation as dg_old
import util
import numpy as np
def plot_disc(problem, method):
    """Plot computed discretization cuts against the ideal cuts and save PNGs.

    The ideal cuts are read with ``dq.parse_cuts`` from
    ``ideal_disc/cut_<problem>.txt``. For every combination of distance
    measure ('ID', 'CJS'), threshold ('0.3', '0.5', '0.8') and number of
    irrelevant features (0..10) the file ``logs4/<run>.csv/cut.txt`` is
    parsed; runs for which ``dq.parse_cuts`` returns nothing are skipped.
    For every remaining run one figure is drawn with one subplot per ideal
    dimension (ideal cuts in red, computed cuts in blue) and saved as
    ``<method.__name__>_<run>.png`` relative to the working directory.

    :param problem: problem name used to build the input file names
    :param method: callable ``method(ideal_cuts_of_dim, cuts_of_dim)`` whose
        result is formatted with ``{0:.2f}``; its ``__name__`` appears in the
        annotation and in the output file name
    """
    ideal_cuts = dq.parse_cuts("ideal_disc/cut_" + problem + ".txt")
    n_dims = len(ideal_cuts)
    for dist in ['ID', 'CJS']:
        for threshold in ['0.3', '0.5', '0.8']:
            for irr_feat in range(11):
                curr = dist + "_ORIGINAL_" + threshold + "_" + problem + ("" if irr_feat == 0 else "_" + str(irr_feat))
                cuts = dq.parse_cuts("logs4/" + curr + ".csv/cut.txt")
                if not cuts:
                    continue
                # Create the figure only once we know there is something to
                # draw, so skipped runs do not leave empty figures behind.
                fig = plt.figure()
                for i in range(n_dims):
                    ax = fig.add_subplot(n_dims, 1, i + 1)

                    # Ideal cuts: red, long tick marks, labels below the line.
                    ax.hlines(1, -2.5, ideal_cuts[i][-1] + 0.5)
                    ax.eventplot(ideal_cuts[i], orientation='horizontal', colors='r', linelengths=2)
                    ax.annotate(method.__name__ + ': ' + "{0:.2f}".format(method(ideal_cuts[i], cuts[i]))
                                + ', # bins: ' + str(len(cuts[i])), xy=(-2.5, 2.2))
                    for cut in ideal_cuts[i]:
                        ax.annotate(str(cut), xy=(cut, 1), xytext=(cut - 0.1, -0.2))

                    # Computed cuts: blue; labels alternate above/below the
                    # line so that neighbouring values do not overlap.
                    ax.hlines(1, -2.5, cuts[i][-1] + 0.5)
                    ax.eventplot(cuts[i], orientation='horizontal', colors='b')
                    for j, cut in enumerate(cuts[i]):
                        # The anchor is the cut rounded to one decimal; it is
                        # passed as a number rather than a formatted string.
                        ax.annotate(str(cut), xy=(float("{0:.1f}".format(cut)), 1),
                                    xytext=(cut - 0.1, 1.6 if j % 2 == 0 else 0.3))
                    ax.axis('off')
                fig.savefig(method.__name__ + "_" + curr + '.png', format='png')
                # Release the figure; otherwise every run keeps one alive.
                plt.close(fig)
def plot_data_3d(data):
    """Show a 3D scatter plot of ``data`` coloured by the label in column 3.

    Columns 0, 1 and 2 are used as the X0, X1 and X2 coordinates. Rows whose
    column 3 equals 1, 2, 3 or 4 are drawn in blue, black, red and green
    respectively; rows with any other value in column 3 are not drawn.

    :param data: a ``pd.DataFrame``, or any object ``pd.DataFrame`` can wrap
    """
    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    labels = data[3]
    for color, label in (('b', 1), ('k', 2), ('r', 3), ('g', 4)):
        mask = labels == label
        ax.scatter(data[0][mask], data[1][mask], data[2][mask], c=color, s=1)

    ax.set_xlabel('X0')
    ax.set_ylabel('X1')
    ax.set_zlabel('X2')
    plt.show()
def save_plot_data_3d(f, data):
    """Save an uncoloured 3D scatter plot of ``data`` as a PNG file.

    ``data[0]``, ``data[1]`` and ``data[2]`` are plotted as black points on
    the X0, X1 and X2 axes. The output path is ``f`` with every occurrence of
    ``'.csv'`` replaced by ``'.png'``.

    :param f: file name of the data set, used to derive the output path
    :param data: object indexable by 0, 1 and 2 (e.g. a headerless DataFrame)
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(data[0], data[1], data[2], c='k', s=1)
    ax.set_xlabel('X0')
    ax.set_ylabel('X1')
    ax.set_zlabel('X2')
    fig.savefig(f.replace('.csv', '.png'), format='png')
    # A new figure is opened on every call, so it has to be closed (clearing
    # it is not enough) or figures pile up when this runs over many files.
    plt.close(fig)
def plot_data_2d(data):
    """Show a scatter plot of ``data[0]`` (x axis) against ``data[1]`` (y axis)."""
    xs, ys = data[0], data[1]
    plt.scatter(xs, ys, c='k', s=1)
    plt.xlabel("dim 0")
    plt.ylabel("dim 1")
    plt.show()
def save_plot_data_2d(f, data):
    """Scatter ``data[0]`` against ``data[1]`` and save the plot as a PNG.

    The output path is ``f`` with ``'.csv'`` replaced by ``'.png'``. The
    current figure is cleared afterwards so the next plot starts empty.
    """
    xs, ys = data[0], data[1]
    plt.scatter(xs, ys, c='k', s=1)
    png_path = f.replace('.csv', '.png')
    plt.savefig(png_path, format='png')
    plt.clf()
def write_out_file(problem, disc_intervals, disc_points, class_labels):
    """Build the text fragments of an ARFF-style file for discretized data.

    The header consists of a ``@relation`` line (name escaped through
    ``util.get_escaped_name``), one ``@attribute dim<i>`` line per dimension
    and one ``@attribute class`` line. Bin ids are made unique across
    dimensions by adding a running offset that starts at 1 and grows by the
    number of bins of each dimension. After ``@data`` every point contributes
    one fragment per dimension, each followed by a ``','`` fragment, and
    finally its quoted class label with a newline.

    :param problem: relation name before escaping
    :param disc_intervals: per dimension, a container whose iteration yields
        the bin ids of that dimension
    :param disc_points: per dimension, the bin id of every point
    :param class_labels: class labels; must provide ``unique()`` and indexing
    :return: list of string fragments (not whole lines) in output order
    """
    out = ['@relation ' + util.get_escaped_name(problem) + "\n\n"]

    # offsets[d] is the value added to the bin ids of dimension d.
    offsets = [1]
    for dim in range(len(disc_intervals)):
        base = offsets[-1]
        bin_ids = ','.join([str(b + base) for b in disc_intervals[dim]])
        out.append('@attribute dim' + str(dim) + ' {' + bin_ids + '}\n')
        offsets.append(base + len(disc_intervals[dim]))

    quoted_classes = ['"' + str(label) + '"' for label in class_labels.unique()]
    out.append('@attribute class {' + ','.join(quoted_classes) + '}\n\n')
    out.append('@data\n')

    for row in range(len(disc_points[0])):
        for dim in range(len(disc_points)):
            out.append(str(disc_points[dim][row] + offsets[dim]))
            out.append(',')
        out.append('"' + str(class_labels[row]) + '"\n')
    return out
def get_out_files(experiment_name, disc_intervals, disc_points, class_labels, relevant_features):
    """Build the lines of a space-separated data file and of an ARFF file.

    Only the first ``relevant_features`` dimensions are written. Bin ids are
    made unique across dimensions by adding a running offset that starts at 1
    and grows by the number of bins of each written dimension.

    :param experiment_name: name placed on the ``@relation`` line
    :param disc_intervals: per dimension, a container whose iteration yields
        the bin ids of that dimension
    :param disc_points: per dimension, the bin id of every point
    :param class_labels: class labels; must provide ``unique()`` and indexing
    :param relevant_features: number of leading dimensions to write
    :return: ``(dat_lines, arff_lines)``; each data row is
        ``"<v0> <v1> ... <label>\\n"`` in the first list and
        ``'<v0>,<v1>,...,"<label>"\\n'`` in the second
    """
    header = ['@relation ' + experiment_name + "\n\n"]

    # offsets[d] is the value added to the bin ids of dimension d.
    offsets = [1]
    for dim in range(relevant_features):
        base = offsets[-1]
        bin_ids = ','.join([str(b + base) for b in disc_intervals[dim]])
        header.append('@attribute dim' + str(dim) + ' {' + bin_ids + '}\n')
        offsets.append(base + len(disc_intervals[dim]))

    quoted_classes = ['"' + str(label) + '"' for label in class_labels.unique()]
    header.append('@attribute class {' + ','.join(quoted_classes) + '}\n\n')
    header.append('@data\n')

    dat_rows = []
    arff_rows = []
    for row in range(len(disc_points[0])):
        cells = []
        for dim in range(relevant_features):
            cells.append(str(disc_points[dim][row] + offsets[dim]))
        label = str(class_labels[row])
        dat_rows.append(' '.join(cells) + " " + label + '\n')
        arff_rows.append(",".join(cells) + ',"' + label + '"\n')
    return dat_rows, header + arff_rows
def get_cut_file(disc_intervals):
    """Build the lines of a cut file from per-dimension intervals.

    For each dimension the result contains a header line
    ``dimension <i> (<n> bins)``, then one line per bin holding element 1 of
    that bin's interval, then a dashed separator line.

    :param disc_intervals: per dimension, a mapping from bin id to an
        indexable interval whose element 1 is written out
    :return: list of newline-terminated strings
    """
    out = []
    for dim in range(len(disc_intervals)):
        bins = disc_intervals[dim]
        out.append('dimension ' + str(dim) + ' (' + str(len(bins)) + ' bins)\n')
        out.extend(str(bins[key][1]) + '\n' for key in bins)
        out.append('-------------------------------------\n')
    return out
def get_cuts(disc_intervals):
    """Return, per dimension, the list of element 1 of every bin's interval.

    :param disc_intervals: per dimension (indexed 0..n-1), a mapping from bin
        id to an indexable interval
    :return: list with one list of cut values per dimension
    """
    cuts = []
    for dim in range(len(disc_intervals)):
        bins = disc_intervals[dim]
        cuts.append([bins[key][1] for key in bins])
    return cuts
if __name__ == '__main__':
    # Alternative data sources that were used at some point; kept for
    # reference, enable one of them instead of the generated data below.
    # rows = 20000
    # data = np.concatenate((synthetic_cube_in_cube(rows, 2, 0), np.zeros((rows, 1))), axis=1)
    # data = pd.read_csv("synthetic_cases/blobs/3d_3_blobs_aligned.csv", delimiter=";", header=None, na_values='?')
    # data = pd.read_csv("new_cubes/cubes_10_100_03_i.csv", delimiter=";", header=None, na_values='?')
    # data = pd.read_csv("new_cubes/cubes_02_03_c.csv", delimiter=";", na_values='?', header=None)
    # data = pd.read_csv('synthetic_cases/uds_new.csv', delimiter=',', header=None)
    # data = pd.DataFrame(dg_old.correlated_data(4000, 3, 0.5, dg_old.func3))
    # data = pd.DataFrame(dg.cubes(4000))

    # Generate data with dg.correlated_data and show it as a 2D scatter plot.
    generated = dg.correlated_data(4000, 2, 0.1, dg.func3)
    data = pd.DataFrame(generated)
    plot_data_2d(data)

    # Batch mode (disabled): save one PNG per synthetic cube data set.
    # for f in [
    #     "2d_3_cubes_aligned_xor.csv",
    #     "2d_2_cubes_aligned.csv",
    #     "2d_2_cubes_xor.csv",
    #     # "3d_2_cubes_aligned.csv",
    #     # "3d_2_cubes_xor.csv",
    #     # "3d_3_cubes_aligned.csv",
    #     # "3d_3_cubes_aligned_xor.csv",
    #     # "3d_3_cubes_xor.csv",
    #     # "3d_4_cubes_1_aligned_xor.csv",
    #     # "3d_4_cubes_2_aligned.csv",
    #     # "3d_4_cubes_xor.csv",
    #     # "4d_2_cubes_aligned.csv",
    #     # "4d_3_cubes_aligned_xor.csv",
    #     # "4d_3_cubes_xor.csv",
    #     # "4d_4_cubes_aligned_xor.csv",
    #     # "4d_4_cubes_2_aligned.csv",
    #     # "4d_4_cubes_xor.csv",
    # ]:
    #     save_plot_data_2d(f, pd.read_csv("synthetic_cases/cubes/" + f, delimiter=";", header=None, na_values='?'))