Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import matplotlib.pyplot as plt
import pandas as pd
import discretization_quality_measure as dq
import data_generation as dg
from mpl_toolkits.mplot3d import Axes3D
import data_generator as dg_new
import util
import numpy as np
def plot_disc(problem, method):
    """Plot ideal vs. computed discretization cuts for *problem* and save PNGs.

    For every distance measure ('ID', 'CJS'), threshold and number of
    irrelevant features (0..10), loads the computed cuts from
    ``logs4/<run>.csv/cut.txt`` and draws them (blue) against the ideal cuts
    (red), one subplot per dimension, annotated with the quality score
    produced by *method*.

    :param problem: problem name used to locate cut files on disk
    :param method: scoring callable ``method(ideal_cuts_i, cuts_i) -> float``;
        its ``__name__`` is used in annotations and output file names
    """
    ideal_cuts = dq.parse_ideal_cuts("ideal_disc/cut_" + problem + ".txt")
    for dist in ['ID', 'CJS']:
        for threshold in ['0.3', '0.5', '0.8']:
            # dtws_irr = []
            for irr_feat in range(11):
                fig = plt.figure()
                # Run name convention: no suffix for 0 irrelevant features.
                curr = dist + "_ORIGINAL_" + threshold + "_" + problem + ("" if irr_feat == 0 else "_" + str(irr_feat))
                cuts = dq.parse_ideal_cuts("logs4/" + curr + ".csv/cut.txt")
                if not cuts:
                    # Missing/empty result for this configuration — skip it.
                    continue
                for i in range(len(ideal_cuts)):
                    ax = fig.add_subplot(len(ideal_cuts), 1, i + 1)
                    ax.hlines(1, -2.5, ideal_cuts[i][-1] + 0.5)  # Draw a horizontal line
                    ax.eventplot(ideal_cuts[i], orientation='horizontal', colors='r', linelengths = 2)
                    ax.annotate(method.__name__ + ': ' + "{0:.2f}".format(method(ideal_cuts[i], cuts[i]))
                                + ', # bins: ' + str(len(cuts[i])), xy=(-2.5, 2.2))
                    for cut in ideal_cuts[i]:
                        ax.annotate(str(cut), xy=(cut, 1), xytext=(cut-0.1, -0.2))
                    ax.hlines(1, -2.5, cuts[i][-1] + 0.5)  # Draw a horizontal line
                    ax.eventplot(cuts[i], orientation='horizontal', colors='b')
                    for j, cut in enumerate(cuts[i]):
                        # Alternate label height so adjacent cut labels don't overlap.
                        # NOTE(review): xy receives a formatted *string* here, unlike
                        # the numeric xy above — looks accidental; confirm intent.
                        ax.annotate(str(cut), xy=("{0:.1f}".format(cut), 1), xytext=(cut-0.1, 1.6 if j % 2 == 0 else 0.3))
                    plt.axis('off')
                plt.savefig(method.__name__ + "_" + curr + '.png', format='png')
def plot_data_3d(data, coloring=True):
    """Show an interactive 3D scatter plot of the first three columns of *data*.

    :param data: DataFrame (or anything convertible) whose columns 0-2 are
        the coordinates; column 6 is read as a class label when coloring
    :param coloring: if True, color points by the label in column 6
        (values 0..4 -> b/k/r/g/m); otherwise plot everything black
    """
    if type(data) is not pd.DataFrame:
        data = pd.DataFrame(data)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlim(-3, 3)
    ax.set_ylim(-3, 3)
    ax.set_zlim(-3, 3)
    if coloring:
        # 3d parity problem — one color per class label in column 6
        color_cond = {'b': data[6] == 0,
                      'k': data[6] == 1,
                      'r': data[6] == 2,
                      'g': data[6] == 3,
                      'm': data[6] == 4,
                      }
        for c in color_cond:
            ax.scatter(data[0][color_cond[c]], data[1][color_cond[c]], data[2][color_cond[c]], c=c, s=1)
    else:
        ## without coloring
        ax.scatter(data[0], data[1], data[2], c='k', s=1)
    #
    ax.set_xlabel('X0')
    ax.set_ylabel('X1')
    ax.set_zlabel('X2')
    plt.show()
def build_plot_data_3d(ax, data):
    """Draw a colored 3D scatter of *data* onto an existing 3D axis *ax*.

    Unlike :func:`plot_data_3d`, the class label is read from column 3
    (values 0..3 -> b/k/r/g) and no figure is created or shown — the
    caller owns the axis.
    """
    # data = data[np.logical_and(data[0] < 0, data[1] > 0)]
    # 3d parity problem — one color per class label in column 3
    color_cond = {'b': data[3] == 0,
                  'k': data[3] == 1,
                  'r': data[3] == 2,
                  'g': data[3] == 3,
                  }
    for c in color_cond:
        ax.scatter(data[0][color_cond[c]], data[1][color_cond[c]], data[2][color_cond[c]], c=c, s=1)
    ## without coloring
    # ax.scatter(data[0], data[1], data[2], c='k', s=1)
    ax.set_xlabel('X0')
    ax.set_ylabel('X1')
    ax.set_zlabel('X2')
def save_plot_data_3d(f, data):
    """Save an uncolored 3D scatter of columns 0-2 of *data* as a PNG.

    :param f: source file name; the output path is *f* with ``.csv``
        replaced by ``.png``
    :param data: DataFrame-like with integer columns 0-2 as coordinates
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # data = data[np.logical_and(data[0] < 0, data[1] > 0)]
    ## 3d parity problem
    # color_cond = {'b': np.logical_and(data[0] < 0, np.logical_and(data[1] > 0, data[2] < 0)),
    #               'k': np.logical_and(data[0] < 0, np.logical_and(data[1] > 0, data[2] > 0)),
    #               'g': np.logical_and(data[0] > 0, data[1] < 0),
    #               'r': np.logical_and(data[0] < 0, data[1] < 0),
    #               'c': np.logical_and(data[0] > 0, data[1] > 0),
    #               }
    # for c in color_cond:
    #     ax.scatter(data[0][color_cond[c]], data[1][color_cond[c]], data[2][color_cond[c]], c=c, s=1)
    ## without coloring
    ax.scatter(data[0], data[1], data[2], c='k', s=1)
    ax.set_xlabel('X0')
    ax.set_ylabel('X1')
    ax.set_zlabel('X2')
    plt.savefig(f.replace('.csv', '.png'), format='png')
    # Clear the current figure so repeated calls don't accumulate points.
    plt.clf()
def plot_data_2d(data):
    """Show a 2D scatter plot of columns 1 and 2 of *data*.

    NOTE(review): the axes plot columns 1 and 2 but are labeled
    "dim 0"/"dim 1" — possibly intentional (column 0 skipped), but the
    sibling :func:`save_plot_data_2d` uses columns 0 and 1; confirm.
    """
    if type(data) is not pd.DataFrame:
        data = pd.DataFrame(data)
    plt.scatter(data[1], data[2], s=1, c='k')
    plt.xlabel("dim 0")
    plt.ylabel("dim 1")
    plt.show()
def save_plot_data_2d(f, data):
    """Save a black 2D scatter of columns 0 and 1 of *data* as a PNG.

    :param f: source file name; output is *f* with ``.csv`` -> ``.png``
    :param data: DataFrame-like with integer columns 0 and 1
    """
    plt.scatter(data[0], data[1], s=1, c='k')
    plt.savefig(f.replace('.csv', '.png'), format='png')
    # Clear the current figure so repeated calls don't accumulate points.
    plt.clf()
def write_out_file(problem, disc_intervals, disc_points, class_labels):
    """Build the lines of an ARFF file for a discretized data set.

    :param problem: problem name, escaped via ``util.get_escaped_name``
        for the ``@relation`` header
    :param disc_intervals: per-dimension iterables of interval ids; each id
        is shifted by a running offset so bin ids are globally unique
    :param disc_points: per-dimension sequences of bin indices, one entry
        per data row
    :param class_labels: pandas Series of class labels (``.unique()`` is used)
    :return: list of string fragments; ``''.join(...)`` yields the ARFF text
    """
    out = ['@relation ' + util.get_escaped_name(problem) + "\n\n"]
    # offsets[d] is the first global bin id of dimension d (1-based).
    offsets = [1]
    for dim, intervals in enumerate(disc_intervals):
        bin_ids = ','.join(str(j + offsets[-1]) for j in intervals)
        out.append('@attribute dim' + str(dim) + ' {' + bin_ids + '}\n')
        offsets.append(offsets[-1] + len(intervals))
    classes = ','.join('"' + str(label) + '"' for label in class_labels.unique())
    out.append('@attribute class {' + classes + '}\n\n')
    out.append('@data\n')
    for row in range(len(disc_points[0])):
        for dim in range(len(disc_points)):
            # Emit the globally-offset bin id, then the field separator.
            out.append(str(disc_points[dim][row] + offsets[dim]))
            out.append(',')
        out.append('"' + str(class_labels[row]) + '"\n')
    return out
def get_out_files(experiment_name, disc_intervals, disc_points, class_labels, relevant_features):
    """Build the lines of the .dat and .arff output files side by side.

    :param experiment_name: name used in the ARFF ``@relation`` header
    :param disc_intervals: per-dimension interval collections; only their
        lengths (bin counts) are used here
    :param disc_points: per-dimension sequences of bin indices per data row
    :param class_labels: pandas Series of class labels (``.unique()`` is used)
    :param relevant_features: number of leading dimensions to write out
    :return: tuple ``(dat_lines, arff_lines)`` of line lists
    """
    dat_lines = []
    arff_lines = ['@relation ' + experiment_name + "\n\n"]
    # offsets[d] is the first global bin id of dimension d (1-based).
    offsets = [1]
    for dim in range(relevant_features):
        n_bins = len(disc_intervals[dim])
        bin_ids = ','.join(str(b + offsets[-1]) for b in range(n_bins))
        arff_lines.append('@attribute dim' + str(dim) + ' {' + bin_ids + '}\n')
        offsets.append(offsets[-1] + n_bins)
    classes = ','.join('"' + str(label) + '"' for label in class_labels.unique())
    arff_lines.append('@attribute class {' + classes + '}\n\n')
    arff_lines.append('@data\n')
    for row in range(len(disc_points[0])):
        values = [str(disc_points[dim][row] + offsets[dim]) for dim in range(relevant_features)]
        label = str(class_labels[row])
        dat_lines.append(' '.join(values) + " " + label + '\n')
        arff_lines.append(','.join(values) + ',"' + label + '"\n')
    return dat_lines, arff_lines
def get_cut_file(disc_intervals):
    """Render discretization intervals as the lines of a cut file.

    Each dimension contributes a header line with its bin count, one line
    per bin holding the bin's upper boundary (``interval[1]``), and a
    dashed separator line.

    :param disc_intervals: per-dimension iterables of 2-element intervals
    :return: list of text lines (each terminated with a newline)
    """
    lines = []
    for dim, intervals in enumerate(disc_intervals):
        lines.append('dimension ' + str(dim) + ' (' + str(len(intervals)) + ' bins)\n')
        for interval in intervals:
            lines.append(str(interval[1]) + '\n')
        lines.append('-------------------------------------\n')
    return lines
def get_cuts(disc_intervals):
    """Return the upper boundary (``interval[1]``) of every bin, per dimension.

    :param disc_intervals: per-dimension iterables of 2-element intervals
    :return: list of lists of cut points, one inner list per dimension
    """
    cuts = []
    for intervals in disc_intervals:
        cuts.append([interval[1] for interval in intervals])
    return cuts
if __name__ == '__main__':
    # Ad-hoc driver: generate a synthetic XOR data set and visualize it in 3D.
    # The commented alternatives below load other synthetic data sets instead.
    # rows = 20000
    # data = np.concatenate((synthetic_cube_in_cube(rows, 2, 0), np.zeros((rows, 1))), axis=1)
    # data = pd.read_csv("synthetic_cases/blobs/3d_3_blobs_aligned.csv", delimiter=";", header=None, na_values='?')
    # data = pd.read_csv("new_cubes/cubes_10_100_03_i.csv", delimiter=";", header=None, na_values='?')
    # data = pd.read_csv("new_cubes/cubes_02_03_c.csv", delimiter=";", na_values='?', header=None)
    # data = pd.read_csv("new_cubes2/xorblobs_n1000_r3_off1.csv", delimiter=";", na_values='?', header=None)
    data = dg_new.produce_xor_generator(3, 3, "bla", 5000, offset=(0, 0), distribution="gauss").build()
    # data = dg_new.produce_cube_generator(3, 0, 1, 2, 'bla', 5000, "gauss").build()
    # build() appears to return a tuple; element 1 is printed (presumably
    # metadata/file name — TODO confirm), element 0 is the data itself.
    print(data[1])
    data = data[0]
    # data = pd.read_csv('synthetic_cases/uds_new.csv', delimiter=',', header=None)
    # data = pd.DataFrame(dg.correlated_data(4000, 2, 0.1, dg.func3))
    # data = pd.DataFrame(dg_new.produce_xor_generator(3, 0, "bla").build()[0])
    # data = pd.DataFrame(dg_old.correlated_data(4000, 3, 0.5, dg_old.func3))
    # data = pd.DataFrame(dg.cubes(4000))
    plot_data_3d(data)
    # Batch export of 2D scatter PNGs for the synthetic cube data sets:
    # for f in [
    #     "2d_3_cubes_aligned_xor.csv",
    #     "2d_2_cubes_aligned.csv",
    #     "2d_2_cubes_xor.csv",
    #     # "3d_2_cubes_aligned.csv",
    #     # "3d_2_cubes_xor.csv",
    #     # "3d_3_cubes_aligned.csv",
    #     # "3d_3_cubes_aligned_xor.csv",
    #     # "3d_3_cubes_xor.csv",
    #     # "3d_4_cubes_1_aligned_xor.csv",
    #     # "3d_4_cubes_2_aligned.csv",
    #     # "3d_4_cubes_xor.csv",
    #     # "4d_2_cubes_aligned.csv",
    #     # "4d_3_cubes_aligned_xor.csv",
    #     # "4d_3_cubes_xor.csv",
    #     # "4d_4_cubes_aligned_xor.csv",
    #     # "4d_4_cubes_2_aligned.csv",
    #     # "4d_4_cubes_xor.csv",
    # ]:
    #     save_plot_data_2d(f, pd.read_csv("synthetic_cases/cubes/" + f, delimiter=";", header=None, na_values='?'))