# ipd_extended/experiments_logging.py
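"""Plotting and output helpers for the discretization experiments.

Utilities to visualize computed cut points against ideal cuts, scatter-plot
2D/3D synthetic datasets, and serialize discretized data as .dat/.arff lines
and cut files.
"""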
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the '3d' projection

import discretization_quality_measure as dq
import data_generation as dg
import data_generator as dg_new
import util
def plot_disc(problem, method):
    ideal_cuts = dq.parse_ideal_cuts("ideal_disc/cut_" + problem + ".txt")
    for dist in ['ID', 'CJS']:
        for threshold in ['0.3', '0.5', '0.8']:
            # dtws_irr = []
            for irr_feat in range(11):
                fig = plt.figure()
                curr = dist + "_ORIGINAL_" + threshold + "_" + problem + ("" if irr_feat == 0 else "_" + str(irr_feat))
                cuts = dq.parse_ideal_cuts("logs4/" + curr + ".csv/cut.txt")
                if not cuts:
                    continue
                for i in range(len(ideal_cuts)):
                    ax = fig.add_subplot(len(ideal_cuts), 1, i + 1)
                    ax.hlines(1, -2.5, ideal_cuts[i][-1] + 0.5)  # draw a horizontal baseline for the ideal cuts
                    ax.eventplot(ideal_cuts[i], orientation='horizontal', colors='r', linelengths=2)
                    ax.annotate(method.__name__ + ': ' + "{0:.2f}".format(method(ideal_cuts[i], cuts[i]))
                                + ', # bins: ' + str(len(cuts[i])), xy=(-2.5, 2.2))
                    for cut in ideal_cuts[i]:
                        ax.annotate(str(cut), xy=(cut, 1), xytext=(cut - 0.1, -0.2))
                    ax.hlines(1, -2.5, cuts[i][-1] + 0.5)  # draw a horizontal baseline for the computed cuts
                    ax.eventplot(cuts[i], orientation='horizontal', colors='b')
                    for j, cut in enumerate(cuts[i]):
                        ax.annotate(str(cut), xy=(cut, 1), xytext=(cut - 0.1, 1.6 if j % 2 == 0 else 0.3))
                    plt.axis('off')
                plt.savefig(method.__name__ + "_" + curr + '.png', format='png')
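# plot_disc expects the ideal cuts under "ideal_disc/cut_<problem>.txt" and the
# logged cuts under "logs4/<dist>_ORIGINAL_<threshold>_<problem>[_<irr_feat>].csv/cut.txt".
# Hypothetical call (the measure name is an assumption; any callable scoring an
# (ideal_cuts, cuts) pair works):
#   plot_disc("parity", dq.dtw)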
def plot_data_3d(data, coloring=True):
    if type(data) is not pd.DataFrame:
        data = pd.DataFrame(data)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlim(-3, 3)
    ax.set_ylim(-3, 3)
    ax.set_zlim(-3, 3)
    if coloring:
        # 3d parity problem
        color_cond = {'b': data[6] == 0,
                      'k': data[6] == 1,
                      'r': data[6] == 2,
                      'g': data[6] == 3,
                      'm': data[6] == 4,
                      }
        for c in color_cond:
            ax.scatter(data[0][color_cond[c]], data[1][color_cond[c]], data[2][color_cond[c]], c=c, s=1)
    else:
        # without coloring
        ax.scatter(data[0], data[1], data[2], c='k', s=1)

    ax.set_xlabel('X0')
    ax.set_ylabel('X1')
    ax.set_zlabel('X2')
    plt.show()
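# Note: with coloring=True the frame is expected to carry the class label in
# column 6 (e.g. 3 relevant + 3 irrelevant features followed by the label);
# pass coloring=False for data without a label column.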
def build_plot_data_3d(ax, data):
    # data = data[np.logical_and(data[0] < 0, data[1] > 0)]
    # 3d parity problem
    color_cond = {'b': data[3] == 0,
                  'k': data[3] == 1,
                  'r': data[3] == 2,
                  'g': data[3] == 3,
                  }
    for c in color_cond:
        ax.scatter(data[0][color_cond[c]], data[1][color_cond[c]], data[2][color_cond[c]], c=c, s=1)
    # without coloring:
    # ax.scatter(data[0], data[1], data[2], c='k', s=1)
    ax.set_xlabel('X0')
    ax.set_ylabel('X1')
    ax.set_zlabel('X2')
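# build_plot_data_3d draws onto an externally created Axes3D (e.g. one subplot in
# a grid of experiments) and assumes the class label sits in column 3.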
def save_plot_data_3d(f, data):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # data = data[np.logical_and(data[0] < 0, data[1] > 0)]
    # 3d parity problem coloring (disabled):
    # color_cond = {'b': np.logical_and(data[0] < 0, np.logical_and(data[1] > 0, data[2] < 0)),
    #               'k': np.logical_and(data[0] < 0, np.logical_and(data[1] > 0, data[2] > 0)),
    #               'g': np.logical_and(data[0] > 0, data[1] < 0),
    #               'r': np.logical_and(data[0] < 0, data[1] < 0),
    #               'c': np.logical_and(data[0] > 0, data[1] > 0),
    #               }
    # for c in color_cond:
    #     ax.scatter(data[0][color_cond[c]], data[1][color_cond[c]], data[2][color_cond[c]], c=c, s=1)
    # without coloring
    ax.scatter(data[0], data[1], data[2], c='k', s=1)
    ax.set_xlabel('X0')
    ax.set_ylabel('X1')
    ax.set_zlabel('X2')
    plt.savefig(f.replace('.csv', '.png'), format='png')
    plt.clf()
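# The figure is written next to the source data file: "foo/bar.csv" -> "foo/bar.png".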
def plot_data_2d(data):
    if type(data) is not pd.DataFrame:
        data = pd.DataFrame(data)
    plt.scatter(data[1], data[2], s=1, c='k')
    plt.xlabel("dim 0")
    plt.ylabel("dim 1")
    plt.show()
def save_plot_data_2d(f, data):
    plt.scatter(data[0], data[1], s=1, c='k')
    plt.savefig(f.replace('.csv', '.png'), format='png')
    plt.clf()
def write_out_file(problem, disc_intervals, disc_points, class_labels):
    lines = ['@relation ' + util.get_escaped_name(problem) + "\n\n"]
    counter = [1]
    for i in range(len(disc_intervals)):
        lines.append(
            '@attribute dim' + str(i) + ' {' + ','.join([str(j + counter[-1]) for j in disc_intervals[i]]) + '}\n')
        counter.append(counter[-1] + len(disc_intervals[i]))
    lines.append('@attribute class {' + ','.join(['"' + str(i) + '"' for i in class_labels.unique()]) + '}\n\n')
    lines.append('@data\n')
    for i in range(len(disc_points[0])):
        for j in range(len(disc_points)):
            lines.append(str(disc_points[j][i] + counter[j]))
            lines.append(',')
        lines.append('"' + str(class_labels[i]) + '"\n')
    return lines
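# Illustrative shape of the returned ARFF lines (interval ids are offset by the
# running counter so they are unique across dimensions), e.g. with 3 bins in
# dim0 and 2 bins in dim1:
#   @relation <problem>
#   @attribute dim0 {1,2,3}
#   @attribute dim1 {4,5}
#   @attribute class {"0","1"}
#   @data
#   2,5,"1"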
def get_out_files(experiment_name, disc_intervals, disc_points, class_labels, relevant_features):
    dat_lines = []
    arff_lines = ['@relation ' + experiment_name + "\n\n"]
    counter = [1]
    for i in range(relevant_features):
        arff_lines.append(
            '@attribute dim' + str(i) + ' {'
            + ','.join([str(interval_id + counter[-1]) for interval_id in range(len(disc_intervals[i]))])
            + '}\n')
        counter.append(counter[-1] + len(disc_intervals[i]))
    arff_lines.append('@attribute class {' + ','.join(['"' + str(i) + '"' for i in class_labels.unique()]) + '}\n\n')
    arff_lines.append('@data\n')
    for i in range(len(disc_points[0])):
        values = [str(disc_points[j][i] + counter[j]) for j in range(relevant_features)]
        dat_line = ' '.join(values)
        dat_lines.append(dat_line + " " + str(class_labels[i]) + '\n')
        arff_line = ",".join(values)
        arff_lines.append(arff_line + ',"' + str(class_labels[i]) + '"\n')
    return dat_lines, arff_lines
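# get_out_files returns both formats for the same discretization: the .dat lines
# are space-separated with an unquoted label, the .arff lines are comma-separated
# with a quoted label.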
def get_cut_file(disc_intervals):
    lines = []
    for i in range(len(disc_intervals)):
        lines.append('dimension ' + str(i) + ' (' + str(len(disc_intervals[i])) + ' bins)\n')
        for bin in disc_intervals[i]:
            lines.append(str(bin[1]) + '\n')
        lines.append('-------------------------------------\n')
    return lines
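# Resulting cut-file layout, one block per dimension; bin[1] is assumed to be the
# bin's upper cut point:
#   dimension 0 (3 bins)
#   <cut 1>
#   <cut 2>
#   <cut 3>
#   -------------------------------------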
def get_cuts(disc_intervals):
    return [[bin[1] for bin in disc_intervals[i]] for i in range(len(disc_intervals))]
if __name__ == '__main__':
    # rows = 20000
    # data = np.concatenate((synthetic_cube_in_cube(rows, 2, 0), np.zeros((rows, 1))), axis=1)
    # data = pd.read_csv("synthetic_cases/blobs/3d_3_blobs_aligned.csv", delimiter=";", header=None, na_values='?')
    # data = pd.read_csv("new_cubes/cubes_10_100_03_i.csv", delimiter=";", header=None, na_values='?')
    # data = pd.read_csv("new_cubes/cubes_02_03_c.csv", delimiter=";", na_values='?', header=None)
    # data = pd.read_csv("new_cubes2/xorblobs_n1000_r3_off1.csv", delimiter=";", na_values='?', header=None)
    data = dg_new.produce_xor_generator(3, 3, "bla", 5000, offset=(0, 0), distribution="gauss").build()
    # data = dg_new.produce_cube_generator(3, 0, 1, 2, 'bla', 5000, "gauss").build()
    print(data[1])
    data = data[0]
    # data = pd.read_csv('synthetic_cases/uds_new.csv', delimiter=',', header=None)
    # data = pd.DataFrame(dg.correlated_data(4000, 2, 0.1, dg.func3))
    # data = pd.DataFrame(dg_new.produce_xor_generator(3, 0, "bla").build()[0])
    # data = pd.DataFrame(dg_old.correlated_data(4000, 3, 0.5, dg_old.func3))
    # data = pd.DataFrame(dg.cubes(4000))
    plot_data_3d(data)
    # Batch-plot a set of synthetic cube datasets (disabled):
    # for f in [
    #     "2d_3_cubes_aligned_xor.csv",
    #     "2d_2_cubes_aligned.csv",
    #     "2d_2_cubes_xor.csv",
    #     # "3d_2_cubes_aligned.csv",
    #     # "3d_2_cubes_xor.csv",
    #     # "3d_3_cubes_aligned.csv",
    #     # "3d_3_cubes_aligned_xor.csv",
    #     # "3d_3_cubes_xor.csv",
    #     # "3d_4_cubes_1_aligned_xor.csv",
    #     # "3d_4_cubes_2_aligned.csv",
    #     # "3d_4_cubes_xor.csv",
    #     # "4d_2_cubes_aligned.csv",
    #     # "4d_3_cubes_aligned_xor.csv",
    #     # "4d_3_cubes_xor.csv",
    #     # "4d_4_cubes_aligned_xor.csv",
    #     # "4d_4_cubes_2_aligned.csv",
    #     # "4d_4_cubes_xor.csv",
    # ]:
    #     save_plot_data_2d(f, pd.read_csv("synthetic_cases/cubes/" + f, delimiter=";", header=None, na_values='?'))