Skip to content
Permalink
25858984cf
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
166 lines (139 sloc) 6.64 KB
import matplotlib.pyplot as plt
import pandas as pd
import discretization_quality_measure as dq
from mpl_toolkits.mplot3d import Axes3D
import util
def plot_disc(problem, method):
    """Plot the computed cut points of each run next to the ideal cut points.

    The ideal cuts are parsed from ``ideal_disc/cut_<problem>.txt``. For every
    combination of distance measure (``'ID'``, ``'CJS'``), threshold
    (``'0.3'``, ``'0.5'``, ``'0.8'``) and suffix index ``irr_feat`` in
    ``range(11)`` the cut file ``logs4/<run name>.csv/cut.txt`` is parsed.
    Runs for which the parser returns nothing are skipped. For every other run
    one PNG named ``<method.__name__>_<run name>.png`` is written, containing
    one subplot per entry of the ideal cuts: ideal cuts are drawn in red,
    computed cuts in blue, and the subplot is annotated with the value of
    ``method`` and the length of the computed cut list.

    :param problem: problem name used to build the input paths and run names.
    :param method: callable invoked as ``method(ideal_cuts[i], cuts[i])``; the
        result is formatted with two decimals, and ``method.__name__`` is used
        in the annotation and in the output file name.
    """
    ideal_cuts = dq.parse_cuts("ideal_disc/cut_" + problem + ".txt")
    n_dims = len(ideal_cuts)
    for dist in ['ID', 'CJS']:
        for threshold in ['0.3', '0.5', '0.8']:
            for irr_feat in range(11):
                # irr_feat == 0 maps to the plain problem name; other values are
                # appended as a suffix (presumably the number of irrelevant
                # features -- confirm against the code that writes logs4/).
                curr = dist + "_ORIGINAL_" + threshold + "_" + problem + ("" if irr_feat == 0 else "_" + str(irr_feat))
                cuts = dq.parse_cuts("logs4/" + curr + ".csv/cut.txt")
                if not cuts:
                    continue
                # Open the figure only when there is something to draw, and
                # always close it again: pyplot keeps every figure alive until
                # it is closed explicitly, so the loop would otherwise
                # accumulate one open figure per run.
                fig = plt.figure()
                try:
                    for i in range(n_dims):
                        ax = fig.add_subplot(n_dims, 1, i + 1)
                        # Ideal cuts: base line, long red ticks, value labels below.
                        ax.hlines(1, -2.5, ideal_cuts[i][-1] + 0.5)
                        ax.eventplot(ideal_cuts[i], orientation='horizontal', colors='r', linelengths=2)
                        ax.annotate(method.__name__ + ': ' + "{0:.2f}".format(method(ideal_cuts[i], cuts[i]))
                                    + ', # bins: ' + str(len(cuts[i])), xy=(-2.5, 2.2))
                        for cut in ideal_cuts[i]:
                            ax.annotate(str(cut), xy=(cut, 1), xytext=(cut - 0.1, -0.2))
                        # Computed cuts: base line, blue ticks, labels alternating
                        # above/below the line so neighbouring values overlap less.
                        ax.hlines(1, -2.5, cuts[i][-1] + 0.5)
                        ax.eventplot(cuts[i], orientation='horizontal', colors='b')
                        for j, cut in enumerate(cuts[i]):
                            # The annotated point must be numeric; the original
                            # passed the formatted string itself as x coordinate.
                            ax.annotate(str(cut), xy=(float("{0:.1f}".format(cut)), 1),
                                        xytext=(cut - 0.1, 1.6 if j % 2 == 0 else 0.3))
                        ax.axis('off')
                    fig.savefig(method.__name__ + "_" + curr + '.png', format='png')
                finally:
                    plt.close(fig)
def plot_data_3d(data):
    """Display a 3D scatter plot of columns 0, 1 and 2 of ``data``.

    :param data: a ``pandas.DataFrame``, or any object ``pandas.DataFrame`` can
        be constructed from. Columns ``0``, ``1`` and ``2`` are plotted on the
        axes labelled X0, X1 and X2.
    """
    # isinstance (rather than an exact type comparison) lets DataFrame
    # subclasses through without wrapping them in a new frame.
    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # data = data[np.logical_and(data[0] < 0, data[1] > 0)]

    ## 3d parity problem
    # color_cond = {'b': np.logical_and(data[0] < 0, np.logical_and(data[1] > 0, data[2] < 0)),
    #               'k': np.logical_and(data[0] < 0, np.logical_and(data[1] > 0, data[2] > 0)),
    #               'g': np.logical_and(data[0] > 0, data[1] < 0),
    #               'r': np.logical_and(data[0] < 0, data[1] < 0),
    #               'c': np.logical_and(data[0] > 0, data[1] > 0),
    #               }
    # for c in color_cond:
    #     ax.scatter(data[0][color_cond[c]], data[1][color_cond[c]], data[2][color_cond[c]], c=c, s=1)

    ## without coloring
    ax.scatter(data[0], data[1], data[2], c='k', s=1)

    ax.set_xlabel('X0')
    ax.set_ylabel('X1')
    ax.set_zlabel('X2')
    plt.show()
def save_plot_data_3d(f, data):
    """Write a 3D scatter plot of columns 0, 1 and 2 of ``data`` to a PNG file.

    :param f: path string; every occurrence of ``'.csv'`` in it is replaced by
        ``'.png'`` to obtain the output path.
    :param data: object indexable with ``0``, ``1`` and ``2``; the three
        results are plotted on the axes labelled X0, X1 and X2.
    """
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111, projection='3d')

        # data = data[np.logical_and(data[0] < 0, data[1] > 0)]

        ## 3d parity problem
        # color_cond = {'b': np.logical_and(data[0] < 0, np.logical_and(data[1] > 0, data[2] < 0)),
        #               'k': np.logical_and(data[0] < 0, np.logical_and(data[1] > 0, data[2] > 0)),
        #               'g': np.logical_and(data[0] > 0, data[1] < 0),
        #               'r': np.logical_and(data[0] < 0, data[1] < 0),
        #               'c': np.logical_and(data[0] > 0, data[1] > 0),
        #               }
        # for c in color_cond:
        #     ax.scatter(data[0][color_cond[c]], data[1][color_cond[c]], data[2][color_cond[c]], c=c, s=1)

        ## without coloring
        ax.scatter(data[0], data[1], data[2], c='k', s=1)

        ax.set_xlabel('X0')
        ax.set_ylabel('X1')
        ax.set_zlabel('X2')
        fig.savefig(f.replace('.csv', '.png'), format='png')
    finally:
        # Clearing the figure is not enough: pyplot keeps a figure registered
        # until it is closed, so batch exports would otherwise pile up one
        # open figure per call.
        plt.close(fig)
def plot_data_2d(data, x_dim=0, y_dim=3):
    """Display a 2D scatter plot of two columns of ``data``.

    :param data: object indexable by column key (e.g. a DataFrame read without
        a header, whose columns are then keyed by integers).
    :param x_dim: key of the column drawn on the horizontal axis.
    :param y_dim: key of the column drawn on the vertical axis. The default of
        ``3`` is the column the function used to hard-code.
        NOTE(review): ``save_plot_data_2d`` plots column 1 -- confirm whether
        3 is really the intended default here.
    """
    plt.scatter(data[x_dim], data[y_dim], s=1, c='k')
    # Labels follow the plotted columns. The y axis used to be labelled
    # "dim 1" although column 3 was the one being drawn.
    plt.xlabel("dim " + str(x_dim))
    plt.ylabel("dim " + str(y_dim))
    plt.show()
def save_plot_data_2d(f, data):
    """Write a 2D scatter plot of columns 0 and 1 of ``data`` to a PNG file.

    :param f: path string; every occurrence of ``'.csv'`` in it is replaced by
        ``'.png'`` to obtain the output path.
    :param data: object indexable with ``0`` and ``1``.
    """
    xs = data[0]
    ys = data[1]
    plt.scatter(xs, ys, s=1, c='k')
    png_path = f.replace('.csv', '.png')
    plt.savefig(png_path, format='png')
    # Wipe the current figure so the next call starts from an empty canvas.
    plt.clf()
def write_out_file(problem, disc_intervals, disc_points, class_labels):
    """Build the lines of an ARFF-style file for discretized data.

    The header consists of an ``@relation`` line, one ``@attribute dim<i>``
    line per entry of ``disc_intervals`` and one ``@attribute class`` line.
    Bin identifiers are shifted by a running offset (starting at 1) so that
    the values of different dimensions never coincide. After ``@data`` every
    row is emitted as its shifted bin identifiers followed by the quoted
    class label.

    :param problem: name passed through ``util.get_escaped_name`` for the
        ``@relation`` line.
    :param disc_intervals: indexable by ``0 .. len-1``; iterating
        ``disc_intervals[i]`` yields the numeric bin identifiers of
        dimension ``i``.
    :param disc_points: indexable by ``0 .. len-1``; ``disc_points[j][i]`` is
        the bin identifier of row ``i`` in dimension ``j``.
    :param class_labels: object providing ``unique()`` and indexing by row
        number (presumably a pandas Series -- confirm against the caller).
    :return: list of strings. Note that every value and every comma of a data
        row is a separate list element.
    """
    lines = ['@relation ' + util.get_escaped_name(problem) + "\n\n"]

    # offsets[d] is the value added to every bin identifier of dimension d.
    offsets = [1]
    for dim in range(len(disc_intervals)):
        bins = disc_intervals[dim]
        values = ','.join([str(bin_id + offsets[-1]) for bin_id in bins])
        lines.append('@attribute dim' + str(dim) + ' {' + values + '}\n')
        offsets.append(offsets[-1] + len(bins))

    quoted_labels = ['"' + str(label) + '"' for label in class_labels.unique()]
    lines.append('@attribute class {' + ','.join(quoted_labels) + '}\n\n')
    lines.append('@data\n')

    # Without any dimension there are no rows; emit the header only instead of
    # failing on disc_points[0].
    n_rows = len(disc_points[0]) if len(disc_points) else 0
    for row in range(n_rows):
        for dim in range(len(disc_points)):
            lines.append(str(disc_points[dim][row] + offsets[dim]))
            lines.append(',')
        lines.append('"' + str(class_labels[row]) + '"\n')
    return lines
def write_outdat_file(disc_intervals, disc_points, class_labels, relevant_features):
    """Build the lines of a space-separated data file for discretized data.

    Every row consists of the bin identifiers of the first
    ``relevant_features`` dimensions, each shifted by a per-dimension offset,
    followed by the class label. The offset of dimension 0 is 1 and each
    following offset grows by the number of bins of the preceding dimension.

    :param disc_intervals: indexable by ``0 .. len-1``; ``len(disc_intervals[i])``
        is the number of bins of dimension ``i``.
    :param disc_points: indexable by ``0 .. len-1``; ``disc_points[j][i]`` is
        the bin identifier of row ``i`` in dimension ``j``.
    :param class_labels: indexable by row number.
    :param relevant_features: number of leading dimensions written per row.
    :return: list of newline-terminated strings, one per row; an empty list
        when ``disc_points`` is empty.
    """
    # offsets[d] is the value added to every bin identifier of dimension d.
    offsets = [1]
    for dim in range(len(disc_intervals)):
        offsets.append(offsets[-1] + len(disc_intervals[dim]))

    # Without any dimension there are no rows; return nothing instead of
    # failing on disc_points[0].
    n_rows = len(disc_points[0]) if len(disc_points) else 0

    lines = []
    for row in range(n_rows):
        values = [str(disc_points[dim][row] + offsets[dim]) for dim in range(relevant_features)]
        lines.append(' '.join(values) + " " + str(class_labels[row]) + '\n')
    return lines
def write_cut_file(disc_intervals):
    """Build the lines of a cut file listing the upper bound of every bin.

    For each dimension a header line ``dimension <i> (<n> bins)`` is emitted,
    then element ``[1]`` of every interval of that dimension on its own line,
    then a separator line of dashes.

    :param disc_intervals: indexable by ``0 .. len-1``; iterating
        ``disc_intervals[i]`` yields bin keys and ``disc_intervals[i][key][1]``
        is the value written for that bin.
    :return: list of newline-terminated strings.
    """
    lines = []
    for dim in range(len(disc_intervals)):
        bins = disc_intervals[dim]
        lines.append('dimension ' + str(dim) + ' (' + str(len(bins)) + ' bins)\n')
        # The loop variable used to be called `bin`, hiding the builtin.
        for bin_key in bins:
            lines.append(str(bins[bin_key][1]) + '\n')
        lines.append('-------------------------------------\n')
    return lines
if __name__ == '__main__':
    # Script entry point: load one semicolon-separated, header-less data set
    # ('?' marks missing values) and show it as a 3D scatter plot.

    # Alternative inputs kept from earlier experiments:
    # rows = 20000
    # data = np.concatenate((synthetic_cube_in_cube(rows, 2, 0), np.zeros((rows, 1))), axis=1)
    # data = pd.read_csv("synthetic_cases/blobs/3d_3_blobs_aligned.csv", delimiter=";", header=None, na_values='?')
    # data = pd.DataFrame(dg.cubes(4000))
    source_csv = "new_cubes/cubes_10_100_03_i.csv"
    data = pd.read_csv(source_csv, delimiter=";", header=None, na_values='?')
    plot_data_3d(data)

    # Batch export of the synthetic cube data sets (disabled):
    # for f in [
    #     "2d_3_cubes_aligned_xor.csv",
    #     "2d_2_cubes_aligned.csv",
    #     "2d_2_cubes_xor.csv",
    #     # "3d_2_cubes_aligned.csv",
    #     # "3d_2_cubes_xor.csv",
    #     # "3d_3_cubes_aligned.csv",
    #     # "3d_3_cubes_aligned_xor.csv",
    #     # "3d_3_cubes_xor.csv",
    #     # "3d_4_cubes_1_aligned_xor.csv",
    #     # "3d_4_cubes_2_aligned.csv",
    #     # "3d_4_cubes_xor.csv",
    #     # "4d_2_cubes_aligned.csv",
    #     # "4d_3_cubes_aligned_xor.csv",
    #     # "4d_3_cubes_xor.csv",
    #     # "4d_4_cubes_aligned_xor.csv",
    #     # "4d_4_cubes_2_aligned.csv",
    #     # "4d_4_cubes_xor.csv",
    # ]:
    #     save_plot_data_2d(f, pd.read_csv("synthetic_cases/cubes/" + f, delimiter=";", header=None, na_values='?'))