# everyday-eye-movements-predict-personality/07_evaluation_across_contexts.py
import numpy as np
from config import conf
import os, sys
from config import names as gs
import pandas as pd
truth = np.genfromtxt(conf.binned_personality_file, skip_header=1, usecols=xrange(1, conf.n_traits+1), delimiter=',')
# All comparisons to perform. Each comparison consists of
#  a name,
#  two annotation values that determine whether classifiers trained on all data or on specific subsets only are examined,
#  and the names of the two (sub)tasks to compare.
comparisons = dict({'split halves': [conf.annotation_all, conf.annotation_all, 'first half', 'second half'],
                    'two ways': [conf.annotation_ways, conf.annotation_ways, 'way there', 'way back'],
                    'way vs shop in general classifier': [conf.annotation_all, conf.annotation_all, 'both ways', 'shop'],
                    'way vs shop in specialised classifier': [conf.annotation_ways, conf.annotation_shop, 'both ways', 'shop'],
                    'way in specialised classifier vs way in general classifier': [conf.annotation_ways, conf.annotation_all, 'both ways', 'both ways'],
                    'shop in specialised classifier vs shop in general classifier': [conf.annotation_shop, conf.annotation_all, 'shop', 'shop']
                    })
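# For example, the 'way vs shop in specialised classifier' entry compares predictions of a
# classifier trained on way data only (evaluated on 'both ways') with predictions of a
# classifier trained on shop data only (evaluated on 'shop').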
def get_majority_vote(predictions):
    if len(predictions) == 0:
        return -1
    (values, counts) = np.unique(predictions, return_counts=True)
    ind = np.argmax(counts)
    return values[ind]
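# Usage sketch (illustrative labels, not study data): get_majority_vote(np.array([1, 2, 2, 3]))
# returns 2, the most frequent prediction; an empty array returns -1, which is later excluded
# by get_average_correlation through its A > 0 / B > 0 checks.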
def get_average_correlation(predA, predB, m_iter):
    """
    :param predA: predictions for task A, n_participants x m_iter
    :param predB: predictions for task B, n_participants x m_iter, or a 1-D array of length n_participants
    :param m_iter: number of iterations (columns of predA) to correlate
    :return: average correlation between the two sets of predictions across iterations
    """
    correlations = []
    for si in xrange(0, m_iter):
        if predB.ndim == 1:
            if np.sum(predA[:, si]) > 0:
                A = predA[:, si]
                B = predB
                consider = (A > 0)
                A = A[consider]
                B = B[consider]
            else:
                continue
        else:
            if np.sum(predA[:, si]) > 0 and (np.sum(predB[:, si]) > 0):
                A = predA[:, si]
                B = predB[:, si]
                consider = (A > 0) & (B > 0)
                A = A[consider]
                B = B[consider]
            else:
                continue
        correlation = np.corrcoef(np.array([A, B]))[0][1]
        correlations.append(correlation)
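    # Average the per-iteration correlations via the Fisher z-transformation:
    # arctanh maps each correlation to z, the z values are averaged, and tanh maps the mean back.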
    avg = np.tanh(np.mean(np.arctanh(np.array(correlations))))
    return avg
if __name__ == "__main__":
    # check if the output target folder already exists and create it if not
    if not os.path.exists(conf.figure_folder):
        os.mkdir(conf.figure_folder)
    # collect masks for each participant, annotation (all data, shop, way), window size and subset in question (e.g. first half, or way to the shop)
    # each mask is True for samples of a particular participant and subset; False for all others
    window_masks = []
    for wsi in xrange(0, len(conf.all_window_sizes)):
        x_file, y_file, id_file = conf.get_merged_feature_files(conf.all_window_sizes[wsi])
        for annotation_value in conf.annotation_values:
            ids_ws = np.genfromtxt(id_file, delimiter=',', skip_header=1).astype(int)
            if annotation_value == conf.annotation_shop:
                ids_ws = ids_ws[ids_ws[:, 1] == conf.time_window_annotation_shop, :]
            elif annotation_value == conf.annotation_ways:
                ids_ws = ids_ws[(ids_ws[:, 1] == conf.time_window_annotation_wayI) | (ids_ws[:, 1] == conf.time_window_annotation_wayII), :]
            for p in xrange(0, conf.n_participants):
                ids_ws_p = ids_ws[(ids_ws[:, 0] == p), :]
                window_masks.append([annotation_value, p, wsi, 'first half', ids_ws_p[:, 2] == conf.time_window_annotation_halfI])
                window_masks.append([annotation_value, p, wsi, 'second half', ids_ws_p[:, 2] == conf.time_window_annotation_halfII])
                window_masks.append([annotation_value, p, wsi, 'way there', ids_ws_p[:, 1] == conf.time_window_annotation_wayI])
                window_masks.append([annotation_value, p, wsi, 'way back', ids_ws_p[:, 1] == conf.time_window_annotation_wayII])
                window_masks.append([annotation_value, p, wsi, 'shop', ids_ws_p[:, 1] == conf.time_window_annotation_shop])
                window_masks.append([annotation_value, p, wsi, 'both ways', np.logical_or(ids_ws_p[:, 1] == conf.time_window_annotation_wayI, ids_ws_p[:, 1] == conf.time_window_annotation_wayII)])
    window_masks_df = pd.DataFrame(window_masks, columns=['annotation', 'participant', 'window size index', 'subtask', 'mask'])
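    # window_masks_df now holds one boolean mask per combination of annotation value, participant,
    # window size and subtask; e.g. a participant's 'both ways' mask selects all of their samples
    # recorded on the way to or from the shop.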
    # collect predictions for each participant and each setting that is relevant to one of the comparisons
    # Results are directly written into figures/table1-5.csv
    with open(conf.figure_folder + '/table1-5.csv', 'w') as f:
        f.write('comparison')
        for trait in xrange(0, conf.n_traits):
            f.write(',' + conf.medium_traitlabels[trait])
        f.write('\n')
        for comp_title, (annotation_value_I, annotation_value_II, subtaskI, subtaskII) in comparisons.items():
            f.write(comp_title)
            result_filename = conf.result_folder + '/predictions_' + comp_title.replace(' ', '_') + '.npz'
            if not os.path.exists(result_filename):
                print 'computing data for', comp_title
                print 'Note that this might take a while - if the script is run again, intermediate results will be available and speed up the computation.'
                predictions_I = np.zeros((conf.n_participants, conf.n_traits, conf.max_n_iter), dtype=int)
                predictions_II = np.zeros((conf.n_participants, conf.n_traits, conf.max_n_iter), dtype=int)
                for trait in xrange(0, conf.n_traits):
                    for si in xrange(0, conf.max_n_iter):
                        filenameI = conf.get_result_filename(annotation_value_I, trait, False, si, add_suffix=True)
                        filenameII = conf.get_result_filename(annotation_value_II, trait, False, si, add_suffix=True)
                        if os.path.exists(filenameI) and os.path.exists(filenameII):
                            dataI = np.load(filenameI)
                            detailed_predictions_I = dataI['detailed_predictions']
                            chosen_window_indices_I = dataI['chosen_window_indices']
                            dataII = np.load(filenameII)
                            detailed_predictions_II = dataII['detailed_predictions']
                            chosen_window_indices_II = dataII['chosen_window_indices']
                            for p, window_index_I, window_index_II, local_detailed_preds_I, local_detailed_preds_II in zip(xrange(0, conf.n_participants), chosen_window_indices_I, chosen_window_indices_II, detailed_predictions_I, detailed_predictions_II):
                                maskI = window_masks_df[(window_masks_df.annotation == annotation_value_I) &
                                                        (window_masks_df.participant == p) &
                                                        (window_masks_df['window size index'] == window_index_I) &
                                                        (window_masks_df.subtask == subtaskI)
                                                        ].as_matrix(columns=['mask'])[0][0]
                                maskII = window_masks_df[(window_masks_df.annotation == annotation_value_II) &
                                                         (window_masks_df.participant == p) &
                                                         (window_masks_df['window size index'] == window_index_II) &
                                                         (window_masks_df.subtask == subtaskII)
                                                         ].as_matrix(columns=['mask'])[0][0]
                                predictions_I[p, trait, si] = get_majority_vote(np.array(local_detailed_preds_I)[maskI])
                                predictions_II[p, trait, si] = get_majority_vote(np.array(local_detailed_preds_II)[maskII])
                        else:
                            print 'did not find', filenameI, 'or', filenameII
                            sys.exit(1)
                np.savez(result_filename, predictions_I=predictions_I, predictions_II=predictions_II)
            else:
                data = np.load(result_filename)
                predictions_I = data['predictions_I']
                predictions_II = data['predictions_II']
            # predictions_I holds predictions from one context, predictions_II those from the other;
            # compute their average correlation and write it to file
            for t in xrange(0, conf.n_traits):
                corrI = get_average_correlation(predictions_I[:, t, :], predictions_II[:, t, :], 100)
                f.write(',' + '%.2f' % corrI)
            f.write('\n')