Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
everyday-eye-movements-predict-personality/08_descriptive.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
99 lines (84 sloc)
3.52 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from config import names as gs | |
from config import conf | |
import sys | |
import math | |
import os | |
def get_stats(): | |
annotation_times = np.genfromtxt(conf.annotation_path, delimiter=',', skip_header=1)[:, 1:] | |
shop_duration = annotation_times[:, 1] - annotation_times[:, 0] | |
print 'Time spent in the shop:' | |
print 'MEAN', np.mean(shop_duration/60.), 'min' | |
print 'STD', np.std(shop_duration/60.), 'min' | |
def get_feature_correlations(): | |
# find the window size that was most frequently chosen | |
hist_sum = np.zeros((len(conf.all_window_sizes)), dtype=int) | |
for trait in xrange(0, conf.n_traits): | |
for si in xrange(0, 100): | |
filename = conf.get_result_filename(conf.annotation_all, trait, False, si, add_suffix=True) | |
if os.path.exists(filename): | |
data = np.load(filename) | |
chosen_window_indices = data['chosen_window_indices'] | |
hist, _ = np.histogram(chosen_window_indices, bins=np.arange(-0.5, len(conf.all_window_sizes), 1)) | |
hist_sum += hist | |
else: | |
print 'did not find', filename | |
ws = conf.all_window_sizes[np.argmax(hist_sum)] | |
# load features for the most frequently chosen time window | |
x_file, y_file, id_file = conf.get_merged_feature_files(ws) | |
x_ws = np.genfromtxt(x_file, delimiter=',', skip_header=1) | |
ids_ws = np.genfromtxt(id_file, delimiter=',', skip_header=1).astype(int)[:,0] | |
y = np.genfromtxt(conf.binned_personality_file, skip_header=1, usecols=xrange(1, conf.n_traits+1), delimiter=',') | |
y_ws = np.genfromtxt(y_file, delimiter=',', skip_header=1).astype(int) | |
# compute average feature per person | |
avg_x_ws = np.zeros((conf.n_participants, conf.max_n_feat)) | |
for p in xrange(0,conf.n_participants): | |
avg_x_ws[p,:] = np.mean(x_ws[ids_ws == p, :], axis=0) | |
feature_correlations_avg = [] | |
for fi in xrange(0, conf.max_n_feat): | |
C_avg = np.corrcoef(y.transpose(), avg_x_ws[:, fi])[-1][:-1] | |
feature_correlations_avg.append(C_avg) | |
feature_correlations_avg = np.array(feature_correlations_avg) | |
# find the 5th to highest correlation for each trait and write them into a .tex table - see Table 4 in SI | |
n = 15 | |
highest_correlated_features = [] | |
highest_correlated_features_lists = [] | |
highest_correlated_features_names = [] | |
for t in xrange(0, conf.n_traits): | |
hcf = feature_correlations_avg[:,t].argsort()[-n:] | |
locallist = [] | |
for f in hcf: | |
if f not in highest_correlated_features: | |
highest_correlated_features.append(f) | |
highest_correlated_features_names.append(gs.full_long_label_list[f].lower()) | |
locallist.append(f) | |
highest_correlated_features_lists.append(locallist) | |
features = zip(highest_correlated_features_names, highest_correlated_features) | |
highest_correlated_features = [y for (x,y) in sorted(features)] | |
#highest_correlated_features.sort() | |
filename = conf.figure_folder + '/table4.tex' | |
print len(highest_correlated_features) | |
with open(filename, 'w') as f: | |
f.write('feature&Neur.&Extr.&Open.&Agree.&Consc.&PCS&CEI') | |
f.write('\\\\\n\hline\n') | |
for fi in highest_correlated_features: | |
f.write(gs.full_long_label_list[fi]) | |
for t in xrange(0, conf.n_traits): | |
fc = feature_correlations_avg[fi,t] | |
if math.isnan(fc): | |
f.write('&-') | |
elif fi in highest_correlated_features_lists[t]: | |
f.write('&\\textbf{'+'%.2f}'%fc) | |
else: | |
f.write('&'+'%.2f'%fc) | |
f.write('\\\\\n') | |
print filename, 'written' | |
if __name__ == "__main__": | |
import os | |
if not os.path.exists(conf.figure_folder): | |
os.makedirs(conf.figure_folder) | |
get_stats() # prints statistics on the time participants spent inside the shop | |
get_feature_correlations() # Table 4 |