Permalink
Show file tree
Hide file tree
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
feature extraction code
- Loading branch information
Sabrina Hoppe
committed
May 5, 2018
1 parent
a20084f
commit cebde1be1761205674b0ba79793734f1374b6a23
Showing
9 changed files
with
1,279 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import numpy as np | ||
import os | ||
from config import conf as conf | ||
from featureExtraction import gaze_analysis as ga | ||
import threading | ||
import getopt | ||
import sys | ||
from config import names as gs | ||
|
||
def compute_sliding_window_features(participant, ws, gazeAnalysis_instance):
    """
    Compute the sliding-window features of one participant and save them.

    The given gazeAnalysis instance is asked for the features of all windows
    of size ``ws`` (the step size comes from ``conf.get_step_size``). The
    resulting features and window times are written with ``np.save`` to the
    participant's window-features file and window-times file for ``ws``.
    """
    step = conf.get_step_size(ws)
    features, times = gazeAnalysis_instance.get_window_features(ws, step)

    # features first, times second - same order as the files are looked up later
    np.save(conf.get_window_features_file(participant, ws), features)
    np.save(conf.get_window_times_file(participant, ws), times)
|
||
if __name__ == "__main__":
    # ----------------------------------------------------------------------
    # Step 1: for every participant, compute the sliding-window features for
    # each window size whose feature file does not exist yet.
    # ----------------------------------------------------------------------
    for p in range(0, conf.n_participants):
        threads = []  # one thread per time window will be used and collected in this list

        # create data folder, plus one subfolder for participant p
        if not os.path.exists(conf.get_feature_folder(p)):
            os.makedirs(conf.get_feature_folder(p))

        # make sure all relevant raw data files exist in the right folder
        gaze_file = conf.get_data_folder(p) + '/gaze_positions.csv'
        pupil_diameter_file = conf.get_data_folder(p) + '/pupil_diameter.csv'
        events_file = conf.get_data_folder(p) + '/events.csv'
        # explicit check instead of `assert`: asserts are stripped when python runs with -O
        missing_files = [raw_file for raw_file in (gaze_file, pupil_diameter_file, events_file)
                         if not os.path.exists(raw_file)]
        if missing_files:
            raise IOError('missing raw data file(s): ' + ', '.join(missing_files))

        # load relevant data (first line of each csv is a header)
        gaze = np.genfromtxt(gaze_file, delimiter=',', skip_header=1)
        pupil_diameter = np.genfromtxt(pupil_diameter_file, delimiter=',', skip_header=1)
        events = np.genfromtxt(events_file, delimiter=',', skip_header=1, dtype=str)

        # create instance of gazeAnalysis class that will be used for feature extraction
        # this already does some initial computation that will be useful for all window sizes:
        extractor = ga.gazeAnalysis(gaze, conf.fixation_radius_threshold, conf.fixation_duration_threshold,
                                    conf.saccade_min_velocity, conf.max_saccade_duration,
                                    pupil_diameter=pupil_diameter, event_strings=events)

        # compute sliding window features by creating one thread per window size;
        # window sizes whose feature file already exists are skipped
        for window_size in conf.all_window_sizes:
            if not os.path.exists(conf.get_window_features_file(p, window_size)):
                thread = threading.Thread(target=compute_sliding_window_features,
                                          args=(p, window_size, extractor))
                thread.start()
                threads.append(thread)

        # wait for all window sizes of this participant before moving on
        for t in threads:
            t.join()

        print('finished all features for participant %s' % p)

    # ----------------------------------------------------------------------
    # Step 2: merge the features from all participants into three files per window_size:
    #   merged_features includes all features
    #   merged_traits contains the ground truth personality score ranges
    #   merged_ids contains the participant number and context (way, shop, half of the recording)
    # ----------------------------------------------------------------------

    # load ground truth from info folder:
    binned_personality = np.genfromtxt(conf.binned_personality_file, delimiter=',', skip_header=1)
    trait_labels = np.loadtxt(conf.binned_personality_file, delimiter=',', dtype=str)[0, :]
    annotation = np.genfromtxt(conf.annotation_path, delimiter=',', skip_header=1)

    for window_size in conf.all_window_sizes:
        print('merging window size %s' % window_size)

        windowfeats_subtask_all = []
        windowfeats_subtask_ids = []
        windowfeats_subtask_all_y = []

        for p in range(0, conf.n_participants):
            featfilename = conf.get_window_features_file(p, window_size)
            timesfilename = conf.get_window_times_file(p, window_size)
            if os.path.exists(featfilename) and os.path.exists(timesfilename):
                data = np.load(featfilename).tolist()
                windowfeats_subtask_all.extend(data)
                # every window of participant p gets the same label row (first column dropped)
                windowfeats_subtask_all_y.extend([binned_personality[p, 1:]] * len(data))

                # NOTE(review): columns 2 and 3 of the times file are presumably the start and
                # end time of each window, and ann[0] / ann[1] the times at which the shop part
                # begins / ends - confirm against gazeAnalysis and the annotation file.
                times = np.load(timesfilename)[:, 2:]
                ann = annotation[p, 1:]

                ids_annotation = np.zeros((len(data), 3), dtype=int)  # person, way/shop, half
                ids_annotation[:, 0] = p
                # strict comparisons: a window overlapping one of the two annotated times matches
                # none of the three masks and keeps the initial 0 in column 1
                ids_annotation[(times[:, 1] < ann[0]), 1] = conf.time_window_annotation_wayI
                ids_annotation[(times[:, 0] > ann[0]) & (times[:, 1] < ann[1]), 1] = conf.time_window_annotation_shop
                ids_annotation[(times[:, 0] > ann[1]), 1] = conf.time_window_annotation_wayII
                # floor division keeps the slice index an int on Python 2 and Python 3
                half = len(data) // 2
                ids_annotation[:half, 2] = conf.time_window_annotation_halfI
                ids_annotation[half:, 2] = conf.time_window_annotation_halfII

                windowfeats_subtask_ids.extend(ids_annotation.tolist())
            else:
                # report the file that is actually missing (features first, then times)
                missing = featfilename if not os.path.exists(featfilename) else timesfilename
                print('did not find  %s' % missing)
                sys.exit(1)

        ids = np.array(windowfeats_subtask_ids)
        x = np.array(windowfeats_subtask_all, dtype=float)
        y = np.array(windowfeats_subtask_all_y)
        f1, f2, f3 = conf.get_merged_feature_files(window_size)

        # NOTE(review): the header of f2 uses every column name of the personality file while y
        # drops the first column, and the header of f3 names one column while ids has three;
        # left unchanged because readers of these files may skip the header line - confirm.
        np.savetxt(f1, x, delimiter=',', header=','.join(gs.full_long_label_list), comments='')
        np.savetxt(f2, y, delimiter=',', header=','.join(trait_labels), comments='')
        np.savetxt(f3, ids, delimiter=',', header='Participant ID', comments='')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import numpy as np | ||
|
||
# ---- global parameters ----
n_participants = 42   # participants are numbered 0 .. n_participants - 1
n_traits = 7
max_n_feat = 207
max_n_iter = 100
# every sliding-window size for which features are computed and merged
all_window_sizes = [5, 15, 30, 45, 60, 75, 90, 105, 120, 135]
# at least 3/4 of the people have a time window in these times
all_shop_window_sizes = [5, 15]

# ---- cross validation parameters ----
n_inner_folds = 3
n_outer_folds = 5

# ---- Random Forest parameters ----
tree_max_features = 15
tree_max_depth = 5
n_estimators = 100
max_n_jobs = 5
|
||
# given a window size, determine step size correctly for even and odd numbers
def get_step_size(window_size):
    """
    Return the step size belonging to a window size: half of the window.

    A whole-numbered result is returned as ``int`` (30 -> 15), any other result
    as ``float`` (5 -> 2.5).
    """
    step_size = window_size / 2.0
    # Only truncate when the value really is a whole number. The former test
    # ``step_size * 10 % 2 == 0`` looked at the first decimal digit only, so it
    # also truncated values such as 2.6 (-> 2) or 0.2 (-> 0).
    if float(step_size).is_integer():
        step_size = int(step_size)
    return step_size
|
||
# relative paths: top-level folders first ...
data_folder = 'data'
info_folder = 'info'
feature_folder = 'features'
result_folder = 'results'
figure_folder = 'figures'
# ... then the csv files that live inside the info folder
annotation_path = '/'.join((info_folder, 'annotation.csv'))
binned_personality_file = '/'.join((info_folder, 'binned_personality.csv'))
personality_sex_age_file = '/'.join((info_folder, 'personality_sex_age.csv'))
|
||
# load the personality trait names from file and map them to abbreviations:
# the names are the entries of the first row of the csv, without its first column
traitlabels = np.loadtxt(
    binned_personality_file,
    dtype=str,
    delimiter=',',
)[0, 1:]
def get_abbr(s):
    """Return the first letters of all words in ``s`` that start with an upper-case letter."""
    initials = []
    for word in s.split():
        first_letter = word[0]
        if first_letter.isupper():
            initials.append(first_letter)
    return ''.join(initials)
# medium labels: only names consisting of several words are abbreviated
medium_traitlabels = [s if " " not in s else get_abbr(s) for s in traitlabels]
# short labels: every name is reduced to the initials of its capitalised words
short_traitlabels = [get_abbr(tl) for tl in traitlabels]
|
||
|
||
# dynamically create relative paths for result files to create | ||
def get_result_folder(annotation_val):
    """Return the sub-folder of ``result_folder`` for an annotation value ('<result_folder>/A<value>')."""
    return '%s/A%s' % (result_folder, annotation_val)
|
||
def get_result_filename(annotation_val, trait, shuffle_labels, i, add_suffix=False):
    """
    Build the path of a result file.

    The name consists of the result folder for ``annotation_val``, the short
    label of trait number ``trait``, '_rnd' if ``shuffle_labels`` is set, the
    number ``i`` zero-padded to three digits and, if ``add_suffix`` is set,
    the extension '.npz'.
    """
    pieces = [get_result_folder(annotation_val), '/', short_traitlabels[trait]]
    if shuffle_labels:
        pieces.append('_rnd')
    pieces.append('_' + str(i).zfill(3))
    if add_suffix:
        pieces.append('.npz')
    return ''.join(pieces)
|
||
def get_feature_folder(participant):
    """Return the feature folder of a participant; the number is zero-padded to two digits."""
    padded_number = str(participant).zfill(2)
    return '%s/Participant%s' % (feature_folder, padded_number)
|
||
def get_merged_feature_files(window_size):
    """
    Return the paths of the three merged csv files for a window size,
    in the order (merged_features, merged_traits, merged_ids).
    """
    ending = '_' + str(window_size) + '.csv'
    merged_features = feature_folder + '/merged_features' + ending
    merged_traits = feature_folder + '/merged_traits' + ending
    merged_ids = feature_folder + '/merged_ids' + ending
    return merged_features, merged_traits, merged_ids
|
||
def get_data_folder(participant):
    """Return the raw-data folder of a participant; the number is zero-padded to two digits."""
    padded_number = str(participant).zfill(2)
    return '%s/Participant%s' % (data_folder, padded_number)
|
||
def get_window_times_file(participant, window_size):
    """Return the path of the .npy file with the window times of a participant for one window size."""
    folder = get_feature_folder(participant)
    return '%s/window_times_%s.npy' % (folder, window_size)
|
||
def get_window_features_file(participant, window_size):
    """Return the path of the .npy file with the window features of a participant for one window size."""
    folder = get_feature_folder(participant)
    return '%s/window_features_%s.npy' % (folder, window_size)
|
||
def get_overall_features_file(participant):
    """Return the path of the participant's overall_features.npy file."""
    return '/'.join((get_feature_folder(participant), 'overall_features.npy'))
|
||
|
||
# ---- parameters for fixation/saccade detection ----
fixation_radius_threshold = 0.025
fixation_duration_threshold = 0.1
saccade_min_velocity = 2
max_saccade_duration = 0.5

# ---- annotation constants ----
# (as given as arguments to train_classifier, and as used for file names in result_folder)
annotation_all = 0
annotation_ways = 1
annotation_shop = 2
annotation_values = [annotation_all, annotation_ways, annotation_shop]

# ---- annotations used in merged_ids_* files in the feature_folder ----
# column 1: part of the recording
time_window_annotation_wayI = 1
time_window_annotation_shop = 2
time_window_annotation_wayII = 3
# column 2: half of the recording
time_window_annotation_halfI = 1
time_window_annotation_halfII = 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
# names of the entries stored per fixation
fixations_list_labels = [
    'mean x', 'mean y',
    'var x', 'var y',
    't start', 't end',
    'start index', 'end index',
    'mean diameter', 'var diameter',
    'mean successive angles', 'var successive angles',
]

# index of each entry, numbered 0..11 in the order of the labels above
(fix_mean_x_i, fix_mean_y_i,
 fix_var_x_i, fix_var_y_i,
 fix_start_t_i, fix_end_t_i,
 fix_start_index_i, fix_end_index_i,
 fix_mean_diam_i, fix_var_diam_i,
 fix_mean_succ_angles, fix_var_succ_angles) = range(len(fixations_list_labels))
|
||
# names of the entries stored per saccade
saccades_list_labels = [
    'start x', 'start y',
    'end x', 'end y',
    'angle',
    't start', 't end',
    'start index', 'end index',
    'mean diameter', 'var diameter',
    'peak velocity', 'amplitude',
]

# index of each entry, numbered 0..12 in the order of the labels above
(sacc_start_x_i, sacc_start_y_i,
 sacc_end_x_i, sacc_end_y_i,
 sacc_angle_i,
 sacc_t_start_i, sacc_t_end_i,
 sacc_start_index_i, sacc_end_index_i,
 sacc_mean_diam_i, sacc_var_diam_i,
 sacc_peak_vel_i, sacc_amplitude_i) = range(len(saccades_list_labels))
|
||
# names of the entries stored per blink
blink_list_labels = ['t start', 't end', 'start index', 'end index']

# index of each entry, numbered 0..3 in the order of the labels above
(blink_start_t_i,
 blink_end_ti_i,
 blink_start_index_i,
 blink_end_index_i) = range(len(blink_list_labels))
|
||
# short labels of the event-based features; the comment gives the index in the list
event_feature_labels = [
    'fixation rate',               # 0
    'saccade rate',                # 1
    'small sacc. rate',            # 2
    'large sacc. rate',            # 3
    'positive sacc. rate',         # 4
    'negative sacc. rate',         # 5
    'ratio sacc - fix',            # 6
    'ratio small sacc',            # 7
    'ratio large sacc',            # 8
    'ratio right sacc',            # 9
    'ratio left sacc',             # 10
    'mean sacc amplitude',         # 11
    'var sacc amplitude',          # 12
    'min sacc amplitude',          # 13
    'max sacc amplitude',          # 14
    'mean peak velocity',          # 15
    'var peak velocity',           # 16
    'min peak velocity',           # 17
    'max peak velocity',           # 18
    'mean mean diameter sacc',     # 19
    'var mean diameter sacc',      # 20
    'mean var diameter sacc',      # 21
    'var var diameter sacc',       # 22
    'mean fix duration',           # 23
    'var fix duration',            # 24
    'min fix duration',            # 25
    'max fix duration',            # 26
    'dwelling time',               # 27
    'mean mean subsequent angle',  # 28
    'var mean subsequent angle',   # 29
    'mean var subsequent angle',   # 30
    'var var subsequent angle',    # 31
    'mean var x',                  # 32
    'mean var y',                  # 33
    'var var x',                   # 34
    'var var y',                   # 35
    'mean mean diameter fix',      # 36
    'var mean diameter fix',       # 37
    'mean var diameter fix',       # 38
    'var var diameter fix',        # 39
    'mean blink duration',         # 40
    'var blink duration',          # 41
    'min blink duration',          # 42
    'max blink duration',          # 43
    'blink rate',                  # 44
]
|
||
# long labels of the event-based features; the comment gives the index in the list
event_feature_labels_long = [
    'fixation rate',                                     # 0
    'saccade rate',                                      # 1
    'small saccade rate',                                # 2
    'large saccade rate',                                # 3
    'positive saccade rate',                             # 4
    'negative saccade rate',                             # 5
    'saccade:fixation ratio',                            # 6
    'ratio of small saccades',                           # 7
    'ratio of large saccades',                           # 8
    'ratio of right saccades',                           # 9
    'ratio of left saccades',                            # 10
    'mean saccade amplitude',                            # 11
    'var saccade amplitude',                             # 12
    'min saccade amplitude',                             # 13
    'max saccade amplitude',                             # 14
    'mean saccadic peak velocity',                       # 15
    'var saccadic peak velocity',                        # 16
    'min saccadic peak velocity',                        # 17
    'max saccadic peak velocity',                        # 18
    'mean of the mean pupil diameter during saccades',   # 19
    'var of the mean pupil diameter during saccades',    # 20
    'mean of the var pupil diameter during saccades',    # 21
    'var of the var pupil diameter during saccades',     # 22
    'mean fixation duration',                            # 23
    'var fixation duration',                             # 24
    'min fixation duration',                             # 25
    'max fixation duration',                             # 26
    'dwelling time',                                     # 27
    'mean of the mean of subsequent angles',             # 28
    'var of the mean of subsequent angles',              # 29
    'mean of the var of subsequent angles',              # 30
    'var of the var of subsequent angles',               # 31
    'mean of the var of x',                              # 32
    'mean of the var of y',                              # 33
    'var of the var of x',                               # 34
    'var of the var of y',                               # 35
    'mean of the mean pupil diameter during fixations',  # 36
    'var of the mean pupil diameter during fixations',   # 37
    'mean of the var pupil diameter during fixations',   # 38
    'var of the var pupil diameter during fixations',    # 39
    'mean blink duration',                               # 40
    'var blink duration',                                # 41
    'min blink duration',                                # 42
    'max blink duration',                                # 43
    'blink rate',                                        # 44
]
|
||
def get_wordbook_feature_labels(movement_abbreviation):
    """
    Return the 32 short wordbook labels for one movement type.

    For each wordbook number 1 to 4 and each of eight statistics a label
    ``<movement_abbreviation><statistic> WB<number>`` is created; the
    statistics change fastest.
    """
    statistics = ('>0', 'max', 'min', 'arg max', 'arg min', 'range', 'mean', 'var')
    labels = []
    for wordbook_number in (1, 2, 3, 4):
        ending = ' WB' + str(wordbook_number)
        for statistic in statistics:
            labels.append(movement_abbreviation + statistic + ending)
    return labels
|
||
def get_wordbook_feature_labels_long(movement_abbreviation):
    """
    Return the 32 long wordbook labels for one movement type.

    For each n from 1 to 4 and each of eight statistics a label
    ``<statistic> <n>-gram <movement_abbreviation> movement(s)`` is created;
    the statistics change fastest.
    """
    phrases = (
        ('number of different ', ' movements'),
        ('max frequency ', ' movements'),
        ('min frequency ', ' movements'),
        ('most frequent ', ' movement'),
        ('least frequent ', ' movement'),
        ('range of frequencies of ', ' movements'),
        ('mean frequency of ', ' movements'),
        ('var frequency of ', ' movements'),
    )
    labels = []
    for n in (1, 2, 3, 4):
        middle = str(n) + '-gram ' + movement_abbreviation
        for beginning, ending in phrases:
            labels.append(beginning + middle + ending)
    return labels
|
||
# short labels of the position features: every statistic is listed for x, y and
# diameter (in that order), followed by a single angle feature
_position_statistics = ('mean', 'min', 'max', 'min-max', 'std', 'median',
                        '1st quart', '3rd quart', 'IQR', 'mean abs diff', 'mean diff')
position_feature_labels = [_statistic + ' ' + _signal
                           for _statistic in _position_statistics
                           for _signal in ('x', 'y', 'diameter')]
position_feature_labels.append('mean subsequent angle')
|
||
# long labels of the position features: one row per statistic (x, y, pupil diameter),
# followed by a single angle feature
# NOTE(review): the row 'mean difference of subsequent ...' is the long form of the short
# labels 'mean abs diff ...', while the next row repeats 'mean diff ...' - the wording
# possibly drops an "absolute"; confirm before changing, the labels are written to file.
position_feature_labels_long = [
    'mean x', 'mean y', 'mean pupil diameter',
    'minimum x', 'minimum y', 'minimum pupil diameter',
    'maximum x', 'maximum y', 'maximum pupil diameter',
    'range x', 'range y', 'range pupil diameter',
    'std x', 'std y', 'std pupil diameter',
    'median x', 'median y', 'median pupil diameter',
    '1st quartile x', '1st quartile y', '1st quartile pupil diameter',
    '3rd quartile x', '3rd quartile y', '3rd quartile pupil diameter',
    'inter quartile range x', 'inter quartile range y', 'inter quartile range pupil diameter',
    'mean difference of subsequent x', 'mean difference of subsequent y',
    'mean difference of subsequent pupil diameters',
    'mean diff x', 'mean diff y', 'mean diff pupil diameter',
    'mean subsequent angle',
]
|
||
# labels of the 64 heatmap features, numbered 00 .. 63
# (range instead of xrange: xrange only exists on Python 2, range works on both)
heatmap_feature_labels = ['heatmap_' + str(i).zfill(2) for i in range(0, 64)]
heatmap_feature_labels_long = ['heatmap cell ' + str(i).zfill(2) for i in range(0, 64)]
|
||
# all short labels in one list: event, heatmap and position features,
# then the wordbook features for 'sacc.' and for 'SF'
full_label_list = (event_feature_labels
                   + heatmap_feature_labels
                   + position_feature_labels
                   + get_wordbook_feature_labels('sacc.')
                   + get_wordbook_feature_labels('SF'))

# the corresponding long labels, in the same order
full_long_label_list = (event_feature_labels_long
                        + heatmap_feature_labels_long
                        + position_feature_labels_long
                        + get_wordbook_feature_labels_long('sacc.')
                        + get_wordbook_feature_labels_long('SF'))
|
||
|
||
# the 24 single-character saccade symbols ...
sacc_dictionary = ['A', 'B', 'C', 'R', 'E', 'F', 'G', 'D', 'H', 'J', 'K', 'L', 'M', 'N', 'O', 'U',
                   'u', 'b', 'r', 'f', 'd', 'j', 'l', 'n']
# ... and all their sequences of length two, three and four; each list extends every
# entry of the previous one by one more symbol, with the last symbol changing fastest
sacc_bins_two = [head + symbol for head in sacc_dictionary for symbol in sacc_dictionary]
sacc_bins_three = [head + symbol for head in sacc_bins_two for symbol in sacc_dictionary]
sacc_bins_four = [head + symbol for head in sacc_bins_three for symbol in sacc_dictionary]
# index n - 1 holds the sequences of length n
sacc_bins = [sacc_dictionary, sacc_bins_two, sacc_bins_three, sacc_bins_four]
|
||
# the 10 saccade/fixation symbols ...
saccFix_dictionary = ['S_lu', 'S_ld', 'S_lr', 'S_ll', 'S_su', 'S_sd', 'S_sr', 'S_sl', 'F_l', 'F_s']
# ... and all their sequences of length two, three and four; each list extends every
# entry of the previous one by one more symbol, with the last symbol changing fastest
saccFix_bins_two = [head + symbol for head in saccFix_dictionary for symbol in saccFix_dictionary]
saccFix_bins_three = [head + symbol for head in saccFix_bins_two for symbol in saccFix_dictionary]
saccFix_bins_four = [head + symbol for head in saccFix_bins_three for symbol in saccFix_dictionary]
# index n - 1 holds the sequences of length n
saccFix_bins = [saccFix_dictionary, saccFix_bins_two, saccFix_bins_three, saccFix_bins_four]
|
||
def write_pami_feature_labels_to_file(targetfile):
    """
    Write all feature labels to ``targetfile`` as csv.

    The first line is the header ',short,long'. Every further line holds the
    running index of a feature, its short label and its long label, taken
    pairwise from ``full_label_list`` and ``full_long_label_list``. An
    existing file is overwritten.
    """
    # the with-block closes the file even if one of the writes fails
    with open(targetfile, 'w') as f:  # creates if it does not exist
        f.write(',short,long\n')
        label_pairs = zip(full_label_list, full_long_label_list)
        for i, (short_label, long_label) in enumerate(label_pairs):
            f.write(str(i) + ',' + short_label + ',' + long_label + '\n')
Empty file.
Oops, something went wrong.