From cebde1be1761205674b0ba79793734f1374b6a23 Mon Sep 17 00:00:00 2001
From: Sabrina Hoppe
Date: Sat, 5 May 2018 21:25:39 +0200
Subject: [PATCH] feature extraction code

---
 00_compute_features.py               | 103 +++++
 README.md                            |  11 +-
 __init__.py                          |   1 +
 config/__init__.py                   |   0
 config/conf.py                       |  97 +++++
 config/names.py                      | 160 ++++++++
 featureExtraction/__init__.py        |   0
 featureExtraction/event_detection.py | 327 +++++++++++++++
 featureExtraction/gaze_analysis.py   | 582 +++++++++++++++++++++++++++
 9 files changed, 1279 insertions(+), 2 deletions(-)
 create mode 100644 00_compute_features.py
 create mode 100644 __init__.py
 create mode 100644 config/__init__.py
 create mode 100644 config/conf.py
 create mode 100644 config/names.py
 create mode 100644 featureExtraction/__init__.py
 create mode 100644 featureExtraction/event_detection.py
 create mode 100644 featureExtraction/gaze_analysis.py

diff --git a/00_compute_features.py b/00_compute_features.py
new file mode 100644
index 0000000..3914576
--- /dev/null
+++ b/00_compute_features.py
@@ -0,0 +1,103 @@
+import numpy as np
+import os
+from config import conf as conf
+from featureExtraction import gaze_analysis as ga
+import threading
+import getopt
+import sys
+from config import names as gs
+
+def compute_sliding_window_features(participant, ws, gazeAnalysis_instance):
+    """
+    queries the gazeAnalysis instance it was given for sliding window features and saves them to file
+    """
+    window_features, window_times = gazeAnalysis_instance.get_window_features(ws, conf.get_step_size(ws))
+    np.save(conf.get_window_features_file(participant, ws), window_features)
+    np.save(conf.get_window_times_file(participant, ws), window_times)
+
+if __name__ == "__main__":
+    for p in xrange(0, conf.n_participants):
+        threads = []  # one thread per time window will be used and collected in this list
+
+        # create data folder, plus one subfolder for participant p
+        if not os.path.exists(conf.get_feature_folder(p)):
+            os.makedirs(conf.get_feature_folder(p))
+
+        # make sure all relevant raw data files exist in the right folder
+        gaze_file = conf.get_data_folder(p) + '/gaze_positions.csv'
+        pupil_diameter_file = conf.get_data_folder(p) + '/pupil_diameter.csv'
+        events_file = conf.get_data_folder(p) + '/events.csv'
+        assert os.path.exists(gaze_file) and os.path.exists(pupil_diameter_file) and os.path.exists(events_file)
+
+        # load relevant data
+        gaze = np.genfromtxt(gaze_file, delimiter=',', skip_header=1)
+        pupil_diameter = np.genfromtxt(pupil_diameter_file, delimiter=',', skip_header=1)
+        events = np.genfromtxt(events_file, delimiter=',', skip_header=1, dtype=str)
+
+        # create an instance of the gazeAnalysis class that will be used for feature extraction
+        # this already does some initial computation that will be useful for all window sizes:
+        extractor = ga.gazeAnalysis(gaze, conf.fixation_radius_threshold, conf.fixation_duration_threshold,
+                                    conf.saccade_min_velocity, conf.max_saccade_duration,
+                                    pupil_diameter=pupil_diameter, event_strings=events)
+
+        # compute sliding window features by creating one thread per window size
+        for window_size in conf.all_window_sizes:
+            if not os.path.exists(conf.get_window_features_file(p, window_size)):
+                thread = threading.Thread(target=compute_sliding_window_features, args=(p, window_size, extractor))
+                thread.start()
+                threads.append(thread)
+
+        for t in threads:
+            t.join()
+
+        print 'finished all features for participant', p
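The per-participant outputs written above can be inspected directly; a minimal sketch (illustrative, not part of the patch; assumes participant 0 and the 15 s window have been processed):

```python
import numpy as np
from config import conf

feats = np.load(conf.get_window_features_file(0, 15))  # shape: (n_windows, 207)
times = np.load(conf.get_window_times_file(0, 15))     # shape: (n_windows, 4)
# each times row holds [start index, end index, start time, end time]
print(feats.shape, times.shape)
```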
+
+    # Merge the features from all participants into three files per window_size:
+    # merged_features includes all features
+    # merged_traits contains the ground truth personality score ranges
+    # merged_ids contains the participant number and context (way, shop, half of the recording)
+
+    # load ground truth from info folder:
+    binned_personality = np.genfromtxt(conf.binned_personality_file, delimiter=',', skip_header=1)
+    trait_labels = np.loadtxt(conf.binned_personality_file, delimiter=',', dtype=str)[0, 1:]
+    annotation = np.genfromtxt(conf.annotation_path, delimiter=',', skip_header=1)
+
+    for window_size in conf.all_window_sizes:
+        print 'merging window size', window_size
+
+        windowfeats_subtask_all = []
+        windowfeats_subtask_ids = []
+        windowfeats_subtask_all_y = []
+
+        for p in xrange(0, conf.n_participants):
+            featfilename = conf.get_window_features_file(p, window_size)
+            timesfilename = conf.get_window_times_file(p, window_size)
+            if os.path.exists(featfilename) and os.path.exists(timesfilename):
+                data = np.load(featfilename).tolist()
+                windowfeats_subtask_all.extend(data)
+                windowfeats_subtask_all_y.extend([binned_personality[p, 1:]] * len(data))
+
+                times = np.load(timesfilename)[:, 2:]
+                ann = annotation[p, 1:]
+
+                ids_annotation = np.zeros((len(data), 3), dtype=int)  # person, way/shop, half
+                ids_annotation[:, 0] = p
+                ids_annotation[(times[:, 1] < ann[0]), 1] = conf.time_window_annotation_wayI
+                ids_annotation[(times[:, 0] > ann[0]) & (times[:, 1] < ann[1]), 1] = conf.time_window_annotation_shop
+                ids_annotation[(times[:, 0] > ann[1]), 1] = conf.time_window_annotation_wayII
+                ids_annotation[:(len(data) / 2), 2] = conf.time_window_annotation_halfI
+                ids_annotation[(len(data) / 2):, 2] = conf.time_window_annotation_halfII
+
+                windowfeats_subtask_ids.extend(ids_annotation.tolist())
+            else:
+                print 'did not find', featfilename
+                sys.exit(1)
+
+        ids = np.array(windowfeats_subtask_ids)
+        x = np.array(windowfeats_subtask_all, dtype=float)
+        y = np.array(windowfeats_subtask_all_y)
+        f1, f2, f3 = conf.get_merged_feature_files(window_size)
+
+        np.savetxt(f1, x, delimiter=',', header=','.join(gs.full_long_label_list), comments='')
+        np.savetxt(f2, y, delimiter=',', header=','.join(trait_labels), comments='')
+        np.savetxt(f3, ids, delimiter=',', header='participant,way/shop,half', comments='')
diff --git a/README.md b/README.md
index 625e68f..6542f04 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,7 @@
 # Eye movements during everyday behavior predict personality traits
 *Sabrina Hoppe, Tobias Loetscher, Stephanie Morey and Andreas Bulling*
 
-This repository provides all data used for the publication [in Frontiers in Human Neuroscience](https://dx.doi.org/10.3389/fnhum.2018.00105).
-Code is coming soon!
+This repository provides all data and code used for the publication [in Frontiers in Human Neuroscience](https://dx.doi.org/10.3389/fnhum.2018.00105).
 
 ## Dataset
 * Gaze data recorded at 60Hz from 42 participants is stored in `data/ParticipantXX`.
@@ -20,6 +19,14 @@
 * Timestamps indicating the times when participants entered and left the shop are given in `info/annotation.csv` in seconds.
 
+
+## Code
+Reproducing the paper results step by step:
+1. __Extract features from raw gaze data__:
+   run `python 00_compute_features.py` to compute gaze features for all participants.
+   Once extracted, the features are stored in `features/ParticipantXX/window_features_YY.npy` where XX is the participant number and YY the length of the sliding window in seconds.
+
+
 ## Citation
 If you want to cite this project, please use the following Bibtex format:
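A quick shape check of the merged files written by the script above (illustrative sketch, not part of the patch):

```python
import numpy as np

x = np.genfromtxt('features/merged_features_15.csv', delimiter=',', skip_header=1)
y = np.genfromtxt('features/merged_traits_15.csv', delimiter=',', skip_header=1)
ids = np.genfromtxt('features/merged_ids_15.csv', delimiter=',', skip_header=1)
print(x.shape, y.shape, ids.shape)  # same number of rows; x has 207 feature columns
```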
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1 @@
+
diff --git a/config/__init__.py b/config/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/config/conf.py b/config/conf.py
new file mode 100644
index 0000000..1e1f27d
--- /dev/null
+++ b/config/conf.py
@@ -0,0 +1,97 @@
+import numpy as np
+
+# global parameters
+n_participants = 42
+n_traits = 7
+max_n_feat = 207
+max_n_iter = 100
+all_window_sizes = [5, 15, 30, 45, 60, 75, 90, 105, 120, 135]
+all_shop_window_sizes = [5, 15]  # at least 3/4 of the participants have a time window of these sizes in the shop
+
+# cross validation parameters
+n_inner_folds = 3
+n_outer_folds = 5
+
+# Random Forest parameters
+tree_max_features = 15
+tree_max_depth = 5
+n_estimators = 100
+max_n_jobs = 5
+
+# given a window size, return half of it as step size; cast to int if that half is a whole number
+def get_step_size(window_size):
+    step_size = window_size / 2.0
+    if step_size * 10 % 2 == 0:
+        step_size = int(step_size)
+    return step_size
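For the window sizes used here, the helper behaves like this (illustrative calls, not part of the patch):

```python
from config import conf

print(conf.get_step_size(30))  # 15   -> int, since half of 30 is a whole number
print(conf.get_step_size(45))  # 22.5 -> float, the half-window step is kept as-is
```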
+
+# relative paths
+data_folder = 'data'
+info_folder = 'info'
+feature_folder = 'features'
+result_folder = 'results'
+figure_folder = 'figures'
+annotation_path = info_folder + '/annotation.csv'
+binned_personality_file = info_folder + '/binned_personality.csv'
+personality_sex_age_file = info_folder + '/personality_sex_age.csv'
+
+# load the personality trait names from file and map them to abbreviations
+traitlabels = np.loadtxt(binned_personality_file, delimiter=',', dtype=str)[0, 1:]
+def get_abbr(s):
+    return ''.join(item[0] for item in s.split() if item[0].isupper())
+medium_traitlabels = [get_abbr(s) if (" " in s) else s for s in traitlabels]
+short_traitlabels = [''.join(item[0] for item in tl.split() if item[0].isupper()) for tl in traitlabels]
+
+
+# dynamically create relative paths for result files to create
+def get_result_folder(annotation_val):
+    return result_folder + '/A' + str(annotation_val)
+
+def get_result_filename(annotation_val, trait, shuffle_labels, i, add_suffix=False):
+    filename = get_result_folder(annotation_val) + '/' + short_traitlabels[trait]
+    if shuffle_labels:
+        filename += '_rnd'
+    filename += '_' + str(i).zfill(3)
+    if add_suffix:
+        filename += '.npz'
+    return filename
+
+def get_feature_folder(participant):
+    return feature_folder + '/Participant' + str(participant).zfill(2)
+
+def get_merged_feature_files(window_size):
+    return feature_folder + '/merged_features_' + str(window_size) + '.csv', \
+           feature_folder + '/merged_traits_' + str(window_size) + '.csv', \
+           feature_folder + '/merged_ids_' + str(window_size) + '.csv'
+
+def get_data_folder(participant):
+    return data_folder + '/Participant' + str(participant).zfill(2)
+
+def get_window_times_file(participant, window_size):
+    return get_feature_folder(participant) + '/window_times_' + str(window_size) + '.npy'
+
+def get_window_features_file(participant, window_size):
+    return get_feature_folder(participant) + '/window_features_' + str(window_size) + '.npy'
+
+def get_overall_features_file(participant):
+    return get_feature_folder(participant) + '/overall_features.npy'
+
+
+# parameters for fixation/saccade detection
+fixation_radius_threshold = 0.025
+fixation_duration_threshold = 0.1
+saccade_min_velocity = 2
+max_saccade_duration = 0.5
+
+# annotation constants (as given as arguments to train_classifier, and as used for file names in result_folder)
+annotation_all = 0
+annotation_ways = 1
+annotation_shop = 2
+annotation_values = [annotation_all, annotation_ways, annotation_shop]
+
+# annotations used in merged_ids_* files in the feature_folder
+# column 1
+time_window_annotation_wayI = 1
+time_window_annotation_shop = 2
+time_window_annotation_wayII = 3
+# column 2
+time_window_annotation_halfI = 1
+time_window_annotation_halfII = 2
diff --git a/config/names.py b/config/names.py
new file mode 100644
index 0000000..824b754
--- /dev/null
+++ b/config/names.py
@@ -0,0 +1,160 @@
+fixations_list_labels = ['mean x', 'mean y',
+                         'var x', 'var y',
+                         't start', 't end',
+                         'start index', 'end index',
+                         'mean diameter', 'var diameter',
+                         'mean successive angles', 'var successive angles'
+                         ]
+fix_mean_x_i = 0
+fix_mean_y_i = 1
+fix_var_x_i = 2
+fix_var_y_i = 3
+fix_start_t_i = 4
+fix_end_t_i = 5
+fix_start_index_i = 6
+fix_end_index_i = 7
+fix_mean_diam_i = 8
+fix_var_diam_i = 9
+fix_mean_succ_angles = 10
+fix_var_succ_angles = 11
+
+saccades_list_labels = ['start x', 'start y',
+                        'end x', 'end y',
+                        'angle',
+                        't start', 't end',
+                        'start index', 'end index',
+                        'mean diameter', 'var diameter',
+                        'peak velocity', 'amplitude',
+                        ]
+
+sacc_start_x_i = 0
+sacc_start_y_i = 1
+sacc_end_x_i = 2
+sacc_end_y_i = 3
+sacc_angle_i = 4
+sacc_t_start_i = 5
+sacc_t_end_i = 6
+sacc_start_index_i = 7
+sacc_end_index_i = 8
+sacc_mean_diam_i = 9
+sacc_var_diam_i = 10
+sacc_peak_vel_i = 11
+sacc_amplitude_i = 12
+
+blink_list_labels = ['t start', 't end', 'start index', 'end index']
+
+blink_start_t_i = 0
+blink_end_t_i = 1
+blink_start_index_i = 2
+blink_end_index_i = 3
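These index constants mirror the label lists above; event lists built by featureExtraction/event_detection.py later in this patch are addressed with them. A self-contained sketch with dummy values (illustrative, not part of the patch):

```python
from config import names as gs

# one fixation row, ordered as in fixations_list_labels (dummy values)
fix = [0.5, 0.5, 1e-4, 1e-4, 10.0, 10.3, 600, 618, 3.2, 0.1, 0.0, 0.2]
duration = fix[gs.fix_end_t_i] - fix[gs.fix_start_t_i]   # ~0.3 s
centroid = (fix[gs.fix_mean_x_i], fix[gs.fix_mean_y_i])  # (0.5, 0.5)
```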
+
+event_feature_labels = ['fixation rate', 'saccade rate',                                                      # 0 1
+                        'small sacc. rate', 'large sacc. rate', 'positive sacc. rate', 'negative sacc. rate',  # 2 3 4 5
+                        'ratio sacc - fix',                                                                   # 6
+                        'ratio small sacc', 'ratio large sacc', 'ratio right sacc', 'ratio left sacc',        # 7 8 9 10
+                        'mean sacc amplitude', 'var sacc amplitude', 'min sacc amplitude', 'max sacc amplitude',  # 11 12 13 14
+                        'mean peak velocity', 'var peak velocity', 'min peak velocity', 'max peak velocity',  # 15 16 17 18
+                        'mean mean diameter sacc', 'var mean diameter sacc', 'mean var diameter sacc', 'var var diameter sacc',  # 19 20 21 22
+                        'mean fix duration', 'var fix duration', 'min fix duration', 'max fix duration',      # 23 24 25 26
+                        'dwelling time',                                                                      # 27
+                        'mean mean subsequent angle', 'var mean subsequent angle', 'mean var subsequent angle', 'var var subsequent angle',  # 28 29 30 31
+                        'mean var x', 'mean var y', 'var var x', 'var var y',                                 # 32 33 34 35
+                        'mean mean diameter fix', 'var mean diameter fix', 'mean var diameter fix', 'var var diameter fix',  # 36 37 38 39
+                        'mean blink duration', 'var blink duration', 'min blink duration', 'max blink duration',  # 40 41 42 43
+                        'blink rate'                                                                          # 44
+                        ]
+
+event_feature_labels_long = ['fixation rate', 'saccade rate',                                                 # 0 1
+                             'small saccade rate', 'large saccade rate', 'positive saccade rate', 'negative saccade rate',  # 2 3 4 5
+                             'saccade:fixation ratio',                                                        # 6
+                             'ratio of small saccades', 'ratio of large saccades', 'ratio of right saccades', 'ratio of left saccades',  # 7 8 9 10
+                             'mean saccade amplitude', 'var saccade amplitude', 'min saccade amplitude', 'max saccade amplitude',  # 11 12 13 14
+                             'mean saccadic peak velocity', 'var saccadic peak velocity', 'min saccadic peak velocity', 'max saccadic peak velocity',  # 15 16 17 18
+                             'mean of the mean pupil diameter during saccades', 'var of the mean pupil diameter during saccades',
+                             'mean of the var pupil diameter during saccades', 'var of the var pupil diameter during saccades',  # 19 20 21 22
+                             'mean fixation duration', 'var fixation duration', 'min fixation duration', 'max fixation duration',  # 23 24 25 26
+                             'dwelling time',                                                                 # 27
+                             'mean of the mean of subsequent angles', 'var of the mean of subsequent angles',
+                             'mean of the var of subsequent angles', 'var of the var of subsequent angles',   # 28 29 30 31
+                             'mean of the var of x', 'mean of the var of y', 'var of the var of x', 'var of the var of y',  # 32 33 34 35
+                             'mean of the mean pupil diameter during fixations', 'var of the mean pupil diameter during fixations',
+                             'mean of the var pupil diameter during fixations', 'var of the var pupil diameter during fixations',  # 36 37 38 39
+                             'mean blink duration', 'var blink duration', 'min blink duration', 'max blink duration',  # 40 41 42 43
+                             'blink rate'                                                                     # 44
+                             ]
+
+def get_wordbook_feature_labels(movement_abbreviation):
+    return [movement_abbreviation + s + ' WB' + str(n) for n in [1, 2, 3, 4] for s in ['>0', 'max', 'min', 'arg max', 'arg min', 'range', 'mean', 'var']]
+
+def get_wordbook_feature_labels_long(movement_abbreviation):
+    return [s1 + str(n) + '-gram ' + movement_abbreviation + s2 for n in [1, 2, 3, 4]
+            for (s1, s2) in [('number of different ', ' movements'),
+                             ('max frequency ', ' movements'),
+                             ('min frequency ', ' movements'),
+                             ('most frequent ', ' movement'),
+                             ('least frequent ', ' movement'),
+                             ('range of frequencies of ', ' movements'),
+                             ('mean frequency of ', ' movements'),
+                             ('var frequency of ', ' movements')
+                             ]]
+
+position_feature_labels = ['mean x', 'mean y', 'mean diameter',
+                           'min x', 'min y', 'min diameter',
+                           'max x', 'max y', 'max diameter',
+                           'min-max x', 'min-max y', 'min-max diameter',
+                           'std x', 'std y', 'std diameter',
+                           'median x', 'median y', 'median diameter',
+                           '1st quart x', '1st quart y', '1st quart diameter',
+                           '3rd quart x', '3rd quart y', '3rd quart diameter',
+                           'IQR x', 'IQR y', 'IQR diameter',
+                           'mean abs diff x', 'mean abs diff y', 'mean abs diff diameter',
+                           'mean diff x', 'mean diff y', 'mean diff diameter',
+                           'mean subsequent angle'
+                           ]
+
+position_feature_labels_long = ['mean x', 'mean y', 'mean pupil diameter',
+                                'minimum x', 'minimum y', 'minimum pupil diameter',
+                                'maximum x', 'maximum y', 'maximum pupil diameter',
+                                'range x', 'range y', 'range pupil diameter',
+                                'std x', 'std y', 'std pupil diameter',
+                                'median x', 'median y', 'median pupil diameter',
+                                '1st quartile x', '1st quartile y', '1st quartile pupil diameter',
+                                '3rd quartile x', '3rd quartile y', '3rd quartile pupil diameter',
+                                'inter quartile range x', 'inter quartile range y', 'inter quartile range pupil diameter',
+                                'mean absolute difference of subsequent x', 'mean absolute difference of subsequent y', 'mean absolute difference of subsequent pupil diameters',
+                                'mean difference of subsequent x', 'mean difference of subsequent y', 'mean difference of subsequent pupil diameters',
+                                'mean subsequent angle'
+                                ]
+
+heatmap_feature_labels = ['heatmap_' + str(i).zfill(2) for i in xrange(0, 64)]
+heatmap_feature_labels_long = ['heatmap cell ' + str(i).zfill(2) for i in xrange(0, 64)]
+
+# ordered as assembled in gazeAnalysis.get_full_feature_vector
+full_label_list = event_feature_labels + position_feature_labels + heatmap_feature_labels + \
+                  get_wordbook_feature_labels('sacc.') + get_wordbook_feature_labels('SF')
+
+full_long_label_list = event_feature_labels_long + position_feature_labels_long + heatmap_feature_labels_long + \
+                       get_wordbook_feature_labels_long('sacc.') + get_wordbook_feature_labels_long('SF')
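Taken together, these blocks account for conf.max_n_feat = 207 feature columns (45 event + 34 position + 64 heatmap + 2 x 32 n-gram features). A quick check (illustrative, not part of the patch):

```python
from config import names as gs

print(len(gs.full_label_list))       # 207
print(len(gs.full_long_label_list))  # 207
```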
+
+
+sacc_dictionary = ['A', 'B', 'C', 'R', 'E', 'F', 'G', 'D', 'H', 'J', 'K', 'L', 'M', 'N', 'O', 'U', 'u', 'b', 'r', 'f',
+                   'd', 'j', 'l', 'n']
+sacc_bins_two = [a+b for a in sacc_dictionary for b in sacc_dictionary]
+sacc_bins_three = [a+b+c for a in sacc_dictionary for b in sacc_dictionary for c in sacc_dictionary]
+sacc_bins_four = [a+b+c+d for a in sacc_dictionary for b in sacc_dictionary for c in sacc_dictionary for d in sacc_dictionary]
+sacc_bins = [sacc_dictionary, sacc_bins_two, sacc_bins_three, sacc_bins_four]
+
+saccFix_dictionary = ['S_lu', 'S_ld', 'S_lr', 'S_ll', 'S_su', 'S_sd', 'S_sr', 'S_sl', 'F_l', 'F_s']
+saccFix_bins_two = [a+b for a in saccFix_dictionary for b in saccFix_dictionary]
+saccFix_bins_three = [a+b+c for a in saccFix_dictionary for b in saccFix_dictionary for c in saccFix_dictionary]
+saccFix_bins_four = [a+b+c+d for a in saccFix_dictionary for b in saccFix_dictionary for c in saccFix_dictionary for d in saccFix_dictionary]
+saccFix_bins = [saccFix_dictionary, saccFix_bins_two, saccFix_bins_three, saccFix_bins_four]
+
+def write_pami_feature_labels_to_file(targetfile):
+    f = open(targetfile, 'w')  # creates the file if it does not exist
+    f.write(',short,long\n')
+    for i, (item1, item2) in enumerate(zip(full_label_list, full_long_label_list)):
+        f.write(str(i) + ',' + item1 + ',' + item2 + '\n')
+    f.close()
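Since every possible n-gram up to length four is enumerated as a histogram bin, the bin lists grow geometrically (illustrative check, not part of the patch):

```python
from config import names as gs

print([len(b) for b in gs.sacc_bins])     # [24, 576, 13824, 331776]
print([len(b) for b in gs.saccFix_bins])  # [10, 100, 1000, 10000]
```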
diff --git a/featureExtraction/__init__.py b/featureExtraction/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/featureExtraction/event_detection.py b/featureExtraction/event_detection.py
new file mode 100644
index 0000000..f251a7e
--- /dev/null
+++ b/featureExtraction/event_detection.py
@@ -0,0 +1,327 @@
+import numpy as np
+import sys
+import math
+from config import names as gs
+
+
+def get_fixation_list(gaze, errors, xi, yi, ti, fixation_radius_threshold, fixation_duration_threshold, pupil_diameter):
+    n, m = gaze.shape
+    fixations = []
+    fixation = []  # single fixation, to be appended to fixations
+    counter = 0    # number of points in the fixation
+    sumx = 0       # used to compute the center of a fixation in x and y direction
+    sumy = 0
+    distance = 0   # captures the distance of the current sample from the fixation center
+    i = 0          # iterates through the gaze samples
+
+    while i < n - 1:
+        x = gaze[i, xi]
+        y = gaze[i, yi]
+
+        if counter == 0:
+            # ignore erroneous samples before a fixation
+            if errors[i]:
+                i += 1
+                continue
+            centerx = x
+            centery = y
+        else:
+            centerx = np.true_divide(sumx, counter)
+            centery = np.true_divide(sumy, counter)
+
+        if not errors[i]:  # only update the distance if the current sample is not erroneous
+            distance = np.sqrt((x - centerx) * (x - centerx) + (y - centery) * (y - centery))
+
+        if distance > fixation_radius_threshold:  # start a new fixation
+            if gaze[(i - 1), ti] - gaze[(i - counter), ti] >= fixation_duration_threshold:
+                start_index = i - counter + 1
+                end_index = i - 1 - 1
+
+                # discard fixations with more than 50% erroneous samples
+                percentage_error = np.sum(errors[start_index:(end_index + 1)]) / float(end_index - start_index)
+                if percentage_error >= 0.5:
+                    if errors[i]:
+                        i += 1
+                        counter = 0
+                    else:
+                        counter = 1
+                        sumx = x
+                        sumy = y
+                    continue
+
+                gaze_indices = np.arange(start_index, end_index + 1)[np.logical_not(errors[start_index:(end_index + 1)])]
+
+                start_index = gaze_indices[0]
+                end_index = gaze_indices[-1]
+
+                gazex = gaze[start_index:(end_index + 1), xi][np.logical_not(errors[start_index:(end_index + 1)])]
+                gazey = gaze[start_index:(end_index + 1), yi][np.logical_not(errors[start_index:(end_index + 1)])]
+                gazet = gaze[start_index:(end_index + 1), ti][np.logical_not(errors[start_index:(end_index + 1)])]
+
+                # extract fixation characteristics
+                fixation.append(np.mean(gazex))        # 0-1. mean x, y
+                fixation.append(np.mean(gazey))
+                fixation.append(np.var(gazex))         # 2-3. var x, y
+                fixation.append(np.var(gazey))
+                fixation.append(gazet[0])              # 4-5. t_start, t_end
+                fixation.append(gazet[-1])
+                fixation.append(gaze_indices[0])       # 6-7. index_start, index_end
+                fixation.append(gaze_indices[-1])
+
+                ds = ((pupil_diameter[start_index:(end_index + 1), 1] + pupil_diameter[start_index:(end_index + 1), 2]) / 2.)[np.logical_not(errors[start_index:(end_index + 1)])]
+
+                fixation.append(np.mean(ds))           # 8. mean pupil diameter
+                fixation.append(np.var(ds))            # 9. var pupil diameter
+
+                succ_dx = gazex[1:] - gazex[:-1]
+                succ_dy = gazey[1:] - gazey[:-1]
+                succ_angles = np.arctan2(succ_dy, succ_dx)
+
+                fixation.append(np.mean(succ_angles))  # 10. mean successive angle
+                fixation.append(np.var(succ_angles))   # 11. var successive angle
+                fixations.append(fixation)
+                assert len(fixation) == len(gs.fixations_list_labels)
+
+            # set up a new fixation
+            fixation = []
+            if errors[i]:
+                i += 1
+                counter = 0
+            else:
+                counter = 1
+                sumx = x
+                sumy = y
+        else:
+            if not errors[i]:
+                counter += 1
+                sumx += x
+                sumy += y
+
+        i += 1
+    return fixations
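The detector is dispersion-based: a sample joins the current fixation as long as it stays within fixation_radius_threshold of the running centroid. A toy illustration of that criterion (not part of the patch; 0.025 is conf.fixation_radius_threshold):

```python
import numpy as np

# four gaze samples; the first three form the fixation so far
pts = np.array([[0.50, 0.50], [0.51, 0.50], [0.50, 0.49], [0.52, 0.51]])
center = pts[:3].mean(axis=0)           # running centroid (sumx/counter, sumy/counter)
dist = np.linalg.norm(pts[3] - center)  # ~0.021
print(dist <= 0.025)                    # True -> the fourth sample joins the fixation
```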
+
+
+def get_saccade_list(gaze, fixations, xi, yi, ti, pupil_diameter, fixation_radius_threshold, errors,
+                     saccade_min_velocity, max_saccade_duration):
+    saccades = []
+    wordbook_string = []
+
+    # each movement between two subsequent fixations could be a saccade, but...
+    for i in xrange(1, len(fixations)):
+        # ...not if the window is too long
+        duration = float(fixations[i][gs.fix_start_t_i] - fixations[i - 1][gs.fix_end_t_i])
+        if duration > max_saccade_duration:
+            continue
+
+        start_index = fixations[i - 1][gs.fix_end_index_i]
+        end_index = fixations[i][gs.fix_start_index_i]
+
+        gazex = gaze[start_index:(end_index + 1), xi][np.logical_not(errors[start_index:(end_index + 1)])]
+        gazey = gaze[start_index:(end_index + 1), yi][np.logical_not(errors[start_index:(end_index + 1)])]
+        gazet = gaze[start_index:(end_index + 1), ti][np.logical_not(errors[start_index:(end_index + 1)])]
+
+        dx = np.abs(gazex[1:] - gazex[:-1])
+        dy = np.abs(gazey[1:] - gazey[:-1])
+        dt = np.abs(gazet[1:] - gazet[:-1])
+
+        # ...not if less than 2 non-erroneous samples are left:
+        if len(dt) < 2:
+            continue
+
+        distance = np.linalg.norm([dx, dy])
+        peak_velocity = np.amax(distance / dt)
+
+        start_x = gazex[0]
+        start_y = gazey[0]
+        end_x = gazex[-1]
+        end_y = gazey[-1]
+
+        dx = end_x - start_x
+        dy = end_y - start_y
+
+        # ...not if the amplitude is shorter than a fifth of fixation_radius_threshold
+        amplitude = np.linalg.norm([dx, dy])
+        if amplitude < fixation_radius_threshold / 5.0:
+            continue
+
+        # ...not if the peak velocity is very low
+        if peak_velocity < saccade_min_velocity:
+            continue
+
+        percentage_error = np.sum(errors[start_index:(end_index + 1)]) / float(end_index - start_index)
+        # ...not if more than 50% of the data are erroneous
+        if percentage_error >= 0.5:
+            continue
+        else:  # found a saccade!
+            # compute characteristics of the saccade, like start and end point, amplitude, ...
+            saccade = []
+            saccade.append(start_x)  # 0-1. start x, y
+            saccade.append(start_y)
+            saccade.append(end_x)    # 2-3. end x, y
+            saccade.append(end_y)
+
+            if dx == 0:
+                radians = 0
+            else:
+                radians = np.arctan(np.true_divide(dy, dx))
+
+            if dx > 0:
+                if dy < 0:
+                    radians += (2 * np.pi)
+            else:
+                radians = np.pi + radians
+
+            saccade.append(radians)                           # 4. angle
+            saccade.append(fixations[i - 1][gs.fix_end_t_i])  # 5-6. t_start, t_end
+            saccade.append(fixations[i][gs.fix_start_t_i])
+            saccade.append(start_index)                       # 7-8. index_start, index_end
+            saccade.append(end_index)
+
+            ds = (pupil_diameter[start_index:(end_index + 1), 1] + pupil_diameter[start_index:(end_index + 1), 2]) / 2.0
+            saccade.append(np.mean(ds))    # 9. mean pupil diameter
+            saccade.append(np.var(ds))     # 10. var pupil diameter
+            saccade.append(peak_velocity)  # 11. peak velocity
+            saccade.append(amplitude)      # 12. amplitude
+
+            # append a character representing this kind of saccade to the wordbook_string which will be used for n-gram features
+            sac_id = get_dictionary_entry_for_saccade(amplitude, fixation_radius_threshold, radians)
+            wordbook_string.append(sac_id)
+            saccades.append(saccade)
+
+            # assert all saccade characteristics were computed
+            assert len(saccade) == len(gs.saccades_list_labels)
+    return saccades, wordbook_string
+
+
+def get_blink_list(event_strings, gaze, ti):
+    assert len(event_strings) == len(gaze)
+
+    # detect blinks
+    blinks = []
+    blink = []  # single blink, to be appended to blinks
+    i = 0
+    starti = i
+    blink_started = False
+
+    while i < len(event_strings) - 1:
+        if event_strings[i] == 'Blink' and not blink_started:  # start a new blink
+            starti = i
+            blink_started = True
+        elif blink_started and not event_strings[i] == 'Blink':
+            blink.append(gaze[starti, ti])
+            blink.append(gaze[i - 1, ti])
+            blink.append(starti)
+            blink.append(i - 1)
+            blinks.append(blink)
+            assert len(blink) == len(gs.blink_list_labels)
+            blink_started = False
+            blink = []
+        i += 1
+
+    return blinks
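A toy call (not part of the patch): consecutive samples labelled 'Blink' by the tracker collapse into one entry of [t start, t end, start index, end index]:

```python
import numpy as np
from featureExtraction import event_detection as ed

events = ['Fixation', 'Blink', 'Blink', 'Saccade', 'Fixation']
gaze = np.array([[0.00], [0.02], [0.04], [0.06], [0.08]])  # only a time column here, so ti=0
print(ed.get_blink_list(events, gaze, 0))  # [[0.02, 0.04, 1, 2]]
```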
+
+
+def get_dictionary_entry_for_saccade(amplitude, fixation_radius_threshold, degree_radians):
+    # Saccade type: small, iff amplitude is less than 2 fixation_radius_thresholds
+    #          U
+    #       O     A
+    #      N   u    B
+    #     M   n  b    C
+    #     L   l    r  R
+    #     K   j  f    E
+    #      J   d    F
+    #       H     G
+    #          D
+
+    degrees = np.true_divide(degree_radians * 180.0, np.pi)
+    if amplitude < 2 * fixation_radius_threshold:
+        d_degrees = degrees / (np.true_divide(90, 4))
+        if d_degrees < 1:
+            sac_id = 'r'
+        elif d_degrees < 3:
+            sac_id = 'b'
+        elif d_degrees < 5:
+            sac_id = 'u'
+        elif d_degrees < 7:
+            sac_id = 'n'
+        elif d_degrees < 9:
+            sac_id = 'l'
+        elif d_degrees < 11:
+            sac_id = 'j'
+        elif d_degrees < 13:
+            sac_id = 'd'
+        elif d_degrees < 15:
+            sac_id = 'f'
+        elif d_degrees < 16:
+            sac_id = 'r'
+        else:
+            print
+            print 'error! d_degrees cannot be matched to a sac_id for a small saccade ', d_degrees
+            sys.exit(1)
+
+    else:  # large
+        d_degrees = degrees / (np.true_divide(90, 8))
+
+        if d_degrees < 1:
+            sac_id = 'R'
+        elif d_degrees < 3:
+            sac_id = 'C'
+        elif d_degrees < 5:
+            sac_id = 'B'
+        elif d_degrees < 7:
+            sac_id = 'A'
+        elif d_degrees < 9:
+            sac_id = 'U'
+        elif d_degrees < 11:
+            sac_id = 'O'
+        elif d_degrees < 13:
+            sac_id = 'N'
+        elif d_degrees < 15:
+            sac_id = 'M'
+        elif d_degrees < 17:
+            sac_id = 'L'
+        elif d_degrees < 19:
+            sac_id = 'K'
+        elif d_degrees < 21:
+            sac_id = 'J'
+        elif d_degrees < 23:
+            sac_id = 'H'
+        elif d_degrees < 25:
+            sac_id = 'D'
+        elif d_degrees < 27:
+            sac_id = 'G'
+        elif d_degrees < 29:
+            sac_id = 'F'
+        elif d_degrees < 31:
+            sac_id = 'E'
+        elif d_degrees < 33:
+            sac_id = 'R'
+        else:
+            print 'error! d_degrees cannot be matched to a sac_id for a large saccade: ', d_degrees
+            sys.exit(1)
+    return sac_id
+
+
+def detect_all(gaze, errors, ti, xi, yi, fixation_radius_threshold=0.01, pupil_diameter=None, event_strings=None,
+               fixation_duration_threshold=0.1, saccade_min_velocity=2, max_saccade_duration=0.1):
+    """
+    :param gaze: gaze data, typically [t,x,y]
+    :param errors: boolean mask flagging erroneous gaze samples, same length as gaze
+    :param ti, xi, yi: column indices into gaze, i.e. for [t,x,y] ti=0, xi=1, yi=2
+    :param fixation_radius_threshold: dispersion threshold
+    :param fixation_duration_threshold: temporal threshold
+    :param pupil_diameter: pupil diameter values, same length as gaze
+    :param event_strings: list of events, here provided by SMI; used to extract blink information
+    """
+
+    fixations = get_fixation_list(gaze, errors, xi, yi, ti, fixation_radius_threshold, fixation_duration_threshold,
+                                  pupil_diameter)
+    saccades, wordbook_string = get_saccade_list(gaze, fixations, xi, yi, ti, pupil_diameter,
+                                                 fixation_radius_threshold, errors, saccade_min_velocity,
+                                                 max_saccade_duration)
+    blinks = get_blink_list(event_strings, gaze, ti)
+
+    return fixations, saccades, blinks, wordbook_string
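gaze_analysis.py below is the caller of detect_all; with the thresholds from config/conf.py the call looks roughly like this (sketch, not part of the patch; gaze, pupil_diameter and events as loaded in 00_compute_features.py, errors as returned by gazeAnalysis.detect_errors):

```python
from featureExtraction import event_detection as ed

fixations, saccades, blinks, wordbook_string = ed.detect_all(
    gaze, errors, 0, 1, 2,            # ti, xi, yi for [t, x, y] columns
    fixation_radius_threshold=0.025,  # conf.fixation_radius_threshold
    pupil_diameter=pupil_diameter, event_strings=events,
    fixation_duration_threshold=0.1,  # conf.fixation_duration_threshold
    saccade_min_velocity=2, max_saccade_duration=0.5)
```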
diff --git a/featureExtraction/gaze_analysis.py b/featureExtraction/gaze_analysis.py
new file mode 100644
index 0000000..e54457e
--- /dev/null
+++ b/featureExtraction/gaze_analysis.py
@@ -0,0 +1,582 @@
+#!/usr/bin/python
+import numpy as np
+import sys, os
+from featureExtraction import event_detection as ed
+import operator
+from config import names as gs
+
+class gazeAnalysis(object):
+    # dictionary for saccade-based n-grams:
+    # each character encodes one direction; capital characters stand for long saccades, the others for short ones
+    # short means the saccade amplitude is less than 2 fixation_radius_thresholds
+    #          U
+    #       O     A
+    #      N   u    B
+    #     M   n  b    C
+    #     L   l  .  r R
+    #     K   j  f    E
+    #      J   d    F
+    #       H     G
+    #          D
+    sacc_dictionary = ['A', 'B', 'C', 'R', 'E', 'F', 'G', 'D', 'H', 'J', 'K', 'L', 'M', 'N', 'O', 'U', 'u', 'b', 'r', 'f',
+                       'd', 'j', 'l', 'n']
+    sacc_bins_two = [a+b for a in sacc_dictionary for b in sacc_dictionary]
+    sacc_bins_three = [a+b+c for a in sacc_dictionary for b in sacc_dictionary for c in sacc_dictionary]
+    sacc_bins_four = [a+b+c+d for a in sacc_dictionary for b in sacc_dictionary for c in sacc_dictionary for d in sacc_dictionary]
+    sacc_bins = [sacc_dictionary, sacc_bins_two, sacc_bins_three, sacc_bins_four]
+
+    # dictionary for n-grams based on saccades and fixations:
+    # S are saccades, long or short (i.e. longer or shorter than the fixation radius), and up/down/right/left,
+    # e.g. S_lu is a long saccade up
+    # F are fixations, either long or short (i.e. longer or shorter than twice the minimum fixation duration)
+    # saccFix_dictionary = ['S_lu', 'S_ld', 'S_lr', 'S_ll', 'S_su', 'S_sd', 'S_sr', 'S_sl', 'F_l', 'F_s']
+    saccFix_dictionary = ['U', 'D', 'R', 'L', 'u', 'd', 'r', 'l', 'F', 'f']
+    saccFix_bins_two = [a+b for a in saccFix_dictionary for b in saccFix_dictionary]
+    saccFix_bins_three = [a+b+c for a in saccFix_dictionary for b in saccFix_dictionary for c in saccFix_dictionary]
+    saccFix_bins_four = [a+b+c+d for a in saccFix_dictionary for b in saccFix_dictionary for c in saccFix_dictionary for d in saccFix_dictionary]
+    saccFix_bins = [saccFix_dictionary, saccFix_bins_two, saccFix_bins_three, saccFix_bins_four]
+
+    def __init__(self, gaze, fixation_radius_threshold, fixation_duration_threshold, saccade_min_velocity, max_saccade_duration,
+                 pupil_diameter=None, event_strings=None, ti=0, xi=1, yi=2):
+        assert gaze.size > 0
+
+        # save data in the instance
+        self.gaze = gaze
+        self.diams = pupil_diameter
+        self.event_strings = event_strings
+
+        # save constants, indices and thresholds that will be used multiple times
+        self.fixation_radius_threshold = fixation_radius_threshold
+        self.fixation_duration_threshold = fixation_duration_threshold
+        self.xi = xi
+        self.yi = yi
+        self.ti = ti
+
+        # detect errors, fixations, saccades and blinks
+        self.errors = self.detect_errors()
+        self.fixations, self.saccades, self.blinks, self.wordbook_string = \
+            ed.detect_all(self.gaze, self.errors, self.ti, self.xi, self.yi, pupil_diameter=pupil_diameter,
+                          event_strings=event_strings, fixation_duration_threshold=fixation_duration_threshold,
+                          fixation_radius_threshold=fixation_radius_threshold, saccade_min_velocity=saccade_min_velocity,
+                          max_saccade_duration=max_saccade_duration)
+
+    def detect_errors(self, confidence_threshold=0.8, outlier_threshold=0.5):
+        """
+        :param confidence_threshold: threshold below which all gaze data is deleted
+        :param outlier_threshold: margin around the calibration area (i.e. [0,1]) beyond which gaze is treated as an outlier
+        """
+        errors = np.full((len(self.gaze)), False, dtype=bool)
+
+        # gaze is nan
+        errors[np.isnan(self.gaze[:, self.xi])] = True
+        errors[np.isnan(self.gaze[:, self.yi])] = True
+
+        # gaze outside a certain range
+        errors[self.gaze[:, self.xi] < -outlier_threshold] = True
+        errors[self.gaze[:, self.xi] > outlier_threshold + 1] = True
+
+        errors[self.gaze[:, self.yi] < -outlier_threshold] = True
+        errors[self.gaze[:, self.yi] > outlier_threshold + 1] = True
+
+        return errors
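The same masking logic in isolation (toy illustration, not part of the patch):

```python
import numpy as np

x = np.array([0.4, -0.6, 1.6, np.nan])
outlier_threshold = 0.5
errors = np.isnan(x) | (x < -outlier_threshold) | (x > outlier_threshold + 1)
print(errors)  # [False  True  True  True]
```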
+
+    def get_window_features(self, sliding_window_size, sliding_window_step_size, start_index=-1, end_index=-1):
+        """
+        computes features using a sliding window approach with the given sliding_window_size and sliding_window_step_size
+        """
+        # if no start and end index are given, use all data
+        if start_index == -1 and end_index == -1:
+            start_index = 0
+            end_index = len(self.gaze[:, 0]) - 1
+
+        # compute start and end times of each resulting sliding window
+        window_times = self.get_sliding_windows(start_index, end_index, sliding_window_size, sliding_window_step_size)
+
+        # compute features for each of these windows:
+        window_feature_list = []
+        for [a, b, at, bt] in window_times:
+            overallstats = self.get_full_feature_vector(a, b)
+            window_feature_list.append(overallstats)
+            assert len(gs.full_label_list) == len(window_feature_list[0])
+
+        window_feature_list = np.array(window_feature_list)
+        window_times = np.array(window_times)
+        return window_feature_list, window_times
+
+    def get_full_feature_vector(self, start_index, end_index):
+        """
+        assembles the full feature vector from its parts:
+        features based on fixations/saccades/blinks, raw data, heatmaps, n-grams based on saccades and n-grams based on saccades and fixations
+        """
+        # features based on events, i.e. fixations/saccades/blinks
+        features = self.get_event_features(start_index, end_index)
+        assert len(gs.event_feature_labels) == len(features)
+
+        # features based on raw data, like quartiles of gaze positions
+        raw_features = self.get_raw_features(start_index, end_index)
+        features.extend(raw_features)
+        assert len(gs.position_feature_labels) == len(raw_features)
+
+        # heatmap features
+        heatmap_features = self.get_heatmap_features(start_index, end_index)
+        features.extend(heatmap_features)
+        assert len(gs.heatmap_feature_labels) == len(heatmap_features)
+
+        # n-gram features based on saccades
+        sacc_wordbook_features = self.get_sacc_ngram_features(start_index, end_index)
+        features.extend(sacc_wordbook_features)
+        assert len(gs.get_wordbook_feature_labels('')) == len(sacc_wordbook_features)
+
+        # n-gram features based on saccades and fixations
+        saccFix_wordbook_features = self.get_saccFix_ngram_features(start_index, end_index)
+        features.extend(saccFix_wordbook_features)
+        assert len(gs.get_wordbook_feature_labels('')) == len(saccFix_wordbook_features)
+
+        return features
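The resulting vector follows the order of gs.full_label_list (event, position, heatmap, then the two n-gram blocks), so individual features can be looked up by label. Sketch (not part of the patch; window_feature_list as returned by get_window_features):

```python
from config import names as gs

col = gs.full_label_list.index('fixation rate')
print(window_feature_list[:, col])  # fixation rate for every sliding window
```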
+
+    def get_event_features(self, start_index, end_index):
+        """
+        computes features based on fixations, saccades and blinks within the selected gaze window
+        """
+        event_features = []
+
+        # get non-erroneous samples between start_index and end_index:
+        x, y, d = self.get_x_y_d_part(start_index, end_index)
+
+        data = self.gaze[start_index:(end_index + 1), :]
+        fixations, saccades, wordbook_string, blinks = self.get_FixSacWb_timed(start_index, end_index)
+
+        n, m = data.shape
+        timespan = (data[-1, self.ti] - data[0, self.ti])
+
+        num_fix = len(fixations)
+        num_sacc = len(saccades)
+
+        if timespan == 0:
+            fix_rate = 0
+            sacc_rate = 0
+        else:
+            fix_rate = num_fix / float(timespan)
+            sacc_rate = num_sacc / float(timespan)
+
+        event_features.append(fix_rate)   # 0. rate of fixations
+        event_features.append(sacc_rate)  # 1. rate of saccades
+
+        # count saccades per direction quadrant, separately for large (L) and small (S) saccades
+        sacc_UR_L = wordbook_string.count('U') + wordbook_string.count('A') + wordbook_string.count('B') + wordbook_string.count('C')
+        sacc_BR_L = wordbook_string.count('R') + wordbook_string.count('E') + wordbook_string.count('F') + wordbook_string.count('G')
+        sacc_UL_L = wordbook_string.count('O') + wordbook_string.count('N') + wordbook_string.count('M') + wordbook_string.count('L')
+        sacc_BL_L = wordbook_string.count('K') + wordbook_string.count('J') + wordbook_string.count('H') + wordbook_string.count('D')
+
+        sacc_UR_S = wordbook_string.count('u') + wordbook_string.count('b')
+        sacc_BR_S = wordbook_string.count('r') + wordbook_string.count('f')
+        sacc_UL_S = wordbook_string.count('n') + wordbook_string.count('l')
+        sacc_BL_S = wordbook_string.count('j') + wordbook_string.count('d')
+
+        num_s_sacc = sacc_UR_S + sacc_BR_S + sacc_UL_S + sacc_BL_S
+        num_la_sacc = sacc_UR_L + sacc_BR_L + sacc_UL_L + sacc_BL_L
+        num_r_sacc = sacc_UR_S + sacc_BR_S + sacc_UR_L + sacc_BR_L
+        num_l_sacc = sacc_UL_S + sacc_BL_S + sacc_UL_L + sacc_BL_L
+
+        if timespan > 0:
+            event_features.append(num_s_sacc / float(timespan))   # 2. rate of small saccades
+            event_features.append(num_la_sacc / float(timespan))  # 3. rate of large saccades
+            event_features.append(num_r_sacc / float(timespan))   # 4. rate of positive (rightward) saccades
+            event_features.append(num_l_sacc / float(timespan))   # 5. rate of negative (leftward) saccades
+        else:
+            event_features.extend([0] * 4)
+
+        if num_fix > 0:
+            event_features.append(num_sacc / float(num_fix))      # 6. ratio saccades / fixations
+        else:
+            event_features.append(0)
+
+        if num_sacc > 0:
+            event_features.append(num_s_sacc / float(num_sacc))   # 7. ratio of small saccades
+            event_features.append(num_la_sacc / float(num_sacc))  # 8. ratio of large saccades
+            event_features.append(num_r_sacc / float(num_sacc))   # 9. ratio of right saccades
+            event_features.append(num_l_sacc / float(num_sacc))   # 10. ratio of left saccades
+        else:
+            event_features.extend([0] * 4)
+
+        sacc_array = np.array(saccades)
+        fix_array = np.array(fixations)
+
+        if sacc_array.size > 0:
+            # amplitude features
+            amplitudes = sacc_array[:, gs.sacc_amplitude_i]
+            event_features.append(np.mean(amplitudes))  # 11. mean sacc amplitude
+            event_features.append(np.var(amplitudes))   # 12. var sacc amplitude
+            event_features.append(amplitudes.min())     # 13. min sacc amplitude
+            event_features.append(amplitudes.max())     # 14. max sacc amplitude
+
+            # peak velocity features
+            velocities = sacc_array[:, gs.sacc_peak_vel_i]
+            event_features.append(np.mean(velocities))  # 15. mean peak velocity
+            event_features.append(np.var(velocities))   # 16. var peak velocity
+            event_features.append(velocities.min())     # 17. min peak velocity
+            event_features.append(velocities.max())     # 18. max peak velocity
+
+            if sacc_array[0, :].size == 13:
+                event_features.append(np.mean(sacc_array[:, gs.sacc_mean_diam_i]))  # 19. mean mean diameter
+                event_features.append(np.var(sacc_array[:, gs.sacc_mean_diam_i]))   # 20. var mean diameter
+                event_features.append(np.mean(sacc_array[:, gs.sacc_var_diam_i]))   # 21. mean var diameter
+                event_features.append(np.var(sacc_array[:, gs.sacc_var_diam_i]))    # 22. var var diameter
+            else:
+                event_features.extend([0] * 4)
+        else:
+            event_features.extend([0] * 12)
+
+        if fix_array.size > 0:
+            durations = np.array(fix_array[:, gs.fix_end_t_i]) - np.array(fix_array[:, gs.fix_start_t_i])
+            event_features.append(np.mean(durations))  # 23. mean fix duration
+            event_features.append(np.var(durations))   # 24. var fix duration
+            event_features.append(durations.min())     # 25. min fix duration
+            event_features.append(durations.max())     # 26. max fix duration
+            event_features.append(durations.sum())     # 27. dwelling time
+
+            event_features.append(np.mean(fix_array[:, gs.fix_mean_succ_angles]))  # 28. mean mean subsequent angle
+            event_features.append(np.var(fix_array[:, gs.fix_mean_succ_angles]))   # 29. var mean subsequent angle
+            event_features.append(np.mean(fix_array[:, gs.fix_var_succ_angles]))   # 30. mean var subsequent angle
+            event_features.append(np.var(fix_array[:, gs.fix_var_succ_angles]))    # 31. var var subsequent angle
+
+            lnnII = np.logical_not(np.isnan(fix_array[:, gs.fix_var_x_i]))
+            lnnIII = np.logical_not(np.isnan(fix_array[:, gs.fix_var_y_i]))
+            event_features.append(np.mean(fix_array[lnnII, gs.fix_var_x_i]))   # 32. mean var x
+            event_features.append(np.mean(fix_array[lnnIII, gs.fix_var_y_i]))  # 33. mean var y
+            event_features.append(np.var(fix_array[lnnII, gs.fix_var_x_i]))    # 34. var var x
+            event_features.append(np.var(fix_array[lnnIII, gs.fix_var_y_i]))   # 35. var var y
+
+            if fix_array[0, :].size == 12:
+                event_features.append(np.mean(fix_array[:, gs.fix_mean_diam_i]))  # 36. mean mean diameter
+                event_features.append(np.var(fix_array[:, gs.fix_mean_diam_i]))   # 37. var mean diameter
+                event_features.append(np.mean(fix_array[:, gs.fix_var_diam_i]))   # 38. mean var diameter
+                event_features.append(np.var(fix_array[:, gs.fix_var_diam_i]))    # 39. var var diameter
+            else:
+                event_features.extend([0] * 4)
+        else:
+            event_features.extend([0] * 17)
+
+        blink_array = np.array(blinks)
+        if blink_array.size > 0:
+            durations = np.array(blink_array[:, 1]) - np.array(blink_array[:, 0])
+            event_features.append(np.mean(durations))  # 40. mean blink duration
+            event_features.append(np.var(durations))   # 41. var blink duration
+            event_features.append(durations.min())     # 42. min blink duration
+            event_features.append(durations.max())     # 43. max blink duration
+            event_features.append(np.true_divide(len(blink_array), timespan))  # 44. blink rate
+        else:
+            event_features.extend([0] * 5)
+        return event_features
+
+    def get_x_y_d_part(self, start_index, end_index):
+        x = self.gaze[start_index:(end_index + 1), self.xi]
+        y = self.gaze[start_index:(end_index + 1), self.yi]
+        d = (self.diams[start_index:(end_index + 1), 1] + self.diams[start_index:(end_index + 1), 2]) / 2.0
+
+        err = self.errors[start_index:(end_index + 1)]
+
+        x = x[np.logical_not(err)]
+        y = y[np.logical_not(err)]
+        d = d[np.logical_not(err)]
+        return x, y, d
+
+    def get_raw_features(self, start_index, end_index):
+        """
+        computes features based on raw gaze data, like percentiles of x coordinates
+        """
+        raw_features = []
+
+        x, y, d = self.get_x_y_d_part(start_index, end_index)
+
+        for a in [x, y, d]:
+            raw_features.append(np.mean(a))                       # mean
+        for a in [x, y, d]:
+            raw_features.append(np.amin(a))                       # min
+        for a in [x, y, d]:
+            raw_features.append(np.amax(a))                       # max
+        for a in [x, y, d]:
+            raw_features.append(np.amax(a) - np.amin(a))          # range
+        for a in [x, y, d]:
+            raw_features.append(np.std(a))                        # std
+        for a in [x, y, d]:
+            raw_features.append(np.median(a))                     # median
+        for a in [x, y, d]:
+            raw_features.append(np.percentile(a, 25))             # 1st quartile
+        for a in [x, y, d]:
+            raw_features.append(np.percentile(a, 75))             # 3rd quartile
+        for a in [x, y, d]:
+            raw_features.append(np.percentile(a, 75) - np.percentile(a, 25))  # inter-quartile range
+        for a in [x, y, d]:
+            raw_features.append(np.mean(np.abs(a[1:] - a[:-1])))  # mean absolute difference of subsequent samples
+        for a in [x, y, d]:
+            raw_features.append(np.mean(a[1:] - a[:-1]))          # mean difference of subsequent samples
+
+        dx = x[:-1] - x[1:]
+        dy = y[:-1] - y[1:]
+        succ_angles = np.arctan2(dy, dx)
+        raw_features.append(np.mean(succ_angles))                 # mean subsequent angle
+        return raw_features
+
+    def get_heatmap_features(self, start_index, end_index):
+        """
+        computes a heatmap over the raw gaze positions, on an 8x8 grid
+        """
+        x, y, d = self.get_x_y_d_part(start_index, end_index)
+
+        xmin = np.percentile(x, 2.5)
+        xmax = np.percentile(x, 97.5)
+        ymin = np.percentile(y, 2.5)
+        ymax = np.percentile(y, 97.5)
+
+        heatmap, xedges, yedges = np.histogram2d(x, y, bins=(8, 8), range=[[xmin, xmax], [ymin, ymax]])
+        normalised_flat_heatmap = heatmap.flatten() / np.sum(heatmap)
+        return normalised_flat_heatmap
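The same computation outside the class (illustrative sketch, not part of the patch): the histogram is clipped to the central 95% of gaze positions per axis and normalised to sum to 1, yielding the 64 heatmap features:

```python
import numpy as np

x, y = np.random.rand(1000), np.random.rand(1000)  # stand-ins for gaze coordinates
rng = [[np.percentile(x, 2.5), np.percentile(x, 97.5)],
       [np.percentile(y, 2.5), np.percentile(y, 97.5)]]
heatmap, _, _ = np.histogram2d(x, y, bins=(8, 8), range=rng)
print((heatmap.flatten() / heatmap.sum()).shape)  # (64,)
```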
+
+    def get_sacc_ngram_features(self, start_index, end_index):
+        # find those saccades that either start or end between start_index and end_index, or start before start_index and end after end_index
+        mysacc = [sacc for sacc in self.saccades if (sacc[gs.sacc_start_index_i] > start_index and sacc[gs.sacc_start_index_i] < end_index)
+                  or (sacc[gs.sacc_end_index_i] > start_index and sacc[gs.sacc_end_index_i] < end_index)
+                  or (sacc[gs.sacc_start_index_i] < start_index and sacc[gs.sacc_end_index_i] > end_index)]
+        # create the string representing all saccades in mysacc
+        mywbs = [self.wordbook_string[self.saccades.index(sacc)] for sacc in mysacc]
+
+        # create all possible n-grams of a certain length
+        ngrams = []
+        ngrams.append(mywbs)
+        for n in xrange(2, 5):
+            ngrams.append([reduce(operator.add, mywbs[i:i+n]) for i in range(len(mywbs) - n)])
+
+        # compute histograms of the actual n-grams occurring in the data
+        histograms = []
+        for i in xrange(0, 4):
+            histograms.append(dict((x, ngrams[i].count(x)) for x in self.sacc_bins[i]))
+
+        # compute features from each histogram and append them to one list
+        sacc_ngram_features = []
+        for h in histograms:
+            wb_feat = self.get_ngram_features(h)
+            sacc_ngram_features.extend(wb_feat)
+
+        return sacc_ngram_features
+
+    def get_saccFix_wb_string_saccades(self, sacc):
+        """
+        returns the character for a single saccade that will be used for n-gram features based on saccades and fixations
+        """
+        amplitude = sacc[gs.sacc_amplitude_i]
+        angle_rad = sacc[gs.sacc_angle_i]
+        angle_deg = np.true_divide(angle_rad * 180.0, np.pi)
+
+        # 0 degrees is pointing to the right
+        if angle_deg < 45:
+            wb_str = 'r'
+        elif angle_deg < 135:
+            wb_str = 'u'
+        elif angle_deg < 225:
+            wb_str = 'l'
+        elif angle_deg < 315:
+            wb_str = 'd'
+        else:
+            wb_str = 'r'
+
+        if amplitude >= 2 * self.fixation_radius_threshold:  # long, i.e. at least 2 fixation_radius_thresholds
+            wb_str = wb_str.upper()
+
+        return wb_str
+
+    def get_saccFix_wb_string_fixations(self, fix):
+        """
+        returns the character for a single fixation that will be used for n-gram features based on saccades and fixations
+        """
+        if fix[gs.fix_end_t_i] - fix[gs.fix_start_t_i] < 2 * self.fixation_duration_threshold:
+            return 'f'
+        else:
+            return 'F'
+
+    def get_saccFix_ngram_features(self, start_index, end_index):
+        """
+        computes n-gram features based on saccades and fixations
+        """
+        # find all saccades and fixations between start_index and end_index,
+        # and create a string of their encodings
+        sacc_index = 0
+        fix_index = 0
+        wordbook_string = []
+
+        sacc_start_i = gs.sacc_start_index_i
+        sacc_end_i = gs.sacc_end_index_i
+        fix_start_i = gs.fix_start_index_i
+        fix_end_i = gs.fix_end_index_i
+
+        while (self.saccades[sacc_index][sacc_end_i] < start_index) and (sacc_index < len(self.saccades) - 1):
+            sacc_index += 1
+
+        while (self.fixations[fix_index][fix_end_i] < start_index) and (fix_index < len(self.fixations) - 1):
+            fix_index += 1
+
+        while sacc_index < len(self.saccades) and fix_index < len(self.fixations):
+            if (self.saccades[sacc_index][sacc_start_i] < end_index) and (self.fixations[fix_index][fix_start_i] < end_index):
+                if self.saccades[sacc_index][sacc_start_i] < self.fixations[fix_index][fix_start_i]:
+                    wordbook_string.append(self.get_saccFix_wb_string_saccades(self.saccades[sacc_index]))
+                    sacc_index += 1
+                else:
+                    wordbook_string.append(self.get_saccFix_wb_string_fixations(self.fixations[fix_index]))
+                    fix_index += 1
+            elif self.saccades[sacc_index][sacc_start_i] < end_index:
+                wordbook_string.append(self.get_saccFix_wb_string_saccades(self.saccades[sacc_index]))
+                sacc_index += 1
+            elif self.fixations[fix_index][fix_start_i] < end_index:
+                wordbook_string.append(self.get_saccFix_wb_string_fixations(self.fixations[fix_index]))
+                fix_index += 1
+            else:
+                sacc_index += 1
+                fix_index += 1
+
+        # compute all possible n-grams
+        ngrams = []
+        ngrams.append(wordbook_string)
+        for n in xrange(2, 5):
+            ngrams.append([reduce(operator.add, wordbook_string[i:i+n]) for i in range(len(wordbook_string) - n)])
+
+        # compute histograms for the n-grams
+        histograms = []
+        for i in xrange(0, 4):
+            histograms.append(dict((x, ngrams[i].count(x)) for x in self.saccFix_bins[i]))
+
+        # compute features from each histogram and append them to one list
+        ngram_features = []
+        for h in histograms:
+            wb_feat = self.get_ngram_features(h)
+            ngram_features.extend(wb_feat)
+
+        return ngram_features
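A toy walk-through of the encoding and counting (not part of the patch; reduce is the Python 2 builtin used above):

```python
import operator

wordbook_string = ['U', 'f', 'r']  # long saccade up, short fixation, short saccade right
bigrams = [reduce(operator.add, wordbook_string[i:i+2]) for i in range(len(wordbook_string) - 2)]
print(bigrams)  # ['Uf'] -- note that range(len - n) stops one n-gram short of the end
```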
+
+    def get_FixSacWb_timed(self, start_index, end_index):
+        """
+        returns the lists of fixations, saccades and blinks, as well as the associated wordbook_string,
+        for the time between start_index and end_index
+        """
+        myfix = [fix for fix in self.fixations if (fix[gs.fix_start_index_i] > start_index and fix[gs.fix_start_index_i] < end_index)
+                 or (fix[gs.fix_end_index_i] > start_index and fix[gs.fix_end_index_i] < end_index)]
+
+        mysacc = [sacc for sacc in self.saccades if (sacc[gs.sacc_start_index_i] > start_index and sacc[gs.sacc_start_index_i] < end_index)
+                  or (sacc[gs.sacc_end_index_i] > start_index and sacc[gs.sacc_end_index_i] < end_index)]
+
+        mywbs = [self.wordbook_string[self.saccades.index(sacc)] for sacc in mysacc]
+        blinks = [b for b in self.blinks if (b[gs.blink_start_index_i] > start_index and b[gs.blink_start_index_i] < end_index)
+                  or (b[gs.blink_end_index_i] > start_index and b[gs.blink_end_index_i] < end_index)]
+
+        return myfix, mysacc, mywbs, blinks
+
+    def get_sliding_windows(self, start_index, end_index, sliding_window_size, sliding_window_step_size):
+        """
+        computes the list of time windows resulting from the sliding window approach with the given sliding_window_size and sliding_window_step_size
+        """
+        window_times = []  # consists of lists [a, b, t_a, t_b] where a is the start index and b is the end index
+
+        a = start_index
+        b = min(self.getEndWindow(a, sliding_window_size), end_index)
+        if self.check_sliding_window_conditions(a, b, end_index, sliding_window_size):
+            window_times.append([a, b, self.gaze[a, self.ti], self.gaze[b, self.ti]])
+
+        while b < end_index:
+            a = self.getStartWindow(a, sliding_window_step_size)
+            b = self.getEndWindow(a, sliding_window_size)
+            # check if some conditions are fulfilled and if so, add the window to the list
+            if self.check_sliding_window_conditions(a, b, end_index, sliding_window_size):
+                window_times.append([a, b, self.gaze[a, self.ti], self.gaze[b, self.ti]])
+        return window_times
+
+    def check_sliding_window_conditions(self, a, b, end_index, sliding_window_size):
+        # discard too short or too long sliding windows
+        window_duration = self.gaze[b, self.ti] - self.gaze[a, self.ti]
+        min_duration = sliding_window_size - 0.1 * sliding_window_size
+        max_duration = sliding_window_size + 0.1 * sliding_window_size
+        if window_duration < min_duration or window_duration > max_duration:
+            return False
+
+        if b <= end_index:
+            errors_in_window = np.sum(self.errors[a:(b + 1)]) / float(b - a)
+
+            if errors_in_window < 0.5:
+                # discard the window if no saccade or fixation was detected
+                fixations, saccades, wordbook_string, blinks = self.get_FixSacWb_timed(a, b)
+                if len(fixations) == 0 and len(saccades) == 0:
+                    return False
+
+                xgaze = self.gaze[a:(b + 1), self.xi][np.logical_not(self.errors[a:(b + 1)])]
+                ygaze = self.gaze[a:(b + 1), self.yi][np.logical_not(self.errors[a:(b + 1)])]
+
+                # discard the window if less than 5 samples are left
+                if len(xgaze) < 5:
+                    return False
+
+                # exclude windows with more than 66% constant gaze
+                (xvals, xcounts) = np.unique(xgaze, return_counts=True)
+                (yvals, ycounts) = np.unique(ygaze, return_counts=True)
+                xcounts = xcounts / float(np.sum(xcounts))
+                ycounts = ycounts / float(np.sum(ycounts))
+                if xcounts.max() > 0.66 or ycounts.max() > 0.66:
+                    return False
+
+                # accept every remaining window
+                return True
+            else:
+                # discard windows with more than 50% erroneous samples
+                return False
+        return False
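Windows are only accepted if their duration is within 10% of the nominal window size; with the step sizes from conf.get_step_size, consecutive windows overlap by half. Illustrative timing (not part of the patch):

```python
window_size, step = 15, 7.5  # seconds; step from conf.get_step_size(15)
starts = [k * step for k in range(4)]
print([(s, s + window_size) for s in starts])
# [(0.0, 15.0), (7.5, 22.5), (15.0, 30.0), (22.5, 37.5)] -- 50% overlap
```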
+
+    def getStartWindow(self, old_start, sliding_window_step_size):
+        start = old_start
+        starttime = self.gaze[old_start, self.ti]
+        while starttime < self.gaze[old_start, self.ti] + sliding_window_step_size:
+            start += 1
+            if start >= len(self.gaze[:, self.xi]) - 1:
+                return len(self.gaze[:, self.xi]) - 1
+            starttime = self.gaze[start, self.ti]
+        return start
+
+    def getEndWindow(self, start, sliding_window_size):
+        end = start
+        while self.gaze[end, self.ti] < self.gaze[start, self.ti] + sliding_window_size:
+            end += 1
+            if end >= len(self.gaze[:, self.xi]) - 1:
+                return len(self.gaze[:, self.xi]) - 1
+        return end
+
+    def get_ngram_features(self, wb):
+        feat = []
+        feat.append(sum(x > 0 for x in wb.values()))  # 1. number of different n-grams that occur at least once
+        feat.append(np.max(wb.values()))              # 2. maximum count
+        nonzeros = [i for i in wb.values() if i]
+        if len(nonzeros) < 1:
+            feat.append(0)
+        else:
+            feat.append(min(nonzeros))                # 3. non-zero minimum count
+        feat.append(np.argmax(wb.values()))           # 4. arg max
+        if len(nonzeros) < 1:
+            feat.append(0)
+        else:
+            feat.append(np.argmin(np.array(nonzeros)))  # 5. arg min (over the non-zero counts)
+        feat.append(feat[1] - feat[2])                # 6. range of counts (max - non-zero min)
+        feat.append(np.mean(wb.values()))             # 7. mean of all counts
+        feat.append(np.var(wb.values()))              # 8. var of all counts
+        return feat
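A numeric walk-through on a toy histogram (not part of the patch; dict order, and hence the arg max/arg min features, follows Python 2 dictionary iteration):

```python
wb = {'Uf': 2, 'fr': 1, 'rF': 0, 'FU': 0}  # toy 2-gram histogram
# size = 2 bins occur at least once, max = 2, non-zero min = 1,
# range = 2 - 1 = 1, mean = 0.75, var = 0.6875;
# arg max / arg min depend on the dictionary iteration order
```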