Add files via upload

mira · Aug 16, 2022 · 8b65ce7 · 8b65ce7
1 parent 3f1bd39
commit 8b65ce7
Show file tree

Hide file tree

Showing 99 changed files with 13,183 additions and 0 deletions.
diff --git a/Visualization_Module/GMV_phenotype_script_07_2022.m b/Visualization_Module/GMV_phenotype_script_07_2022.m
@@ -0,0 +1,159 @@
+%% correlation analyses between phenotypic features and GMV
+addpath(genpath('/volume/projects/DP_FEF/ScrFun/ScriptsRepository/'));
+analysis_folder = '/volume/projects/ST_Trauma_MDD/Analysis/30-May-2022/MDD_singleitems_633_IQRadd_NCV55_noval_min10_4040_5000AUC_boot/final_results/Phen_voxels_corr/voxels_y/'; %Output folder
+
+% load results
+load('/volume/projects/ST_Trauma_MDD/Analysis/30-May-2022/MDD_singleitems_633_IQRadd_NCV55_noval_min10_4040_5000AUC_boot/final_results/result_BS_final_vis_final_vis.mat');
+% load cortical/cerebellar atlases
+temp = load('/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/brainnetome_3mm_633_MDD_Trauma_NM_X.mat');
+fields=fieldnames(temp);
+brain_atlas = temp.(fields{1});
+
+temp = load('/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_633_MDD_Trauma_X.mat');
+fields=fieldnames(temp);
+cerebellum_atlas = temp.(fields{1});
+
+temp = load('/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/BNA_table.mat');
+fields=fieldnames(temp);
+BNA_table = temp.(fields{1});
+
+temp = load(input.X);
+fields=fieldnames(temp);
+X = temp.(fields{1});
+Y = input.Y;
+
+% choose whether you want to use brain regions or just single voxels for
+% correlation
+correlation_setup = 1; % 1 = single voxel with highest salience, 2 = brain regions with highest percentage
+
+% choose whether you want deflation steps after first LV
+deflation_setup = 2; % 1 = use deflation, 2 = do not use deflation
+
+% choose whether rescaling shall be activated
+correct_setup = 2; % 1 = use correcting, 2 = do not use correcting
+cs_method.correction_subgroup = input.cs_method.correction_subgroup;
+if ~isempty(cs_method.correction_subgroup)
+    cs_method.subgroup_train = contains(input.DiagNames, cs_method.correction_subgroup);
+    cs_method.subgroup_test = contains(input.DiagNames, cs_method.correction_subgroup);
+else
+    cs_method.subgroup_train = [];
+    cs_method.subgroup_test = [];
+end
+
+% choose scaling
+scaling_setup = 2; % 1 = use rescaling, 2 = do not use rescaling
+cs_method.method      = 'min_max'; % Scaling of features, default: mean-centering, also possible 'min_max' (scaling from 0 to 1) => preferred scaling is mean-centering!
+
+% define phenotypic items
+variables_selected =      {'age', 'BDI2_20', 'CTQ_09', 'female_sex', 'SPI_A_A2_1_1_red', 'male_sex'};
+
+% define brain regions (used only when correlation_setup is set at 2 =
+% brain regions, if correlation_setup is set at 1, this will become
+% arbitrary since the loop finds the voxels itself and locates it in the
+% right region and hemisphere
+regions_selected =        {'MTG', 'Hipp', 'MTG', 'pSTS','Amyg', 'BG'};
+hemispheres_selected =    {'right', 'right', 'right', 'right', 'left', 'left'};
+
+% Use nicer colors
+nice_blue = [0 0.4470 0.7410];
+nice_red = [0.8500 0.3250 0.0980];
+
+log_u = matches(output.parameters_names, 'u');
+log_v = matches(output.parameters_names, 'v');
+
+for i=1:(size(output.final_parameters,1)-1)
+
+    switch correlation_setup
+        case 1
+            u = output.final_parameters{i,log_u};
+            [max_value, index_region_extract] = max(abs(u));
+            region_extract = zeros(size(u))';
+            region_extract(index_region_extract) = 1;
+
+            region_number_temp = brain_atlas(index_region_extract);
+
+            if region_number_temp == 0
+                region_number_temp = cerebellum_atlas(index_region_extract);
+
+            else
+                hemispheres_names = {'left', 'right'};
+                [row_temp, column_temp] = find(BNA_table{:, hemispheres_names} == region_number_temp);
+                regions_selected{1,i} = BNA_table{row_temp, 'regions'}{1};
+                hemispheres_selected{1,i} = hemispheres_names{column_temp};
+            end
+
+
+        case 2
+            region_indices = BNA_table{contains(BNA_table.regions, regions_selected{i}, 'IgnoreCase', false), hemispheres_selected{i}};
+            region_extract = ismember(round(brain_atlas), region_indices);
+
+
+    end
+
+    if i==1
+        switch correct_setup % perform correction and scaling
+            case 1 % with site correction
+                COV = input.covariates;
+            case 2 % without site correction
+                COV = nan(size(input.covariates,1),1);
+        end
+    else
+        COV = nan(size(input.covariates,1),1);
+    end
+
+    switch scaling_setup % perform scaling
+        case 1 % with scaling
+            X = dp_correctscale(X,COV, cs_method);
+            Y = dp_correctscale(Y,COV, cs_method);
+    end
+
+    switch deflation_setup
+        case 1
+            if i > 1
+                % deflation step
+                u = output.final_parameters{i-1, 4};
+                v = output.final_parameters{i-1, 5};
+                [X,Y] = proj_def(X, Y, u, v);
+                switch scaling_setup % perform scaling
+                    case 1 % with scaling
+                        X = dp_correctscale(X,COV, cs_method);
+                        Y = dp_correctscale(Y,COV, cs_method);
+                end
+            end
+
+    end
+
+    brain_volumes = X*region_extract';
+    log_y = matches(input.Y_names, variables_selected{i});
+    x = brain_volumes;
+    y = Y(:, log_y);
+    if dp_binary_check(y)
+        [p(i), h, stats] = ranksum(x(y==0),x(y==1));
+        boxplot(x,y);
+        title(['LV', num2str(i), ', Ranksum = ', num2str(stats.zval), ', P value = ', num2str(p(i))]); % add third line
+    else
+        [rho(i), p(i)] = corr(x,y, 'type', 'Spearman');
+        if rho(i) < 0
+            scatter(y,x, 'filled', 'blue');
+        else
+            scatter(y,x, 'filled', 'red');
+        end
+        lsline
+        title(['LV', num2str(i), ', Spearman''s Rho = ', num2str(rho(i)), ', P value = ', num2str(p(i))]); % add third line
+    end
+
+    fontsize = 8;
+    switch correlation_setup
+        case 1
+            ylabel(['voxel in ', num2str(index_region_extract), ' ', regions_selected{i}, ' ', hemispheres_selected{i}], 'FontSize', fontsize);
+        case 2
+            ylabel([regions_selected{i}, ' ', hemispheres_selected{i}], 'FontSize', fontsize);
+    end
+    xlabel(strrep(variables_selected{i}, '_', ' '), 'FontSize', fontsize);
+    set(gcf,'Position', get(0,'Screensize'));
+    set(gcf,'PaperPositionMode','auto')
+    print(gcf, [analysis_folder, '/LV_' num2str(i), '_GMV_phenotype_corr'], '-dpng', '-r0');
+    saveas(gcf, [analysis_folder, '/LV_' num2str(i), '_GMV_phenotype_corr.fig']);
+    saveas(gcf,[analysis_folder, '/LV_' num2str(i), '_GMV_phenotype_corr'],'epsc');
+    close all
+end
diff --git a/Visualization_Module/dp_ConfidenceInterval.m b/Visualization_Module/dp_ConfidenceInterval.m
@@ -0,0 +1,21 @@
+%% compute confidence interval from dataset
+
+function CI = dp_ConfidenceInterval(IN)
+
+data = IN.data;
+if isfield(IN, 'sided')
+    sided = IN.sided;
+else
+    sided = 2;
+end
+
+SEM = nanstd(data)/sqrt(length(data));            % Standard Error
+switch sided
+    case 1
+        ts = tinv([0  0.95],length(data)-1);         % T-Score
+    case 2
+        ts = tinv([0.025  0.975],length(data)-1);         % T-Score
+end
+CI = nanmean(data) + ts*SEM;                      % Confidence Intervals
+
+end
diff --git a/Visualization_Module/dp_FDR.m b/Visualization_Module/dp_FDR.m
@@ -0,0 +1,27 @@
+%% DP function for FDR correction
+function [pvalue_FDR] = dp_FDR(pvalues, FDRvalue)
+
+if ~exist('FDRvalue','var')
+    FDRvalue = 0.05;
+end
+
+pvalues_sorted = sort(pvalues);
+m = numel(pvalues);
+k = 1:m;
+ratio = size(pvalues,1)/size(pvalues,2);
+
+if ratio >= 1
+    FDRthreshold = ((k*FDRvalue)/m)';
+else
+    FDRthreshold = (k*FDRvalue)/m;
+end
+
+decision = pvalues_sorted <= FDRthreshold;
+kmax = find(decision, 1, 'last');
+pvalue_FDR = pvalues_sorted(kmax);
+
+if isempty(pvalue_FDR)
+    pvalue_FDR = 0;
+end
+
+end
diff --git a/Visualization_Module/dp_FDR_adj.m b/Visualization_Module/dp_FDR_adj.m
@@ -0,0 +1,45 @@
+%% DP function for FDR correction
+function [pvalues_adj, pvalue_FDR] = dp_FDR_adj(pvalues, FDRvalue)
+
+pvalues_adj = pvalues;
+
+temp = pvalues(:);
+
+try temp = temp(:);
+catch
+end
+
+temp_save = temp;
+temp(isnan(temp))=[];
+
+if ~exist('FDRvalue','var')
+    FDRvalue = 0.05;
+end
+
+pvalues_sorted = sort(temp);
+m = numel(temp);
+k = 1:m;
+ratio = size(temp,1)/size(temp,2);
+
+if ratio >= 1
+    FDRthreshold = ((k*FDRvalue)/m)';
+else
+    FDRthreshold = (k*FDRvalue)/m;
+end
+
+decision = pvalues_sorted < FDRthreshold;
+kmax = find(decision, 1, 'last');
+pvalue_FDR = FDRthreshold(kmax);
+
+if isempty(pvalue_FDR)
+    pvalue_FDR = FDRvalue/m;
+end
+
+pvalues_adj_temp = temp.*(FDRvalue/pvalue_FDR);
+temp_save(~isnan(temp_save))=pvalues_adj_temp;
+
+pvalues_adj(:) = temp_save;
+
+pvalues_adj(pvalues_adj>1)=rescale(pvalues_adj(pvalues_adj>1), 0.9, 1);
+
+end
diff --git a/Visualization_Module/dp_ICV_bash_job.m b/Visualization_Module/dp_ICV_bash_job.m
@@ -0,0 +1,37 @@
+%% Script to create bash files
+
+function [output_file] = dp_ICV_bash_job(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request)
+
+switch type_analysis
+    case 'hyperopt'
+        comp_path = [spls_standalone_path '/hyperopt/for_testing/run_hyperopt.sh /opt/matlab/R2020a  ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+    case 'permutation'
+        comp_path = [spls_standalone_path '/permutation/for_testing/run_permutation.sh /opt/matlab/R2020a  ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+    case 'bootstrap'
+        comp_path = [spls_standalone_path '/bootstrap/for_testing/bootstrap ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+end
+
+FID = fopen([analysis_folder '/' type_analysis '.sh'],'w');
+
+fprintf(FID,['#!/bin/bash \n',...
+    '# \n',...
+    '#$ -S /bin/bash \n',...
+    '#$ -cwd \n',...
+    '#$ -o ' analysis_folder '/output-' type_analysis '.txt #output directory \n',...
+    '#$ -j y \n',...
+    '#$ -q ', queue_name, ' \n',...
+    '#$ -N ' type_analysis ' # Name of the job \n',...
+    '#$ -t 1-' num2str(total_jobs) ' \n',...
+    '#$ -tc ' num2str(parallel_jobs) ' \n',...
+    '#$ -soft -l h_vmem=', num2str(mem_request), ' \n',...
+    'export MCR_CACHE_ROOT=/volume/mitnvp1_scratch/DP_SPLS \n',...
+    comp_path]);
+
+fclose(FID);
+output_file = [analysis_folder '/' type_analysis '.sh'];
+%     '#$ -pe smp 10 \n',...
+
+end
diff --git a/Visualization_Module/dp_ICV_bash_job_mult.m b/Visualization_Module/dp_ICV_bash_job_mult.m
@@ -0,0 +1,37 @@
+%% Script to create bash files
+
+function [output_file] = dp_ICV_bash_job_mult(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request, matlab_version, compilation_subpath, cache_path)
+
+switch type_analysis
+    case 'hyperopt'
+        comp_path = [spls_standalone_path '/hyperopt/', compilation_subpath, '/run_hyperopt.sh /opt/matlab/', matlab_version, '  ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+    case 'permutation'
+        comp_path = [spls_standalone_path '/permutation/', compilation_subpath, '/run_permutation.sh /opt/matlab/', matlab_version, '  ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+    case 'bootstrap'
+        comp_path = [spls_standalone_path '/bootstrap/', compilation_subpath, '/run_bootstrap.sh /opt/matlab/', matlab_version, '  ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+end
+ % for_redistribution_files_only
+FID = fopen([analysis_folder '/' type_analysis '.sh'],'w');
+
+fprintf(FID,['#!/bin/bash \n',...
+    '# \n',...
+    '#$ -S /bin/bash \n',...
+    '#$ -cwd \n',...
+    '#$ -o ' analysis_folder '/output-' type_analysis '.txt #output directory \n',...
+    '#$ -j y \n',...
+    '#$ -q ', queue_name, ' \n',...
+    '#$ -N ' type_analysis ' # Name of the job \n',...
+    '#$ -t 1-' num2str(total_jobs) ' \n',...
+    '#$ -tc ' num2str(parallel_jobs) ' \n',...
+    '#$ -soft -l h_vmem=', num2str(mem_request), ' \n',...
+    'export MCR_CACHE_ROOT=', cache_path, ' \n',...
+    comp_path]);
+
+fclose(FID);
+output_file = [analysis_folder '/' type_analysis '.sh'];
+%     '#$ -pe smp 10 \n',...
+
+end
diff --git a/Visualization_Module/dp_ICV_bootstrap_csv.m b/Visualization_Module/dp_ICV_bootstrap_csv.m
@@ -0,0 +1,54 @@
+%% function for permutation testing
+
+function dp_ICV_bootstrap_csv(i, analysis_folder)
+
+m_setup = matfile([analysis_folder '/bootstrap_setup.mat']);
+
+if m_setup.selection_train == 1
+    m_data = matfile([analysis_folder, '/bootstrap_partition_fold.mat']);
+    m_opt = matfile([analysis_folder '/bootstrap_opt.mat']);
+end
+
+% 1) retrain on single test splits within
+% folds, then merge, 2) retrain on all inner folds
+% separately, then merge with mean or median, 3)
+% retrain on entirety of inner folds, 4) use already
+% existing u and v from inner folds without retraining
+
+X = m_data.train_data_x;
+Y = m_data.train_data_y;
+covars = m_data.train_covariates;
+labels = m_data.train_DiagNames;
+
+cs_method_bootstrap = m_data.cs_method;
+correction_target = m_setup.correction_target;
+
+[~,bootsam] = bootstrp(m_setup.size_sets_bootstrap,[],1:size(Y,1));
+
+% perform procrustean transformation to minimize rotation effects of
+% permutated y matrix, if V_opt available
+RHO_boot=[]; u_boot=[]; v_boot=[];
+for ii=1:size(bootsam,2)
+    X_boot = X(bootsam(:,ii),:);
+    Y_boot = Y(bootsam(:,ii),:);
+    covars_boot = covars(bootsam(:,ii),:);
+    labels_boot = labels(bootsam(:,ii),:);
+
+    cs_method_bootstrap.subgroup_train = matches(labels_boot, cs_method_bootstrap.correction_subgroup);
+    cs_method_bootstrap.subgroup_test = matches(labels_boot, cs_method_bootstrap.correction_subgroup);
+    [OUT_x, OUT_y] = dp_master_correctscale(X_boot, Y_boot, covars_boot, cs_method_bootstrap, correction_target);
+    cu = m_opt.cu_opt;
+    cv = m_opt.cv_opt;
+
+    if ~islogical(m_opt.V_opt)
+        [RHO_boot(1,ii), u_boot(:,ii), v_boot(:,ii), ~, ~, ~] = dp_spls_full(OUT_x,OUT_y,OUT_x, OUT_y, cu, cv, m_setup.correlation_method, m_opt.V_opt);
+    else
+        [RHO_boot(1,ii), u_boot(:,ii), v_boot(:,ii), ~, ~, ~] = dp_spls_full(OUT_x,OUT_y,OUT_x, OUT_y, cu, cv, m_setup.correlation_method);
+    end
+end
+
+writematrix(RHO_boot,[analysis_folder, '/RHO_results_', i, '.csv'],'Delimiter','tab')
+writematrix(u_boot,[analysis_folder, '/u_results_', i, '.csv'],'Delimiter','tab')
+writematrix(v_boot,[analysis_folder, '/v_results_', i, '.csv'],'Delimiter','tab')
+
+end