From 8b65ce79bf1435e065bf5c7484343f4a50c7ca11 Mon Sep 17 00:00:00 2001
From: David Popovic
Date: Tue, 16 Aug 2022 15:40:57 +0200
Subject: [PATCH] Add files via upload

---
 .../GMV_phenotype_script_07_2022.m | 159 +++
 Visualization_Module/dp_ConfidenceInterval.m | 21 +
 Visualization_Module/dp_FDR.m | 27 +
 Visualization_Module/dp_FDR_adj.m | 45 +
 Visualization_Module/dp_ICV_bash_job.m | 37 +
 Visualization_Module/dp_ICV_bash_job_mult.m | 37 +
 Visualization_Module/dp_ICV_bootstrap_csv.m | 54 +
 Visualization_Module/dp_ICV_hyperopt.m | 61 +
 Visualization_Module/dp_ICV_hyperopt_csv.asv | 59 +
 Visualization_Module/dp_ICV_hyperopt_csv.m | 58 +
 Visualization_Module/dp_ICV_hyperopt_serial.m | 34 +
 Visualization_Module/dp_ICV_main.m | 80 ++
 Visualization_Module/dp_ICV_main_csv.m | 30 +
 Visualization_Module/dp_ICV_main_csv_mult.m | 74 ++
 Visualization_Module/dp_ICV_main_draper.m | 64 +
 Visualization_Module/dp_ICV_main_mf.m | 18 +
 Visualization_Module/dp_ICV_main_txt.m | 43 +
 Visualization_Module/dp_ICV_main_txt.m~ | 49 +
 Visualization_Module/dp_ICV_permutation.m | 76 ++
 .../dp_ICV_permutation_csv.asv | 57 +
 Visualization_Module/dp_ICV_permutation_csv.m | 53 +
 .../dp_ICV_permutation_serial.m | 64 +
 Visualization_Module/dp_ICV_spls.m | 30 +
 Visualization_Module/dp_LSOVpartition.m | 21 +
 Visualization_Module/dp_RHO_avg_10_sets.m | 49 +
 Visualization_Module/dp_RHO_b_100_sets.m | 39 +
 Visualization_Module/dp_RHO_fullpara.m | 51 +
 Visualization_Module/dp_RHO_fullpara_slurm.m | 51 +
 Visualization_Module/dp_RHO_fullpara_time.m | 51 +
 Visualization_Module/dp_RHO_parallel.m~ | 59 +
 Visualization_Module/dp_atlas_find.m | 17 +
 Visualization_Module/dp_atlas_table_readout.m | 180 +++
 Visualization_Module/dp_auc_testing.m | 37 +
 Visualization_Module/dp_bootstrap_pruning.m | 39 +
 Visualization_Module/dp_bootstrap_sampling | 2 +
 Visualization_Module/dp_bootstrap_sampling.m | 122 ++
 Visualization_Module/dp_chi2.m | 92 ++
 Visualization_Module/dp_ciss_groups.m | 101 ++
 Visualization_Module/dp_ciss_groups.m~ | 97 ++
 Visualization_Module/dp_cleanup_files.m | 12 +
 Visualization_Module/dp_corrections.asv | 14 +
 Visualization_Module/dp_corrections.m | 15 +
 Visualization_Module/dp_corrections_multi.m | 25 +
 Visualization_Module/dp_correctscale.m | 65 +
 .../dp_correctscale_extract.m | 65 +
 Visualization_Module/dp_correctscale_mult.m | 65 +
 Visualization_Module/dp_correctscale_multi.m | 67 +
 Visualization_Module/dp_create_folders.m | 17 +
 Visualization_Module/dp_ctq_groups.m | 153 +++
 Visualization_Module/dp_ctq_groups_new.m | 101 ++
 Visualization_Module/dp_cu_cv.m | 95 ++
 Visualization_Module/dp_cu_cv_ext.m | 73 +
 Visualization_Module/dp_decision_voxel.m | 35 +
 Visualization_Module/dp_deflatescale.m | 13 +
 Visualization_Module/dp_fdr_posthoc_adjust.m | 53 +
 Visualization_Module/dp_fitlm.m | 17 +
 Visualization_Module/dp_get_latent_scores.m | 78 ++
 Visualization_Module/dp_gm_volume.m | 62 +
 Visualization_Module/dp_gm_volume.m~ | 48 +
 Visualization_Module/dp_hyperopt_ICV.m | 37 +
 Visualization_Module/dp_hyperopt_ICV.m~ | 49 +
 Visualization_Module/dp_k_split.m | 20 +
 Visualization_Module/dp_master_correctscale.m | 27 +
 Visualization_Module/dp_projection.m | 24 +
 Visualization_Module/dp_projection_ext.m | 24 +
 Visualization_Module/dp_regions_table.m | 164 +++
 Visualization_Module/dp_regions_table_new.m | 154 +++
 Visualization_Module/dp_regions_table_new.m~ | 153 +++
 Visualization_Module/dp_resample_image.m | 19 +
 Visualization_Module/dp_resample_image.m~ | 18 +
 .../dp_results_collection_new.m | 290 ++++
 Visualization_Module/dp_setup_framework.m | 68 +
 .../dp_setup_framework_draper.m | 75 ++
 Visualization_Module/dp_setup_parameters.m | 148 +++
 Visualization_Module/dp_slurm_parallel.m | 38 +
 Visualization_Module/dp_sociodemographic.m | 755 +++++++++++
 .../dp_sociodemographic_2020.m | 524 ++++++++
 .../dp_sociodemographic_2020.m~ | 519 ++++++++
 .../dp_sociodemographic_2020_full.m | 684 ++++++++++
 Visualization_Module/dp_spls.m | 281 ++++
 Visualization_Module/dp_spls_full.m | 17 +
 Visualization_Module/dp_spls_resample.m | 274 ++++
 Visualization_Module/dp_spls_slim.m | 17 +
 Visualization_Module/dp_standardize.m | 48 +
 Visualization_Module/dp_standardize_comb.m | 10 +
 Visualization_Module/dp_trainmerge.m | 37 +
 Visualization_Module/dp_trainmerge_single.m | 16 +
 Visualization_Module/dp_visualize_data.m | 584 ++++++++
 Visualization_Module/dp_visualize_data.m~ | 587 ++++++++
 Visualization_Module/dp_visualize_data_Dev.m | 955 +++++++++++++
 .../dp_visualize_data_Devmod.m | 810 +++++++++++
 .../dp_visualize_data_multi.m | 1179 +++++++++++++++++
 .../dp_visualize_data_multi_2020.m | 930 +++++++++++++
 Visualization_Module/fdr.m | 134 ++
 Visualization_Module/hyperopt.prj | 123 ++
 Visualization_Module/parsave.m | 3 +
 Visualization_Module/radarplot.zip | Bin 0 -> 1968 bytes
 Visualization_Module/testing.m | 77 ++
 Visualization_Module/wmean.zip | Bin 0 -> 1545 bytes
 99 files changed, 13183 insertions(+)
 create mode 100644 Visualization_Module/GMV_phenotype_script_07_2022.m
 create mode 100644 Visualization_Module/dp_ConfidenceInterval.m
 create mode 100644 Visualization_Module/dp_FDR.m
 create mode 100644 Visualization_Module/dp_FDR_adj.m
 create mode 100644 Visualization_Module/dp_ICV_bash_job.m
 create mode 100644 Visualization_Module/dp_ICV_bash_job_mult.m
 create mode 100644 Visualization_Module/dp_ICV_bootstrap_csv.m
 create mode 100644 Visualization_Module/dp_ICV_hyperopt.m
 create mode 100644 Visualization_Module/dp_ICV_hyperopt_csv.asv
 create mode 100644 Visualization_Module/dp_ICV_hyperopt_csv.m
 create mode 100644 Visualization_Module/dp_ICV_hyperopt_serial.m
 create mode 100644 Visualization_Module/dp_ICV_main.m
 create mode 100644 Visualization_Module/dp_ICV_main_csv.m
 create mode 100644 Visualization_Module/dp_ICV_main_csv_mult.m
 create mode 100644 Visualization_Module/dp_ICV_main_draper.m
 create mode 100644 Visualization_Module/dp_ICV_main_mf.m
 create mode 100644 Visualization_Module/dp_ICV_main_txt.m
 create mode 100644 Visualization_Module/dp_ICV_main_txt.m~
 create mode 100644 Visualization_Module/dp_ICV_permutation.m
 create mode 100644 Visualization_Module/dp_ICV_permutation_csv.asv
 create mode 100644 Visualization_Module/dp_ICV_permutation_csv.m
 create mode 100644 Visualization_Module/dp_ICV_permutation_serial.m
 create mode 100644 Visualization_Module/dp_ICV_spls.m
 create mode 100644 Visualization_Module/dp_LSOVpartition.m
 create mode 100644 Visualization_Module/dp_RHO_avg_10_sets.m
 create mode 100644 Visualization_Module/dp_RHO_b_100_sets.m
 create mode 100644 Visualization_Module/dp_RHO_fullpara.m
 create mode 100644 Visualization_Module/dp_RHO_fullpara_slurm.m
 create mode 100644 Visualization_Module/dp_RHO_fullpara_time.m
 create mode 100644 Visualization_Module/dp_RHO_parallel.m~
 create mode 100644 Visualization_Module/dp_atlas_find.m
 create mode 100644 Visualization_Module/dp_atlas_table_readout.m
 create mode 100644 Visualization_Module/dp_auc_testing.m
 create mode 100644 Visualization_Module/dp_bootstrap_pruning.m
 create mode 100644 Visualization_Module/dp_bootstrap_sampling
 create mode 100644 Visualization_Module/dp_bootstrap_sampling.m
 create mode 100644 Visualization_Module/dp_chi2.m
 create mode 100644 Visualization_Module/dp_ciss_groups.m
 create mode 100644 Visualization_Module/dp_ciss_groups.m~
 create mode 100644 Visualization_Module/dp_cleanup_files.m
 create mode 100644 Visualization_Module/dp_corrections.asv
 create mode 100644 Visualization_Module/dp_corrections.m
 create mode 100644 Visualization_Module/dp_corrections_multi.m
 create mode 100644 Visualization_Module/dp_correctscale.m
 create mode 100644 Visualization_Module/dp_correctscale_extract.m
 create mode 100644 Visualization_Module/dp_correctscale_mult.m
 create mode 100644 Visualization_Module/dp_correctscale_multi.m
 create mode 100644 Visualization_Module/dp_create_folders.m
 create mode 100644 Visualization_Module/dp_ctq_groups.m
 create mode 100644 Visualization_Module/dp_ctq_groups_new.m
 create mode 100644 Visualization_Module/dp_cu_cv.m
 create mode 100644 Visualization_Module/dp_cu_cv_ext.m
 create mode 100644 Visualization_Module/dp_decision_voxel.m
 create mode 100644 Visualization_Module/dp_deflatescale.m
 create mode 100644 Visualization_Module/dp_fdr_posthoc_adjust.m
 create mode 100644 Visualization_Module/dp_fitlm.m
 create mode 100644 Visualization_Module/dp_get_latent_scores.m
 create mode 100644 Visualization_Module/dp_gm_volume.m
 create mode 100644 Visualization_Module/dp_gm_volume.m~
 create mode 100644 Visualization_Module/dp_hyperopt_ICV.m
 create mode 100644 Visualization_Module/dp_hyperopt_ICV.m~
 create mode 100644 Visualization_Module/dp_k_split.m
 create mode 100644 Visualization_Module/dp_master_correctscale.m
 create mode 100644 Visualization_Module/dp_projection.m
 create mode 100644 Visualization_Module/dp_projection_ext.m
 create mode 100644 Visualization_Module/dp_regions_table.m
 create mode 100644 Visualization_Module/dp_regions_table_new.m
 create mode 100644 Visualization_Module/dp_regions_table_new.m~
 create mode 100644 Visualization_Module/dp_resample_image.m
 create mode 100644 Visualization_Module/dp_resample_image.m~
 create mode 100644 Visualization_Module/dp_results_collection_new.m
 create mode 100644 Visualization_Module/dp_setup_framework.m
 create mode 100644 Visualization_Module/dp_setup_framework_draper.m
 create mode 100644 Visualization_Module/dp_setup_parameters.m
 create mode 100644 Visualization_Module/dp_slurm_parallel.m
 create mode 100644 Visualization_Module/dp_sociodemographic.m
 create mode 100644 Visualization_Module/dp_sociodemographic_2020.m
 create mode 100644 Visualization_Module/dp_sociodemographic_2020.m~
 create mode 100644 Visualization_Module/dp_sociodemographic_2020_full.m
 create mode 100644 Visualization_Module/dp_spls.m
 create mode 100644 Visualization_Module/dp_spls_full.m
 create mode 100644 Visualization_Module/dp_spls_resample.m
 create mode 100644 Visualization_Module/dp_spls_slim.m
 create mode 100644 Visualization_Module/dp_standardize.m
 create mode 100644 Visualization_Module/dp_standardize_comb.m
 create mode 100644 Visualization_Module/dp_trainmerge.m
 create mode 100644 Visualization_Module/dp_trainmerge_single.m
 create mode 100644 Visualization_Module/dp_visualize_data.m
 create mode 100644 Visualization_Module/dp_visualize_data.m~
 create mode 100644 Visualization_Module/dp_visualize_data_Dev.m
 create mode 100644 Visualization_Module/dp_visualize_data_Devmod.m
 create mode 100644 Visualization_Module/dp_visualize_data_multi.m
 create mode 100644 Visualization_Module/dp_visualize_data_multi_2020.m
 create mode 100644 Visualization_Module/fdr.m
 create mode 100644 Visualization_Module/hyperopt.prj
 create mode 100644 Visualization_Module/parsave.m
 create mode 100644 Visualization_Module/radarplot.zip
 create mode 100644 Visualization_Module/testing.m
 create mode 100644 Visualization_Module/wmean.zip

diff --git a/Visualization_Module/GMV_phenotype_script_07_2022.m b/Visualization_Module/GMV_phenotype_script_07_2022.m
new file mode 100644
index 0000000..51ff489
--- /dev/null
+++ b/Visualization_Module/GMV_phenotype_script_07_2022.m
@@ -0,0 +1,159 @@
+%% correlation analyses between phenotypic features and GMV
+addpath(genpath('/volume/projects/DP_FEF/ScrFun/ScriptsRepository/'));
+analysis_folder = '/volume/projects/ST_Trauma_MDD/Analysis/30-May-2022/MDD_singleitems_633_IQRadd_NCV55_noval_min10_4040_5000AUC_boot/final_results/Phen_voxels_corr/voxels_y/'; %Output folder
+
+% load results
+load('/volume/projects/ST_Trauma_MDD/Analysis/30-May-2022/MDD_singleitems_633_IQRadd_NCV55_noval_min10_4040_5000AUC_boot/final_results/result_BS_final_vis_final_vis.mat');
+% load cortical/cerebellar atlases
+temp = load('/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/brainnetome_3mm_633_MDD_Trauma_NM_X.mat');
+fields=fieldnames(temp);
+brain_atlas = temp.(fields{1});
+
+temp = load('/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_633_MDD_Trauma_X.mat');
+fields=fieldnames(temp);
+cerebellum_atlas = temp.(fields{1});
+
+temp = load('/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/BNA_table.mat');
+fields=fieldnames(temp);
+BNA_table = temp.(fields{1});
+
+temp = load(input.X);
+fields=fieldnames(temp);
+X = temp.(fields{1});
+Y = input.Y;
+
+% choose whether you want to use brain regions or just single voxels for
+% correlation
+correlation_setup = 1; % 1 = single voxel with highest salience, 2 = brain regions with highest percentage
+
+% choose whether you want deflation steps after first LV
+deflation_setup = 2; % 1 = use deflation, 2 = do not use deflation
+
+% choose whether rescaling shall be activated
+correct_setup = 2; % 1 = use correcting, 2 = do not use correcting
+cs_method.correction_subgroup = input.cs_method.correction_subgroup;
+if ~isempty(cs_method.correction_subgroup)
+    cs_method.subgroup_train = contains(input.DiagNames, cs_method.correction_subgroup);
+    cs_method.subgroup_test = contains(input.DiagNames, cs_method.correction_subgroup);
+else
+    cs_method.subgroup_train = [];
+    cs_method.subgroup_test = [];
+end
+
+% choose scaling
+scaling_setup = 2; % 1 = use rescaling, 2 = do not use rescaling
+cs_method.method = 'min_max'; % Scaling of features, default: mean-centering, also possible 'min_max' (scaling from 0 to 1) => preferred scaling is mean-centering!
+
+% define phenotypic items
+variables_selected = {'age', 'BDI2_20', 'CTQ_09', 'female_sex', 'SPI_A_A2_1_1_red', 'male_sex'};
+
+% define brain regions (used only when correlation_setup is set at 2 =
+% brain regions, if correlation_setup is set at 1, this will become
+% arbitrary since the loop finds the voxels itself and locates it in the
+% right region and hemisphere
+regions_selected = {'MTG', 'Hipp', 'MTG', 'pSTS','Amyg', 'BG'};
+hemispheres_selected = {'right', 'right', 'right', 'right', 'left', 'left'};
+
+% Use nicer colors
+nice_blue = [0 0.4470 0.7410];
+nice_red = [0.8500 0.3250 0.0980];
+
+log_u = matches(output.parameters_names, 'u');
+log_v = matches(output.parameters_names, 'v');
+
+for i=1:(size(output.final_parameters,1)-1)
+
+    switch correlation_setup
+        case 1
+            u = output.final_parameters{i,log_u};
+            [max_value, index_region_extract] = max(abs(u));
+            region_extract = zeros(size(u))';
+            region_extract(index_region_extract) = 1;
+
+            region_number_temp = brain_atlas(index_region_extract);
+
+            if region_number_temp == 0
+                region_number_temp = cerebellum_atlas(index_region_extract);
+
+            else
+                hemispheres_names = {'left', 'right'};
+                [row_temp, column_temp] = find(BNA_table{:, hemispheres_names} == region_number_temp);
+                regions_selected{1,i} = BNA_table{row_temp, 'regions'}{1};
+                hemispheres_selected{1,i} = hemispheres_names{column_temp};
+            end
+
+        case 2
+            region_indices = BNA_table{contains(BNA_table.regions, regions_selected{i}, 'IgnoreCase', false), hemispheres_selected{i}};
+            region_extract = ismember(round(brain_atlas), region_indices);
+
+    end
+
+    if i==1
+        switch correct_setup % perform correction and scaling
+            case 1 % with site correction
+                COV = input.covariates;
+            case 2 % without site correction
+                COV = nan(size(input.covariates,1),1);
+        end
+    else
+        COV = nan(size(input.covariates,1),1);
+    end
+
+    switch scaling_setup % perform scaling
+        case 1 % with scaling
+            X = dp_correctscale(X,COV, cs_method);
+            Y = dp_correctscale(Y,COV, cs_method);
+    end
+
+    switch deflation_setup
+        case 1
+            if i > 1
+                % deflation step
+                u = output.final_parameters{i-1, 4};
+                v = output.final_parameters{i-1, 5};
+                [X,Y] = proj_def(X, Y, u, v);
+                switch scaling_setup % perform scaling
+                    case 1 % with scaling
+                        X = dp_correctscale(X,COV, cs_method);
+                        Y = dp_correctscale(Y,COV, cs_method);
+                end
+            end
+
+    end
+
+    brain_volumes = X*region_extract';
+    log_y = matches(input.Y_names, variables_selected{i});
+    x = brain_volumes;
+    y = Y(:, log_y);
+    if dp_binary_check(y)
+        [p(i), h, stats] = ranksum(x(y==0),x(y==1));
+        boxplot(x,y);
+        title(['LV', num2str(i), ', Ranksum = ', num2str(stats.zval), ', P value = ', num2str(p(i))]); % add third line
+    else
+        [rho(i), p(i)] = corr(x,y, 'type', 'Spearman');
+        if rho(i) < 0
+            scatter(y,x, 'filled', 'blue');
+        else
+            scatter(y,x, 'filled', 'red');
+        end
+        lsline
+        title(['LV', num2str(i), ', Spearman''s Rho = ', num2str(rho(i)), ', P value = ', num2str(p(i))]); % add third line
+    end
+
+    fontsize = 8;
+    switch correlation_setup
+        case 1
+            ylabel(['voxel in ', num2str(index_region_extract), ' ', regions_selected{i}, ' ', hemispheres_selected{i}], 'FontSize', fontsize);
+        case 2
+            ylabel([regions_selected{i}, ' ', hemispheres_selected{i}], 'FontSize', fontsize);
+    end
+    xlabel(strrep(variables_selected{i}, '_', ' '), 'FontSize', fontsize);
+    set(gcf,'Position', get(0,'Screensize'));
+    set(gcf,'PaperPositionMode','auto')
+    print(gcf, [analysis_folder, '/LV_' num2str(i), '_GMV_phenotype_corr'], '-dpng', '-r0');
+    saveas(gcf, [analysis_folder, '/LV_' num2str(i), '_GMV_phenotype_corr.fig']);
+    saveas(gcf,[analysis_folder, '/LV_' num2str(i), '_GMV_phenotype_corr'],'epsc');
+    close all
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ConfidenceInterval.m b/Visualization_Module/dp_ConfidenceInterval.m
new file mode 100644
index 0000000..c7474e0
--- /dev/null
+++ b/Visualization_Module/dp_ConfidenceInterval.m
@@ -0,0 +1,21 @@
+%% compute confidence interval from dataset
+
+function CI = dp_ConfidenceInterval(IN)
+
+data = IN.data;
+if isfield(IN, 'sided')
+    sided = IN.sided;
+else
+    sided = 2;
+end
+
+SEM = nanstd(data)/sqrt(length(data)); % Standard Error
+switch sided
+    case 1
+        ts = tinv([0 0.95],length(data)-1); % T-Score
+    case 2
+        ts = tinv([0.025 0.975],length(data)-1); % T-Score
+end
+CI = nanmean(data) + ts*SEM; % Confidence Intervals
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_FDR.m b/Visualization_Module/dp_FDR.m
new file mode 100644
index 0000000..d274e36
--- /dev/null
+++ b/Visualization_Module/dp_FDR.m
@@ -0,0 +1,27 @@
+%% DP function for FDR correction
+function [pvalue_FDR] = dp_FDR(pvalues, FDRvalue)
+
+if ~exist('FDRvalue','var')
+    FDRvalue = 0.05;
+end
+
+pvalues_sorted = sort(pvalues);
+m = numel(pvalues);
+k = 1:m;
+ratio = size(pvalues,1)/size(pvalues,2);
+
+if ratio >= 1
+    FDRthreshold = ((k*FDRvalue)/m)';
+else
+    FDRthreshold = (k*FDRvalue)/m;
+end
+
+decision = pvalues_sorted <= FDRthreshold;
+kmax = find(decision, 1, 'last');
+pvalue_FDR = pvalues_sorted(kmax);
+
+if isempty(pvalue_FDR)
+    pvalue_FDR = 0;
+end
+
+end
diff --git a/Visualization_Module/dp_FDR_adj.m b/Visualization_Module/dp_FDR_adj.m
new file mode 100644
index 0000000..1e773b9
--- /dev/null
+++ b/Visualization_Module/dp_FDR_adj.m
@@ -0,0 +1,45 @@
+%% DP function for FDR correction
+function [pvalues_adj, pvalue_FDR] = dp_FDR_adj(pvalues, FDRvalue)
+
+pvalues_adj = pvalues;
+
+temp = pvalues(:);
+
+try temp = temp(:);
+catch
+end
+
+temp_save = temp;
+temp(isnan(temp))=[];
+
+if ~exist('FDRvalue','var')
+    FDRvalue = 0.05;
+end
+
+pvalues_sorted = sort(temp);
+m = numel(temp);
+k = 1:m;
+ratio = size(temp,1)/size(temp,2);
+
+if ratio >= 1
+    FDRthreshold = ((k*FDRvalue)/m)';
+else
+    FDRthreshold = (k*FDRvalue)/m;
+end
+
+decision = pvalues_sorted < FDRthreshold;
+kmax = find(decision, 1, 'last');
+pvalue_FDR = FDRthreshold(kmax);
+
+if isempty(pvalue_FDR)
+    pvalue_FDR = FDRvalue/m;
+end
+
+pvalues_adj_temp = temp.*(FDRvalue/pvalue_FDR);
+temp_save(~isnan(temp_save))=pvalues_adj_temp;
+
+pvalues_adj(:) = temp_save;
+
+pvalues_adj(pvalues_adj>1)=rescale(pvalues_adj(pvalues_adj>1), 0.9, 1);
+
+end
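[Editor's example, not part of the patch: a minimal usage sketch of dp_FDR above; the p-values are made up for illustration.]

% Toy example: Benjamini-Hochberg threshold at an FDR of 5%.
pvalues = [0.001 0.008 0.039 0.041 0.042 0.060 0.074 0.205];
pvalue_FDR = dp_FDR(pvalues, 0.05);    % largest p-value still declared significant
significant = pvalues <= pvalue_FDR;   % logical mask of tests surviving FDR correction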
diff --git a/Visualization_Module/dp_ICV_bash_job.m b/Visualization_Module/dp_ICV_bash_job.m
new file mode 100644
index 0000000..47eafad
--- /dev/null
+++ b/Visualization_Module/dp_ICV_bash_job.m
@@ -0,0 +1,37 @@
+%% Script to create bash files
+
+function [output_file] = dp_ICV_bash_job(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request)
+
+switch type_analysis
+    case 'hyperopt'
+        comp_path = [spls_standalone_path '/hyperopt/for_testing/run_hyperopt.sh /opt/matlab/R2020a ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+    case 'permutation'
+        comp_path = [spls_standalone_path '/permutation/for_testing/run_permutation.sh /opt/matlab/R2020a ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+    case 'bootstrap'
+        comp_path = [spls_standalone_path '/bootstrap/for_testing/bootstrap ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+end
+
+FID = fopen([analysis_folder '/' type_analysis '.sh'],'w');
+
+fprintf(FID,['#!/bin/bash \n',...
+    '# \n',...
+    '#$ -S /bin/bash \n',...
+    '#$ -cwd \n',...
+    '#$ -o ' analysis_folder '/output-' type_analysis '.txt #output directory \n',...
+    '#$ -j y \n',...
+    '#$ -q ', queue_name, ' \n',...
+    '#$ -N ' type_analysis ' # Name of the job \n',...
+    '#$ -t 1-' num2str(total_jobs) ' \n',...
+    '#$ -tc ' num2str(parallel_jobs) ' \n',...
+    '#$ -soft -l h_vmem=', num2str(mem_request), ' \n',...
+    'export MCR_CACHE_ROOT=/volume/mitnvp1_scratch/DP_SPLS \n',...
+    comp_path]);
+
+fclose(FID);
+output_file = [analysis_folder '/' type_analysis '.sh'];
+% '#$ -pe smp 10 \n',...
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_bash_job_mult.m b/Visualization_Module/dp_ICV_bash_job_mult.m
new file mode 100644
index 0000000..7064f29
--- /dev/null
+++ b/Visualization_Module/dp_ICV_bash_job_mult.m
@@ -0,0 +1,37 @@
+%% Script to create bash files
+
+function [output_file] = dp_ICV_bash_job_mult(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request, matlab_version, compilation_subpath, cache_path)
+
+switch type_analysis
+    case 'hyperopt'
+        comp_path = [spls_standalone_path '/hyperopt/', compilation_subpath, '/run_hyperopt.sh /opt/matlab/', matlab_version, ' ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+    case 'permutation'
+        comp_path = [spls_standalone_path '/permutation/', compilation_subpath, '/run_permutation.sh /opt/matlab/', matlab_version, ' ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+    case 'bootstrap'
+        comp_path = [spls_standalone_path '/bootstrap/', compilation_subpath, '/run_bootstrap.sh /opt/matlab/', matlab_version, ' ',...
+            '$SGE_TASK_ID ' analysis_folder ];
+end
+% for_redistribution_files_only
+FID = fopen([analysis_folder '/' type_analysis '.sh'],'w');
+
+fprintf(FID,['#!/bin/bash \n',...
+    '# \n',...
+    '#$ -S /bin/bash \n',...
+    '#$ -cwd \n',...
+    '#$ -o ' analysis_folder '/output-' type_analysis '.txt #output directory \n',...
+    '#$ -j y \n',...
+    '#$ -q ', queue_name, ' \n',...
+    '#$ -N ' type_analysis ' # Name of the job \n',...
+    '#$ -t 1-' num2str(total_jobs) ' \n',...
+    '#$ -tc ' num2str(parallel_jobs) ' \n',...
+    '#$ -soft -l h_vmem=', num2str(mem_request), ' \n',...
+    'export MCR_CACHE_ROOT=', cache_path, ' \n',...
+    comp_path]);
+
+fclose(FID);
+output_file = [analysis_folder '/' type_analysis '.sh'];
+% '#$ -pe smp 10 \n',...
+
+end
\ No newline at end of file
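[Editor's example, not part of the patch: a hypothetical call to the generator above; the paths, queue name, and memory string are placeholders, not values from this repository.]

% Write an SGE array-job script for 500 compiled 'permutation' tasks,
% at most 50 running concurrently, then submit it; each task receives
% its array index via $SGE_TASK_ID.
spls_standalone_path = '/volume/projects/example/standalone';  % placeholder
analysis_folder      = '/volume/projects/example/analysis';    % placeholder
bash_file = dp_ICV_bash_job(spls_standalone_path, 'all.q', analysis_folder, ...
    'permutation', 500, 50, '8G');
system(['qsub ' bash_file]);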
diff --git a/Visualization_Module/dp_ICV_bootstrap_csv.m b/Visualization_Module/dp_ICV_bootstrap_csv.m
new file mode 100644
index 0000000..443cf47
--- /dev/null
+++ b/Visualization_Module/dp_ICV_bootstrap_csv.m
@@ -0,0 +1,54 @@
+%% function for permutation testing
+
+function dp_ICV_bootstrap_csv(i, analysis_folder)
+
+m_setup = matfile([analysis_folder '/bootstrap_setup.mat']);
+
+if m_setup.selection_train == 1
+    m_data = matfile([analysis_folder, '/bootstrap_partition_fold.mat']);
+    m_opt = matfile([analysis_folder '/bootstrap_opt.mat']);
+end
+
+% 1) retrain on single test splits within
+% folds, then merge, 2) retrain on all inner folds
+% separately, then merge with mean or median, 3)
+% retrain on entirety of inner folds, 4) use already
+% existing u and v from inner folds without retraining
+
+X = m_data.train_data_x;
+Y = m_data.train_data_y;
+covars = m_data.train_covariates;
+labels = m_data.train_DiagNames;
+
+cs_method_bootstrap = m_data.cs_method;
+correction_target = m_setup.correction_target;
+
+[~,bootsam] = bootstrp(m_setup.size_sets_bootstrap,[],1:size(Y,1));
+
+% perform procrustean transformation to minimize rotation effects of
+% permutated y matrix, if V_opt available
+RHO_boot=[]; u_boot=[]; v_boot=[];
+for ii=1:size(bootsam,2)
+    X_boot = X(bootsam(:,ii),:);
+    Y_boot = Y(bootsam(:,ii),:);
+    covars_boot = covars(bootsam(:,ii),:);
+    labels_boot = labels(bootsam(:,ii),:);
+
+    cs_method_bootstrap.subgroup_train = matches(labels_boot, cs_method_bootstrap.correction_subgroup);
+    cs_method_bootstrap.subgroup_test = matches(labels_boot, cs_method_bootstrap.correction_subgroup);
+    [OUT_x, OUT_y] = dp_master_correctscale(X_boot, Y_boot, covars_boot, cs_method_bootstrap, correction_target);
+    cu = m_opt.cu_opt;
+    cv = m_opt.cv_opt;
+
+    if ~islogical(m_opt.V_opt)
+        [RHO_boot(1,ii), u_boot(:,ii), v_boot(:,ii), ~, ~, ~] = dp_spls_full(OUT_x,OUT_y,OUT_x, OUT_y, cu, cv, m_setup.correlation_method, m_opt.V_opt);
+    else
+        [RHO_boot(1,ii), u_boot(:,ii), v_boot(:,ii), ~, ~, ~] = dp_spls_full(OUT_x,OUT_y,OUT_x, OUT_y, cu, cv, m_setup.correlation_method);
+    end
+end
+
+writematrix(RHO_boot,[analysis_folder, '/RHO_results_', i, '.csv'],'Delimiter','tab')
+writematrix(u_boot,[analysis_folder, '/u_results_', i, '.csv'],'Delimiter','tab')
+writematrix(v_boot,[analysis_folder, '/v_results_', i, '.csv'],'Delimiter','tab')
+
+end
\ No newline at end of file
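[Editor's example, not part of the patch: a sketch of how the per-job CSVs written above can be collected. The bootstrap-ratio pruning rule shown is a common heuristic, not necessarily what this repository's dp_bootstrap_pruning implements.]

% Stack the bootstrap weight vectors from all jobs and form bootstrap
% ratios (mean/std across resamples) for the X-side weights u.
n_jobs = 100;   % placeholder: number of submitted bootstrap jobs
u_all = [];
for j = 1:n_jobs
    u_all = [u_all, readmatrix([analysis_folder, '/u_results_', num2str(j), '.csv'])];
end
BR = mean(u_all, 2) ./ std(u_all, 0, 2);   % bootstrap ratio per feature
stable = abs(BR) > 2;                      % roughly a 95% stability criterion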
diff --git a/Visualization_Module/dp_ICV_hyperopt.m b/Visualization_Module/dp_ICV_hyperopt.m
new file mode 100644
index 0000000..86e4e4e
--- /dev/null
+++ b/Visualization_Module/dp_ICV_hyperopt.m
@@ -0,0 +1,61 @@
+%% new function for permutation testing
+
+function dp_ICV_hyperopt(i, analysis_folder)
+
+m = matfile([analysis_folder '/hyperopt_partition.mat']);
+
+extract_target = (str2double(i)-1)*m.size_sets_hyperopt+1;
+try
+    cu_cv_extract = m.cu_cv_combination(extract_target:(extract_target+m.size_sets_hyperopt-1),:);
+catch
+    cu_cv_extract = m.cu_cv_combination(extract_target:end,:);
+end
+
+% RHO_collection_ICV = nan(size(cu_cv_extract,1),size(m.cv_inner_TrainInd,1)*size(m.cv_inner_TrainInd,2));
+u_collection_ICV = cell(size(cu_cv_extract,1),1);
+v_collection_ICV = cell(size(cu_cv_extract,1),1);
+train_data_x = m.train_data_x;
+train_data_y = m.train_data_y;
+train_covariates = m.train_covariates;
+
+for ii=1:size(cu_cv_extract,1)
+    cu = cu_cv_extract(ii,1);
+    cv = cu_cv_extract(ii,2);
+    nn=1;
+    for ib=1:size(m.cv_inner_TrainInd,1)
+        for k=1:size(m.cv_inner_TrainInd,2)
+
+            IN_x.train = train_data_x(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            IN_x.test = train_data_x(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            IN_y.train = train_data_y(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            IN_y.test = train_data_y(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            COV.train = train_covariates(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            COV.test = train_covariates(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            OUT_x = dp_correctscale(IN_x,COV,m.scaling_method);
+            OUT_y = dp_correctscale(IN_y,COV,m.scaling_method);
+
+            [RHO_collection_ICV(ii,nn), u_collection_ICV{ii,1}(:,nn), v_collection_ICV{ii,1}(:,nn), ~, ~, ~] = dp_spls_full(OUT_x.train,OUT_y.train,OUT_x.test,OUT_y.test, cu, cv, m.correlation_method);
+            nn=nn+1;
+        end
+    end
+end
+
+% errorcount=1;
+% while errorcount>0
+%     try m_coll = matfile([analysis_folder, '/RHO_results.mat'],'Writable',true);
+%         m_coll.RHO_collection(extract_target:(extract_target+m.size_sets_hyperopt-1),:) = RHO_collection_ICV;
+%         m_coll.u_collection(extract_target:(extract_target+m.size_sets_hyperopt-1),1) = u_collection_ICV;
+%         m_coll.v_collection(extract_target:(extract_target+m.size_sets_hyperopt-1),1) = v_collection_ICV;
+%         errorcount=0;
+%     catch ME
+%         errorcount=1;
+%         pause(1)
+%     end
+% end
+
+save([analysis_folder, '/RHO_results_', i, '.mat'], 'RHO_collection_ICV', 'u_collection_ICV', 'v_collection_ICV');
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_hyperopt_csv.asv b/Visualization_Module/dp_ICV_hyperopt_csv.asv
new file mode 100644
index 0000000..76a1f8d
--- /dev/null
+++ b/Visualization_Module/dp_ICV_hyperopt_csv.asv
@@ -0,0 +1,59 @@
+%% new function for permutation testing
+
+function dp_ICV_hyperopt_csv(i, analysis_folder)
+
+m = matfile([analysis_folder '/hyperopt_partition.mat']);
+
+extract_target = (str2double(i)-1)*m.size_sets_hyperopt+1;
+try
+    cu_cv_extract = m.cu_cv_combination(extract_target:(extract_target+m.size_sets_hyperopt-1),:);
+catch
+    cu_cv_extract = m.cu_cv_combination(extract_target:end,:);
+end
+
+% RHO_collection_ICV = nan(size(cu_cv_extract,1),size(m.cv_inner_TrainInd,1)*size(m.cv_inner_TrainInd,2));
+train_data_x = m.train_data_x;
+train_data_y = m.train_data_y;
+train_covariates = m.train_covariates;
+train_DiagNames = m.train_DiagNames;
+cs_method_hyperopt = m.cs_method;
+
+for ii=1:size(cu_cv_extract,1)
+    cu = cu_cv_extract(ii,1);
+    cv = cu_cv_extract(ii,2);
+    nn=1;
+    for ib=1:size(m.cv_inner_TrainInd,1)
+        for k=1:size(m.cv_inner_TrainInd,2)
+
+            IN_x.train = train_data_x(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            IN_x.test = train_data_x(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            IN_y.train = train_data_y(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            IN_y.test = train_data_y(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            COV.train = train_covariates(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            COV.test = train_covariates(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            DiagNames.train = train_DiagNames(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            DiagNames.test = train_DiagNames(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            if ~isempty(cs_method_hyperopt.correction_subgroup)
+                cs_method_hyperopt.subgroup_train = contains(DiagNames.train, cs_method_hyperopt.correction_subgroup);
+                cs_method_hyperopt.subgroup_test = contains(DiagNames.test, cs_method_hyperopt.correction_subgroup);
+            else
+                cs_method_hyperopt.subgroup_train = [];
+                cs_method_hyperopt.subgroup_test = [];
+            end
+
+            [OUT_x, OUT_y] = dp_master_correctscale(IN_x, IN_y, COV, cs_method_hyperopt, m.correction_target);
+
+            RHO_collection_ICV(ii,nn) = dp_spls_slim(OUT_x.train,OUT_y.train,OUT_x.test,OUT_y.test, cu, cv, m.correlation_method);
+            nn=nn+1;
+        end
+    end
+    ii
+end
+
+writematrix(RHO_collection_ICV,[analysis_folder, '/RHO_results_HCcorr', i, '.csv'],'Delimiter','tab')
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_hyperopt_csv.m b/Visualization_Module/dp_ICV_hyperopt_csv.m
new file mode 100644
index 0000000..3e52385
--- /dev/null
+++ b/Visualization_Module/dp_ICV_hyperopt_csv.m
@@ -0,0 +1,58 @@
+%% new function for permutation testing
+
+function dp_ICV_hyperopt_csv(i, analysis_folder)
+
+m = matfile([analysis_folder '/hyperopt_partition.mat']);
+
+extract_target = (str2double(i)-1)*m.size_sets_hyperopt+1;
+try
+    cu_cv_extract = m.cu_cv_combination(extract_target:(extract_target+m.size_sets_hyperopt-1),:);
+catch
+    cu_cv_extract = m.cu_cv_combination(extract_target:end,:);
+end
+
+RHO_collection_ICV = nan(size(cu_cv_extract,1),size(m.cv_inner_TrainInd,1)*size(m.cv_inner_TrainInd,2));
+train_data_x = m.train_data_x;
+train_data_y = m.train_data_y;
+train_covariates = m.train_covariates;
+train_DiagNames = m.train_DiagNames;
+cs_method_hyperopt = m.cs_method;
+
+for ii=1:size(cu_cv_extract,1)
+    cu = cu_cv_extract(ii,1);
+    cv = cu_cv_extract(ii,2);
+    nn=1;
+    for ib=1:size(m.cv_inner_TrainInd,1)
+        for k=1:size(m.cv_inner_TrainInd,2)
+
+            IN_x.train = train_data_x(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            IN_x.test = train_data_x(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            IN_y.train = train_data_y(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            IN_y.test = train_data_y(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            COV.train = train_covariates(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            COV.test = train_covariates(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            DiagNames.train = train_DiagNames(cell2mat(m.cv_inner_TrainInd(ib,k)),:);
+            DiagNames.test = train_DiagNames(cell2mat(m.cv_inner_TestInd(ib,k)),:);
+
+            if ~isempty(cs_method_hyperopt.correction_subgroup)
+                cs_method_hyperopt.subgroup_train = contains(DiagNames.train, cs_method_hyperopt.correction_subgroup);
+                cs_method_hyperopt.subgroup_test = contains(DiagNames.test, cs_method_hyperopt.correction_subgroup);
+            else
+                cs_method_hyperopt.subgroup_train = [];
+                cs_method_hyperopt.subgroup_test = [];
+            end
+
+            [OUT_x, OUT_y] = dp_master_correctscale(IN_x, IN_y, COV, cs_method_hyperopt, m.correction_target);
+
+            RHO_collection_ICV(ii,nn) = dp_spls_slim(OUT_x.train,OUT_y.train,OUT_x.test,OUT_y.test, cu, cv, m.correlation_method);
+            nn=nn+1;
+        end
+    end
+end
+
+writematrix(RHO_collection_ICV,[analysis_folder, '/RHO_results_', i, '.csv'],'Delimiter','tab')
+
+end
\ No newline at end of file
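[Editor's example, not part of the patch: a sketch of the cu/cv grid these workers index into. The exact grid construction lives elsewhere in the repository (dp_cu_cv.m); the 40-step density matches the '4040' naming convention in the analysis folder but is an assumption.]

% All pairs of sparsity parameters, each spanning [1, sqrt(#features)]
% as in the standard SPLS parameterization; X and Y are the data matrices.
cu_range = linspace(1, sqrt(size(X,2)), 40);
cv_range = linspace(1, sqrt(size(Y,2)), 40);
[CU, CV] = ndgrid(cu_range, cv_range);
cu_cv_combination = [CU(:), CV(:)];   % one row per cu/cv pair, indexed via extract_target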
diff --git a/Visualization_Module/dp_ICV_hyperopt_serial.m b/Visualization_Module/dp_ICV_hyperopt_serial.m
new file mode 100644
index 0000000..11aaba0
--- /dev/null
+++ b/Visualization_Module/dp_ICV_hyperopt_serial.m
@@ -0,0 +1,34 @@
+%% new function for permutation testing
+
+function [RHO_collection_ICV, u_collection_ICV, v_collection_ICV] = dp_ICV_hyperopt_serial(X, Y, Covariates, B, K, TrainInd, TestInd, cu_cv_combination, scaling_method, correlation_method)
+
+u_collection_ICV = cell(size(cu_cv_combination,1),1);
+v_collection_ICV = cell(size(cu_cv_combination,1),1);
+RHO_collection_ICV = nan(size(cu_cv_combination,1), B*K);
+
+for i=1:size(cu_cv_combination,1)
+    cu = cu_cv_combination(i,1);
+    cv = cu_cv_combination(i,2);
+    nn=1;
+    for b=1:B
+        for k=1:K
+
+            IN_x.train = X(TrainInd{b,k},:);
+            IN_x.test = X(TestInd{b,k},:);
+
+            IN_y.train = Y(TrainInd{b,k},:);
+            IN_y.test = Y(TestInd{b,k},:);
+
+            COV.train = Covariates(TrainInd{b,k},:);
+            COV.test = Covariates(TestInd{b,k},:);
+
+            OUT_x = dp_correctscale(IN_x,COV,scaling_method);
+            OUT_y = dp_correctscale(IN_y,COV,scaling_method);
+
+            [RHO_collection_ICV(i,nn), u_collection_ICV{i,1}(:,nn), v_collection_ICV{i,1}(:,nn), ~, ~, ~] = dp_spls_full(OUT_x.train,OUT_y.train,OUT_x.test,OUT_y.test, cu, cv, correlation_method);
+            nn=nn+1;
+        end
+    end
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_main.m b/Visualization_Module/dp_ICV_main.m
new file mode 100644
index 0000000..b0923e4
--- /dev/null
+++ b/Visualization_Module/dp_ICV_main.m
@@ -0,0 +1,80 @@
+%% function for RHO parallel computing
+
+function [RHO_ICV, u_ICV, v_ICV] = dp_ICV_main(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request, time_limit)
+
+RHO_bash = dp_ICV_bash_job(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request);
+
+cd(analysis_folder);
+system(['qsub ' RHO_bash]);
+
+% see hyperparameter optimization
+target = [analysis_folder '/RHO_results_*.mat'];
+mydir = size(dir(target),1);
+RHO_collection = [];
+u_collection = [];
+v_collection = [];
+
+tic
+while mydir<total_jobs
+    if toc>(time_limit*60)
+        for i=1:total_jobs
+            if ~exist([analysis_folder '/RHO_results_' num2str(i) '.mat'])
+                switch type_analysis
+                    case 'hyperopt'
+                        dp_ICV_hyperopt(num2str(i), analysis_folder);
+                    case 'permutation'
+                        dp_ICV_permutation(num2str(i), analysis_folder);
+                end
+            end
+        end
+        mydir = size(dir(target),1);
+    end
+end
+
+pause('on');
+pause(10);
+
+for i=1:total_jobs
+    path_mat = [analysis_folder '/RHO_results_' num2str(i),'.mat'];
+    m = matfile(path_mat);
+    RHO_collection_temp = m.RHO_collection_ICV;
+    u_collection_temp = m.u_collection_ICV;
+    v_collection_temp = m.v_collection_ICV;
+
+    RHO_collection = [RHO_collection; RHO_collection_temp];
+    u_collection = [u_collection; u_collection_temp];
+    v_collection = [v_collection; v_collection_temp];
+
+    temp = load(path_mat);
+    temp_names = fieldnames(temp);
+
+%     RHO_collection_temp = temp.(temp_names{~cellfun(@isempty,strfind(temp_names, 'RHO'))});
+%     RHO_collection = [RHO_collection; RHO_collection_temp];
+%
+%     u_collection_temp = temp.(temp_names{~cellfun(@isempty,strfind(temp_names, 'u_collection'))});
+%     v_collection_temp = temp.(temp_names{~cellfun(@isempty,strfind(temp_names, 'v_collection'))});
+%
+%     u_collection = [u_collection; u_collection_temp];
+%     v_collection = [v_collection; v_collection_temp];
+
+%     try epsilon_collection_temp = temp.(temp_names{contains(temp_names, 'epsilon_collection')});
+%         omega_collection_temp = temp.(temp_names{contains(temp_names, 'omega_collection')});
+%
+%         epsilon_collection = [epsilon_collection; epsilon_collection_temp];
+%         omega_collection = [omega_collection; omega_collection_temp];
+%     catch
+%         epsilon_collection=[];omega_collection=[];
+%     end
+    delete(path_mat);
+end
+
+RHO_ICV = RHO_collection;
+u_ICV = u_collection;
+v_ICV = v_collection;
+% epsilon_ICV = [];
+% omega_ICV = [];
+
+end
\ No newline at end of file
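[Editor's example, not part of the patch: a hypothetical end-to-end call to dp_ICV_main above; all arguments are placeholders.]

% Submit 100 hyperopt jobs (max 20 concurrent), recompute stragglers
% serially once 60 minutes have passed, then pick the best cu/cv row.
[RHO_ICV, u_ICV, v_ICV] = dp_ICV_main(spls_standalone_path, 'all.q', ...
    analysis_folder, 'hyperopt', 100, 20, '4G', 60);
[~, opt_idx] = max(mean(RHO_ICV, 2));   % row with the highest mean test correlation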
diff --git a/Visualization_Module/dp_ICV_main_csv.m b/Visualization_Module/dp_ICV_main_csv.m
new file mode 100644
index 0000000..cbc3851
--- /dev/null
+++ b/Visualization_Module/dp_ICV_main_csv.m
@@ -0,0 +1,30 @@
+%% function for RHO parallel computing
+
+function RHO_ICV = dp_ICV_main_csv(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request)
+
+RHO_bash = dp_ICV_bash_job(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request);
+
+cd(analysis_folder);
+system(['qsub ' RHO_bash]);
+
+% see hyperparameter optimization
+target = [analysis_folder '/RHO_results_*.csv'];
+mydir = size(dir(target),1);
+RHO_collection = [];
+
+while mydir<total_jobs
+    if toc>(time_limit*60)
+        for i=1:total_jobs
+            if ~exist([analysis_folder '/RHO_results_' num2str(i) '.mat'])
+                switch type_analysis
+                    case 'hyperopt'
+                        dp_ICV_hyperopt(num2str(i), analysis_folder);
+                    case 'permutation'
+                        dp_ICV_permutation(num2str(i), analysis_folder);
+                end
+            end
+        end
+        mydir = size(dir(target),1);
+    end
+
+    pause('on');
+    pause(10);
+
+    for i=1:total_jobs
+        if exist([analysis_folder '/RHO_results_' num2str(i) '.mat'], 'file')
+            path_mat = [analysis_folder '/RHO_results_' num2str(i),'.mat'];
+            m = matfile(path_mat);
+            RHO_collection_temp = m.RHO_collection_ICV;
+            u_collection_temp = m.u_collection_ICV;
+            v_collection_temp = m.v_collection_ICV;
+
+            RHO_collection = [RHO_collection; RHO_collection_temp];
+            u_collection = [u_collection; u_collection_temp];
+            v_collection = [v_collection; v_collection_temp];
+
+            temp = load(path_mat);
+            temp_names = fieldnames(temp);
+
+            delete(path_mat);
+
+            read_in = read_in+1;
+        end
+    end
+
+end
+RHO_ICV = RHO_collection;
+u_ICV = u_collection;
+v_ICV = v_collection;
+% epsilon_ICV = [];
+% omega_ICV = [];
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_main_mf.m b/Visualization_Module/dp_ICV_main_mf.m
new file mode 100644
index 0000000..41dfd1f
--- /dev/null
+++ b/Visualization_Module/dp_ICV_main_mf.m
@@ -0,0 +1,18 @@
+%% function for sending and collecting job array results
+
+function [RHO_ICV, u_ICV, v_ICV] = dp_ICV_main_mf(spls_standalone_path, analysis_folder, type_analysis, total_jobs)
+
+RHO_bash = dp_ICV_bash_job(spls_standalone_path, analysis_folder, type_analysis, total_jobs);
+
+cd(analysis_folder);
+system(['qsub ' RHO_bash]);
+
+% m = matfile([analysis_folder '/RHO_results.mat']);
+% while any(cellfun(@isempty,m.u_collection))
+% end
+%
+% RHO_ICV = m.RHO_collection;
+% u_ICV = m.u_collection;
+% v_ICV = m.v_collection;
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_main_txt.m b/Visualization_Module/dp_ICV_main_txt.m
new file mode 100644
index 0000000..4a6d26a
--- /dev/null
+++ b/Visualization_Module/dp_ICV_main_txt.m
@@ -0,0 +1,43 @@
+%% function for RHO parallel computing
+
+function RHO_ICV = dp_ICV_main_txt(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request, time_limit)
+
+RHO_bash = dp_ICV_bash_job(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request);
+
+cd(analysis_folder);
+system(['qsub ' RHO_bash]);
+
+% see hyperparameter optimization
+target = [analysis_folder '/RHO_results_*.csv'];
+mydir = size(dir(target),1);
+RHO_collection = [];
+
+tic
+while mydir<total_jobs
+    if toc>(time_limit*60)
+        for i=1:total_jobs
+            if ~exist([analysis_folder '/RHO_results_' num2str(i) '.csv'])
+                switch type_analysis
+                    case 'hyperopt'
+                        dp_ICV_hyperopt(num2str(i), analysis_folder);
+                    case 'permutation'
+                        dp_ICV_permutation(num2str(i), analysis_folder);
+                end
+            end
+        end
+        mydir = size(dir(target),1);
+    end
+end
+
+for i=1:total_jobs
+    path_file = [analysis_folder '/RHO_results_' num2str(i),'.csv'];
+    RHO_collection_temp = readmatrix(path_file);
+    RHO_collection = [RHO_collection; RHO_collection_temp];
+    delete(path_file);
+end
+
+RHO_ICV = RHO_collection;
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_main_txt.m~ b/Visualization_Module/dp_ICV_main_txt.m~
new file mode 100644
index 0000000..09caa69
--- /dev/null
+++ b/Visualization_Module/dp_ICV_main_txt.m~
@@ -0,0 +1,49 @@
+%% function for RHO parallel computing
+
+function RHO_ICV = dp_ICV_main_txt(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request, time_limit)
+
+RHO_bash = dp_ICV_bash_job(spls_standalone_path, queue_name, analysis_folder, type_analysis, total_jobs, parallel_jobs, mem_request);
+
+cd(analysis_folder);
+system(['qsub ' RHO_bash]);
+
+% see hyperparameter optimization
+target = [analysis_folder '/RHO_results_*.txt'];
+mydir = size(dir(target),1);
+RHO_collection = [];
+
+tic
+while mydir<total_jobs
+    if toc>(time_limit*60)
+        for i=1:total_jobs
+            if ~exist([analysis_folder '/RHO_results_' num2str(i) '.txt'])
+                switch type_analysis
+                    case 'hyperopt'
+                        dp_ICV_hyperopt(num2str(i), analysis_folder);
+                    case 'permutation'
+                        dp_ICV_permutation(num2str(i), analysis_folder);
+                end
+            end
+        end
+        mydir = size(dir(target),1);
+    end
+end
+
+for i=1:total_jobs
+    path_mat = [analysis_folder '/RHO_results_' num2str(i),'.mat'];
+    m = matfile(path_mat);
+    RHO_collection_temp = readmatrix(filename);
+
+    RHO_collection = [RHO_collection; RHO_collection_temp];
+
+    temp = load(path_mat);
+    temp_names = fieldnames(temp);
+
+    delete(path_mat);
+end
+
+RHO_ICV = RHO_collection;
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_permutation.m b/Visualization_Module/dp_ICV_permutation.m
new file mode 100644
index 0000000..a421953
--- /dev/null
+++ b/Visualization_Module/dp_ICV_permutation.m
@@ -0,0 +1,76 @@
+%% function for permutation testing
+
+function dp_ICV_permutation(i, analysis_folder)
+
+m_setup = matfile([analysis_folder '/permutation_setup.mat']);
+% load([analysis_folder '/permutation_setup.mat'],'selection_train', 'correlation_method', 'size_sets_permutation', 'perm_coding', 'scaling_method');
+
+if m_setup.selection_train == 1
+    m_data = matfile([analysis_folder, '/permutation_partition_fold.mat']);
+    m_opt = matfile([analysis_folder '/permutation_opt.mat']);
+    %test={'IN_x','IN_y','OCV_train_Diag','COV','cu_opt','cv_opt','V_opt'};
+elseif m_setup.selection_train == 2
+    w = m_setup.perm_coding(2, (find(m_setup.perm_coding(1,:)==str2double(i))));
+    m_data = matfile([analysis_folder, '/permutation_partition_fold_', num2str(w), '.mat']);
+    m_opt = matfile([analysis_folder '/permutation_opt.mat']); %,'cu_opt','cv_opt','V_opt');
+end
+
+% 1) retrain on single test splits within
+% folds, then merge, 2) retrain on all inner folds
+% separately, then merge with mean or median, 3)
+% retrain on entirety of inner folds, 4) use already
+% existing u and v from inner folds without retraining
+
+IN_x.train = m_data.train_data_x;
+IN_x.test = m_data.test_data_x;
+COV.train = m_data.train_covariates;
+COV.test = m_data.test_covariates;
+IN_y.train = m_data.train_data_y;
+IN_y.test = m_data.test_data_y;
+
+for pp=1:str2double(i)
+    permmat = nk_PermInd2(m_setup.size_sets_permutation, m_data.train_Diag);
+end
+
+% RHO_collection = nan(m_setup.size_sets_permutation,1);
+% u_collection = cell(m_setup.size_sets_permutation,size(IN_x.train,2));
+% v_collection = cell(m_setup.size_sets_permutation,size(IN_y.train,2));
+
+% intervall_fill = m_setup.perm_coding(3,str2double(i)):(m_setup.perm_coding(3,str2double(i))+m_setup.size_sets_permutation-1);
+
+RHO_collection_ICV = nan(m_setup.size_sets_permutation,1);
+u_collection_ICV = nan(m_setup.size_sets_permutation,size(m_data.train_data_x,2));
+v_collection_ICV = nan(m_setup.size_sets_permutation,size(m_data.train_data_y,2));
+
+for ii=1:m_setup.size_sets_permutation
+    % perform procrustean transformation to minimize rotation effects of
+    % permutated y matrix, if V_opt available
+    IN_y.train = IN_y.train(permmat(ii,:),:);
+
+    OUT_x = dp_correctscale(IN_x,COV,m_setup.scaling_method);
+    OUT_y = dp_correctscale(IN_y,COV,m_setup.scaling_method);
+
+    if ~islogical(m_opt.V_opt)
+        [RHO_collection_ICV(ii,1), u_collection_ICV(ii,:), v_collection_ICV(ii,:), ~, ~, ~] = dp_spls_full(OUT_x.train,OUT_y.train,OUT_x.test, OUT_y.test, m_opt.cu_opt, m_opt.cv_opt, m_setup.correlation_method, m_opt.V_opt);
+    else
+        [RHO_collection_ICV(ii,1), u_collection_ICV(ii,:), v_collection_ICV(ii,:), ~, ~, ~] = dp_spls_full(OUT_x.train,OUT_y.train,OUT_x.test, OUT_y.test, m_opt.cu_opt, m_opt.cv_opt, m_setup.correlation_method);
+    end
+
+end
+
+save([analysis_folder, '/RHO_results_', i, '.mat'], 'RHO_collection_ICV', 'u_collection_ICV', 'v_collection_ICV');
+
+% errorcount=1;
+% while errorcount>0
+%     try m_coll = matfile([analysis_folder, '/RHO_results.mat'],'Writable',true);
+%         m_coll.RHO_collection(intervall_fill,:) = RHO_collection_ICV;
+%         m_coll.u_collection(intervall_fill,1) = u_collection_ICV;
+%         m_coll.v_collection(intervall_fill,1) = v_collection_ICV;
+%         errorcount=0;
+%     catch ME
+%         errorcount=1;
+%         pause(1)
+%     end
+% end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_permutation_csv.asv b/Visualization_Module/dp_ICV_permutation_csv.asv
new file mode 100644
index 0000000..f9cd88b
--- /dev/null
+++ b/Visualization_Module/dp_ICV_permutation_csv.asv
@@ -0,0 +1,57 @@
+%% function for permutation testing
+
+function dp_ICV_permutation_csv(i, analysis_folder)
+
+m_setup = matfile([analysis_folder '/permutation_setup.mat']);
+
+if m_setup.selection_train == 1
+    m_data = matfile([analysis_folder, '/permutation_partition_fold.mat']);
+    m_opt = matfile([analysis_folder '/permutation_opt.mat']);
+elseif m_setup.selection_train == 2
+    w = m_setup.perm_coding(2, (find(m_setup.perm_coding(1,:)==str2double(i))));
+    m_data = matfile([analysis_folder, '/permutation_partition_fold_', num2str(w), '.mat']);
+    m_opt = matfile([analysis_folder '/permutation_opt.mat']);
+end
+
+% 1) retrain on single test splits within
+% folds, then merge, 2) retrain on all inner folds
+% separately, then merge with mean or median, 3)
+% retrain on entirety of inner folds, 4) use already
+% existing u and v from inner folds without retraining
+
+IN_x.train = m_data.train_data_x;
+IN_x.test = m_data.test_data_x;
+COV.train = m_data.train_covariates;
+COV.test = m_data.test_covariates;
+IN_y.train = m_data.train_data_y;
+IN_y.test = m_data.test_data_y;
+
+train_DiagNames = m.train_DiagNames;
+train_DiagNames = m.train_DiagNames;
+
+cs_method_permutation = m.cs_method;
+
+for pp=1:str2double(i)
+    permmat = nk_PermInd2(m_setup.size_sets_permutation, m_data.train_Diag);
+end
+
+RHO_collection_ICV = nan(m_setup.size_sets_permutation,1);
+
+for ii=1:m_setup.size_sets_permutation
+    % perform procrustean transformation to minimize rotation effects of
+    % permutated y matrix, if V_opt available
+    IN_y.train = IN_y.train(permmat(ii,:),:);
+
+    [OUT_x, OUT_y] = dp_master_correctscale(IN_x, IN_y, COV, m_setup.scaling_method, m_setup.correction_target);
+
+    if ~islogical(m_opt.V_opt)
+        RHO_collection_ICV(ii,1) = dp_spls_slim(OUT_x.train,OUT_y.train,OUT_x.test, OUT_y.test, m_opt.cu_opt, m_opt.cv_opt, m_setup.correlation_method, m_opt.V_opt);
+    else
+        RHO_collection_ICV(ii,1) = dp_spls_slim(OUT_x.train,OUT_y.train,OUT_x.test, OUT_y.test, m_opt.cu_opt, m_opt.cv_opt, m_setup.correlation_method);
+    end
+
+end
+
+writematrix(RHO_collection_ICV,[analysis_folder, '/RHO_results_', i, '.csv'],'Delimiter','tab')
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_permutation_csv.m b/Visualization_Module/dp_ICV_permutation_csv.m
new file mode 100644
index 0000000..03ca575
--- /dev/null
+++ b/Visualization_Module/dp_ICV_permutation_csv.m
@@ -0,0 +1,53 @@
+%% function for permutation testing
+
+function dp_ICV_permutation_csv(i, analysis_folder)
+
+m_setup = matfile([analysis_folder '/permutation_setup.mat']);
+
+if m_setup.selection_train == 1
+    m_data = matfile([analysis_folder, '/permutation_partition_fold.mat']);
+    m_opt = matfile([analysis_folder '/permutation_opt.mat']);
+elseif m_setup.selection_train == 2
+    w = m_setup.perm_coding(2, (find(m_setup.perm_coding(1,:)==str2double(i))));
+    m_data = matfile([analysis_folder, '/permutation_partition_fold_', num2str(w), '.mat']);
+    m_opt = matfile([analysis_folder '/permutation_opt.mat']);
+end
+
+% 1) retrain on single test splits within
+% folds, then merge, 2) retrain on all inner folds
+% separately, then merge with mean or median, 3)
+% retrain on entirety of inner folds, 4) use already
+% existing u and v from inner folds without retraining
+
+IN_x.train = m_data.train_data_x;
+IN_x.test = m_data.test_data_x;
+COV.train = m_data.train_covariates;
+COV.test = m_data.test_covariates;
+IN_y.train = m_data.train_data_y;
+IN_y.test = m_data.test_data_y;
+
+cs_method_permutation = m_data.cs_method;
+
+for pp=1:str2double(i)
+    permmat = nk_PermInd2(m_setup.size_sets_permutation, m_data.train_Diag);
+end
+
+RHO_collection_ICV = nan(m_setup.size_sets_permutation,1);
+
+for ii=1:m_setup.size_sets_permutation
+    % perform procrustean transformation to minimize rotation effects of
+    % permutated y matrix, if V_opt available
+    IN_y.train = IN_y.train(permmat(ii,:),:);
+
+    [OUT_x, OUT_y] = dp_master_correctscale(IN_x, IN_y, COV, cs_method_permutation, m_setup.correction_target);
+
+    if ~islogical(m_opt.V_opt)
+        RHO_collection_ICV(ii,1) = dp_spls_slim(OUT_x.train,OUT_y.train,OUT_x.test, OUT_y.test, m_opt.cu_opt, m_opt.cv_opt, m_setup.correlation_method, m_opt.V_opt);
+    else
+        RHO_collection_ICV(ii,1) = dp_spls_slim(OUT_x.train,OUT_y.train,OUT_x.test, OUT_y.test, m_opt.cu_opt, m_opt.cv_opt, m_setup.correlation_method);
+    end
+end
+
+writematrix(RHO_collection_ICV,[analysis_folder, '/RHO_results_', i, '.csv'],'Delimiter','tab')
+
+end
\ No newline at end of file
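[Editor's example, not part of the patch: a sketch of the downstream significance test; RHO_opt stands for the observed test correlation of the model under test.]

% Pool the permuted test correlations written above and compute an
% add-one permutation p-value for the observed RHO_opt.
RHO_perm = [];
for j = 1:total_jobs
    RHO_perm = [RHO_perm; readmatrix([analysis_folder, '/RHO_results_', num2str(j), '.csv'])];
end
p_perm = (1 + sum(RHO_perm(:) >= RHO_opt)) / (1 + numel(RHO_perm));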
diff --git a/Visualization_Module/dp_ICV_permutation_serial.m b/Visualization_Module/dp_ICV_permutation_serial.m
new file mode 100644
index 0000000..aeae75d
--- /dev/null
+++ b/Visualization_Module/dp_ICV_permutation_serial.m
@@ -0,0 +1,64 @@
+%% function for permutation testing
+
+function [RHO_collection_ICV, u_collection_ICV, v_collection_ICV] = dp_ICV_permutation_serial(X, Y, COV, PARAM, DIAG)
+
+switch PARAM.train
+    case 1
+        IN_x.train = X.train;
+        IN_x.test = X.test;
+        IN_y.train = Y.train;
+        IN_y.test = Y.test;
+        COV.train = COV.train;
+        COV.test = COV.test;
+        permmat = nk_PermInd2(PARAM.B, DIAG.train);
+end
+
+% 1) retrain on single test splits within
+% folds, then merge, 2) retrain on all inner folds
+% separately, then merge with mean or median, 3)
+% retrain on entirety of inner folds, 4) use already
+% existing u and v from inner folds without retraining
+
+RHO_collection_ICV = nan(PARAM.B,1);
+u_collection_ICV = nan(PARAM.B, size(IN_x.train,2));
+v_collection_ICV = nan(PARAM.B, size(IN_y.train,2));
+
+for ii=1:PARAM.B
+
+    switch PARAM.train
+        case 2
+            find_log = ii==PARAM.perm_coding(3,:);
+            if any(find_log)
+                w = PARAM.perm_coding(2, find_log);
+                ob=1;
+                COV.test = COV(PARAM.cv.TestInd{ob,w},:);
+                COV.train = COV(PARAM.cv.TrainInd{ob,w},:);
+
+                IN_x.train = X(PARAM.cv.TrainInd{ob,w},:);
+                IN_x.test = X(PARAM.cv.TestInd{ob,w},:);
+
+                IN_y.train = Y(PARAM.cv.TrainInd{ob,w},:);
+                IN_y.test = Y(PARAM.cv.TestInd{ob,w},:);
+
+                DIAG.train = DIAG(PARAM.cv.TrainInd{ob,w},:);
+
+                permmat = nk_PermInd2(PARAM.B, DIAG.train);
+            end
+    end
+
+    % perform procrustean transformation to minimize rotation effects of
+    % permutated y matrix, if V_opt available
+    IN_y.train = IN_y.train(permmat(ii,:),:);
+
+    OUT_x = dp_correctscale(IN_x,COV,PARAM.scale);
+    OUT_y = dp_correctscale(IN_y,COV,PARAM.scale);
+
+    if ~islogical(PARAM.V)
+        [RHO_collection_ICV(ii,1), u_collection_ICV(ii,:), v_collection_ICV(ii,:), ~, ~, ~] = dp_spls_full(OUT_x.train,OUT_y.train,OUT_x.test, OUT_y.test, PARAM.cu, PARAM.cv, PARAM.correlate, PARAM.V);
+    else
+        [RHO_collection_ICV(ii,1), u_collection_ICV(ii,:), v_collection_ICV(ii,:), ~, ~, ~] = dp_spls_full(OUT_x.train,OUT_y.train,OUT_x.test, OUT_y.test, PARAM.cu, PARAM.cv, PARAM.correlate);
+    end
+
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ICV_spls.m b/Visualization_Module/dp_ICV_spls.m
new file mode 100644
index 0000000..4df4606
--- /dev/null
+++ b/Visualization_Module/dp_ICV_spls.m
@@ -0,0 +1,30 @@
+%% DP function for one k split
+
+function [RHO, u, v, epsilon, omega] = dp_ICV_spls(training_data_x,training_data_y,test_data_x, test_data_y, cu, cv, correlation_method)
+
+%perform SPLS on the training data using the current cu/cv combination
+[u_temp, v_temp, ~] = spls_suppressed_display(training_data_x,training_data_y,cu,cv);
+
+%compute the correlation between the projections of the training and
+%test matrices onto the SPLS latent space spanned by the weight vectors
+epsilon_temp = test_data_x*u_temp;
+omega_temp = test_data_y*v_temp;
+RHO_temp = corr(epsilon_temp, omega_temp, 'Type', correlation_method);
+
+f_invert = @(x)(-1*x);
+
+if RHO_temp<0
+    RHO = f_invert(RHO_temp);
+    u = u_temp;
+    v = f_invert(v_temp);
+    epsilon = epsilon_temp;
+    omega = test_data_y*v;
+else
+    RHO = RHO_temp;
+    u = u_temp;
+    v = v_temp;
+    epsilon = epsilon_temp;
+    omega = omega_temp;
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_LSOVpartition.m b/Visualization_Module/dp_LSOVpartition.m
new file mode 100644
index 0000000..d5a5fc2
--- /dev/null
+++ b/Visualization_Module/dp_LSOVpartition.m
@@ -0,0 +1,21 @@
+%% DP function for LSOV partitions
+function partition = dp_LSOVpartition(sites)
+% sites needs to be coded binary with each site being represented by one
+% column
+
+for r=1:size(sites,2)
+    if sum(sites(:,r))==0
+        log_full(1,r)=false;
+    else
+        log_full(1,r)=true;
+    end
+end
+
+sites = sites(:,log_full);
+
+for i=1:size(sites,2)
+    partition.TestInd{1,i} = find(sites(:,i)==1);
+    partition.TrainInd{1,i} = find(sites(:,i)==0);
+end
+
+end
\ No newline at end of file
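[Editor's example, not part of the patch: an illustrative call to dp_LSOVpartition above with a toy site matrix.]

% Five subjects from three sites; fold i tests on site i and trains on the rest.
sites = [1 0 0; 1 0 0; 0 1 0; 0 1 0; 0 0 1];
partition = dp_LSOVpartition(sites);
test_idx  = partition.TestInd{1,1};    % subjects of site 1
train_idx = partition.TrainInd{1,1};   % subjects of all remaining sites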
diff --git a/Visualization_Module/dp_RHO_avg_10_sets.m b/Visualization_Module/dp_RHO_avg_10_sets.m
new file mode 100644
index 0000000..4c39f0e
--- /dev/null
+++ b/Visualization_Module/dp_RHO_avg_10_sets.m
@@ -0,0 +1,49 @@
+%% new function for permutation testing
+
+function dp_RHO_avg_10_sets(i, size_sets_str, analysis_folder)
+
+load([analysis_folder '/keep_in_partition.mat']);
+% dp_txt_write(analysis_folder, ['init_' i],'initialized','%s \n');
+
+size_sets = str2double(size_sets_str);
+RHO_median_collection = nan(size_sets,1);
+
+for ii=1:size_sets
+    if exist([analysis_folder '/cu_' i '_' num2str(ii) '.txt'],'file')
+        RHO_collection = nan(size(cv_inner.TestInd,2),1);
+        cu = dp_txtscan([analysis_folder '/cu_', i, '_', num2str(ii), '.txt'], '%f');
+        cv = dp_txtscan([analysis_folder '/cv_', i, '_', num2str(ii), '.txt'], '%f');
+
+        if cu > sqrt(size(keep_in_data_x,2))
+            cu = sqrt(size(keep_in_data_x,2));
+        end
+
+        if cv > sqrt(size(keep_in_data_y,2))
+            cv = sqrt(size(keep_in_data_y,2));
+        end
+
+        for k=1:size(cv_inner.TestInd,2)
+            test_data_x = keep_in_data_x(cv_inner.TestInd{k},:);
+            test_data_y = keep_in_data_y(cv_inner.TestInd{k},:);
+            training_data_x = keep_in_data_x(cv_inner.TrainInd{k},:);
+            training_data_y = keep_in_data_y(cv_inner.TrainInd{k},:);
+            RHO_collection(k,1) = dp_k_split(training_data_x,training_data_y,test_data_x, test_data_y, cu, cv, correlation_method);
+        end
+
+        RHO_median_collection(ii,1) = median(RHO_collection);
+%         FID = fopen([analysis_folder, '/init_' i '.txt'], 'a');
+%         fprintf(FID, '%d \n', ii);
+%         fclose(FID);
+    end
+
+end
+
+% RHO_median_collection(isnan(RHO_median_collection))=[];
+try
+    dp_txt_write(analysis_folder, ['RHO_' i], RHO_median_collection, '%.4f\n');
+catch ME
+    ME.message
+    save([analysis_folder, '/RHO_', i, '.mat'], 'RHO_median_collection');
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_RHO_b_100_sets.m b/Visualization_Module/dp_RHO_b_100_sets.m
new file mode 100644
index 0000000..2f22b85
--- /dev/null
+++ b/Visualization_Module/dp_RHO_b_100_sets.m
@@ -0,0 +1,39 @@
+%% new function for permutation testing
+
+function dp_RHO_b_100_sets(i, size_sets_str, analysis_folder)
+
+load([analysis_folder '/opt_param.mat']);
+% dp_txt_write(analysis_folder, ['init_' i],'initialized','%s \n');
+
+size_sets = str2double(size_sets_str);
+RHO_b_collection = nan(size_sets,1);
+
+for pp=1:str2double(i)
+    permmat = nk_PermInd2(size_sets, keep_in_Diag);
+end
+
+for ii=1:size_sets
+
+    % perform procrustean transformation to minimize rotation effects of
+    % permutated y matrix
+    perm_data_y = keep_in_data_y(permmat(ii,:),:);
+
+    [u_b, v_b, ~]=dp_spls_resample(keep_in_data_x, perm_data_y, cu_opt, cv_opt, V_opt);
+
+    % compute the absolute correlation between the hold_out data
+    % and the permuted u_b and v_b
+    RHO_b_collection(ii,1) = abs(corr(hold_out_data_x*u_b,hold_out_data_y*v_b, 'Type', correlation_method));
+%     FID = fopen([analysis_folder, '/init_' i '.txt'], 'a');
+%     fprintf(FID, '%d \n', ii);
+%     fclose(FID);
+end
+
+% save the calculated RHO_b value
+try
+    dp_txt_write(analysis_folder, ['RHO_' i], RHO_b_collection, '%.4f\n');
+catch ME
+    ME.message
+    save([analysis_folder, '/RHO_', i, '.mat'], 'RHO_b_collection');
+end
+
+end
\ No newline at end of file
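[Editor's example, not part of the patch: a small sketch tying the resampled correlations to dp_ConfidenceInterval from earlier in this module.]

% 95% t-based confidence interval around resampled test correlations,
% e.g. the RHO_b_collection computed above.
IN.data  = RHO_b_collection;
IN.sided = 2;
CI = dp_ConfidenceInterval(IN);   % [lower upper]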
+
+% see hyperparameter optimization
+mydir = size(dir([analysis_folder '/RHO_*.txt']),1);
+RHO_collection = [];
+
+while mydir<total_jobs
+    mydir = size(dir([analysis_folder '/RHO_*.txt']),1);
+end
+
+% collect the per-job results and clean up the text files
+for i=1:total_jobs
+    RHO_collection_temp = dp_txtscan([analysis_folder '/RHO_' num2str(i),'.txt'], '%f\n');
+    delete([analysis_folder '/RHO_' num2str(i),'.txt']);
+    RHO_collection = [RHO_collection; RHO_collection_temp];
+end
+
+RHO_output = RHO_collection;
+
+% flag the run as failed if more than 10% of the collected values are NaN
+if sum(isnan(RHO_output))>(0.1*size(RHO_output,1))
+    success=false;
+else
+    success=true;
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_RHO_fullpara_slurm.m b/Visualization_Module/dp_RHO_fullpara_slurm.m
new file mode 100644
index 0000000..7620f2e
--- /dev/null
+++ b/Visualization_Module/dp_RHO_fullpara_slurm.m
@@ -0,0 +1,51 @@
+%% function for RHO parallel computing
+
+function [RHO_output, success] = dp_RHO_fullpara_slurm(spls_standalone_path, analysis_folder, type_analysis, mem_total, max_sim_jobs, queue_name, total_jobs, size_sets)
+
+RHO_bash = dp_slurm_parallel(spls_standalone_path, analysis_folder, type_analysis, mem_total, max_sim_jobs, queue_name, total_jobs, size_sets);
+
+cd(analysis_folder);
+system(['sbatch ' RHO_bash]);
+
+% see hyperparameter optimization
+mydir = size(dir([analysis_folder '/RHO_*.txt']),1);
+RHO_collection = [];
+
+% wait until every job has written its RHO_*.txt result file
+while mydir<total_jobs
+    mydir = size(dir([analysis_folder '/RHO_*.txt']),1);
+end
+
+for i=1:total_jobs
+    RHO_collection_temp = dp_txtscan([analysis_folder '/RHO_' num2str(i),'.txt'], '%f\n');
+    delete([analysis_folder '/RHO_' num2str(i),'.txt']);
+    RHO_collection = [RHO_collection; RHO_collection_temp];
+end
+
+RHO_output = RHO_collection;
+
+if sum(isnan(RHO_output))>(0.1*size(RHO_output,1))
+    success=false;
+else
+    success=true;
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_RHO_fullpara_time.m b/Visualization_Module/dp_RHO_fullpara_time.m
new file mode 100644
index 0000000..628946a
--- /dev/null
+++ b/Visualization_Module/dp_RHO_fullpara_time.m
@@ -0,0 +1,51 @@
+%% function for RHO parallel computing
+
+function [RHO_output, success] = dp_RHO_fullpara_time(spls_standalone_path, analysis_folder, type_analysis, mem_total, max_sim_jobs, queue_name, total_jobs, size_sets, time_limit)
+
+RHO_bash = dp_bash_parallel(spls_standalone_path, analysis_folder, type_analysis, mem_total, max_sim_jobs, queue_name, total_jobs, size_sets);
+
+cd(analysis_folder);
+system(['qsub ' RHO_bash]);
+
+% see hyperparameter optimization
+mydir = size(dir([analysis_folder '/RHO_*.txt']),1);
+RHO_collection = [];
+
+tic
+% poll until all jobs are done; once the time limit (in minutes) is
+% exceeded, compute any missing jobs serially in this session
+while mydir<total_jobs
+    if toc>(time_limit*60)
+        for i=1:total_jobs
+            if ~exist([analysis_folder '/RHO_' num2str(i) '.txt'])
+                switch type_analysis
+                    case 'hyperopt'
+                        dp_RHO_avg_10_sets(num2str(i), num2str(size_sets), analysis_folder);
+                    case 'permutation'
+                        dp_RHO_b_100_sets(num2str(i), num2str(size_sets), analysis_folder);
+                end
+            end
+        end
+        mydir = size(dir([analysis_folder '/RHO_*.txt']),1);
+    end
+end
+
+
+
+for i=1:total_jobs
+    RHO_collection_temp = [];
+    RHO_collection_temp = dp_txtscan([analysis_folder '/RHO_' num2str(i),'.txt'], '%f\n');
+    delete([analysis_folder '/RHO_' num2str(i),'.txt']);
+    RHO_collection = [RHO_collection; RHO_collection_temp];
+end
+
+RHO_output = RHO_collection;
+
+
+if sum(isnan(RHO_output))>(0.1*size(RHO_output,1))
+    success=false;
+else
+    success=true;
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_RHO_parallel.m~ b/Visualization_Module/dp_RHO_parallel.m~
new file mode 100644
index 0000000..fbba155
--- /dev/null
+++ b/Visualization_Module/dp_RHO_parallel.m~
@@ -0,0 +1,59 @@
+%% function for RHO parallel computing
+
+function [RHO_output] = dp_RHO_parallel(spls_standalone_path, analysis_folder, type_analysis, mem_total, max_sim_jobs, queue_name, total_jobs, tp_size_sets, cu, cv)
+
+switch type_analysis
+    case 'hyperopt'
+        search_prefix = 'RHO_';
+        RHO_bash = dp_bash_parallel(spls_standalone_path, analysis_folder, type_analysis, mem_total, max_sim_jobs, queue_name, total_jobs, tp_size_sets, cu, cv);
+        RHO_collection = zeros(total_jobs,1);
+    case 'permutation'
+        search_prefix = 'RHO_b_';
+        RHO_bash = dp_bash_parallel(spls_standalone_path, analysis_folder, type_analysis, mem_total,
max_sim_jobs, queue_name, total_jobs, tp_size_sets); + RHO_collection = zeros(total_jobs*tp_size_sets,1); +end + +cd(analysis_folder); +system(['qsub ' RHO_bash]); + +% see hyperparameter optimization + +while sum(sum(RHO_collection == 0))>0; + for i=1:size(RHO_collection,1) + if exist([analysis_folder '/' search_prefix, num2str(i),'.txt'],'file') + try + RHO_collection(i,1) = dp_txtscan([analysis_folder '/' search_prefix, num2str(i),'.txt'], '%f'); + delete([analysis_folder '/' search_prefix, num2str(i),'.txt']); + catch ME + disp(ME.identifier); + RHO_collection(i,1) = NaN; + end + end + end +end + + + + +delete([analysis_folder '/perm_' i '_' num2str(ii) '.txt']); + +% pause('on'); +% pause(60); +% +% for i=1:size(RHO_collection,1) +% if exist([analysis_folder '/' search_prefix, num2str(i),'.txt'],'file') +% RHO_collection(i,1) = dp_txtscan([analysis_folder '/' search_prefix, num2str(i),'.txt'], '%f'); +% delete([analysis_folder '/' search_prefix, num2str(i),'.txt']); +% end +% end +% +switch type_analysis + case 'hyperopt' +% RHO_collection_completed = dp_fill_nan(RHO_collection, spls_standalone_path, analysis_folder, type_analysis, mem_total, max_sim_jobs, queue_name, tp_size_sets, cu, cv); + RHO_output = mean(RHO_collection); + case 'permutation' +% RHO_collection_completed = dp_fill_nan(RHO_collection, spls_standalone_path, analysis_folder, type_analysis, mem_total, max_sim_jobs, queue_name); + RHO_output = RHO_collection; +end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_atlas_find.m b/Visualization_Module/dp_atlas_find.m new file mode 100644 index 0000000..8bf29f6 --- /dev/null +++ b/Visualization_Module/dp_atlas_find.m @@ -0,0 +1,17 @@ +%% DP function to find atlases + +function atlases = dp_atlas_find(IN) + +atlases_dir = dir(IN.atlas_directory); +dirs_names = {atlases_dir.name}; +atlases={}; +for i=1:size(IN.atlases_chosen,2) + log_find = contains(dirs_names, IN.atlases_chosen{i}, 'IgnoreCase', true); + atlas_dir = [IN.atlas_directory, '/', dirs_names{log_find}]; + spec_atlas_dir = dir([atlas_dir, '/*', num2str(IN.sample_size), '*.mat']); + spec_names = {spec_atlas_dir.name}; + spec_atlas_name = spec_names{sum([contains(spec_names, IN.var_names, 'IgnoreCase', true); contains(spec_names, 'X')],1)==2}; + atlases{1,i} = [atlas_dir, '/', spec_atlas_name]; +end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_atlas_table_readout.m b/Visualization_Module/dp_atlas_table_readout.m new file mode 100644 index 0000000..4de01d1 --- /dev/null +++ b/Visualization_Module/dp_atlas_table_readout.m @@ -0,0 +1,180 @@ +%% DP script to write out voxels as table for paper +function atlas_table_readouts = dp_atlas_table_readout(IN) + +% load output file +atlases = IN.atlases(contains(IN.atlases, {'brainnetome', 'cerebellum'}, 'IgnoreCase', true)); + +for a=1:size(atlases,2) + if contains(atlases{a}, 'brainnetome', 'IgnoreCase', true) + indices_temp = load('/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/brainnetome_indices.mat'); + indices_temp_names = fieldnames(indices_temp); + labels_regions = indices_temp.(indices_temp_names{1}); + + numbers_region_matrix = [[1:2:size(labels_regions,1)]',[2:2:size(labels_regions,1)]']; + + % create algorithm to sort all LV voxels in cerebrum_matrix template + fields = fieldnames(IN.atlas_readouts); + temp_names = IN.atlas_readouts.vector_1.brainnetome.positive.Properties.VariableNames; + + for i=1:(size(fields,1)-1) + % create empty double template + atlas_table_readouts.table_collection.(['LV_', 
num2str(i)]).cerebrum_matrix = nan(size(numbers_region_matrix,1), 2*size(numbers_region_matrix,2)); + fields1 = fieldnames(IN.atlas_readouts.(fields{i}).brainnetome); + for ii=1:size(fields1,1) + temp_vector = IN.atlas_readouts.(fields{i}).brainnetome.(fields1{ii}){:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage_weighted')>0)}; + for iii=1:size(temp_vector,1) + [row, col] = find(ismember(numbers_region_matrix, temp_vector{iii,1})); + if col == 1 + col = ii*col; + elseif col == 2 + col = ii+col; + end + atlas_table_readouts.table_collection.(['LV_', num2str(i)]).cerebrum_matrix(row, col) = temp_vector{iii,2}; + end + end + end + + % get absolute voxel values + temp_atlas = load(atlases{a}); + temp_atlas_names = fieldnames(temp_atlas); + atlas_for_analysis = round(temp_atlas.(temp_atlas_names{1})); + + [C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); + counts_atlas = accumarray(ic_atlas, 1); + + test = ismember(numbers_region_matrix, C_atlas); + counts_atlas(1)=[]; + nn=1; + for i=1:size(numbers_region_matrix,1) + for ii=1:2 + if test(i,ii) + atlas_table_readouts.complete_voxels_regions(i,ii)=counts_atlas(nn); + nn=nn+1; + else + atlas_table_readouts.complete_voxels_regions(i,ii)=NaN; + end + end + end + + atlas_table_readouts.complete_voxels_regions_mean = nanmean(atlas_table_readouts.complete_voxels_regions,2); + + % get extra cerebellum data + % create template to sort voxels in + + elseif contains(atlases{a}, 'cerebellum', 'IgnoreCase', true) + load('/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/cerebellum_vermis_indices.mat'); + + temp_names = IN.atlas_readouts.vector_1.cerebellum.positive.Properties.VariableNames; + + % first compute the main cerebellum hemispheres using + % cerebellumhemispheres_indices + + for i=1:(size(fields,1)-1) + % create empty double template + atlas_table_readouts.table_collection.(['LV_', num2str(i)]).cerebellum_matrix = nan(size(cerebellumhemispheres_indices,1), 2*size(cerebellumhemispheres_indices,2)); + fields1 = fieldnames(IN.atlas_readouts.(fields{i}).cerebellum); + for ii=1:size(fields1,1) + try temp_vector = IN.atlas_readouts.(fields{i}).cerebellum.(fields1{ii}){:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage_weighted')>0)}; + for iii=1:size(temp_vector,1) + [row, col] = find(ismember(cerebellumhemispheres_indices, temp_vector{iii,1})); + if col == 1 + col = ii*col; + elseif col == 2 + col = ii+col; + end + atlas_table_readouts.table_collection.(['LV_', num2str(i)]).cerebellum_matrix(row, col) = temp_vector{iii,2}; + end + end + end + + atlas_table_readouts.table_collection.(['LV_', num2str(i)]).vermis_matrix = nan(size(vermis_indices,1), 2*size(vermis_indices,2)); + fields1 = fieldnames(IN.atlas_readouts.(fields{i}).cerebellum); + for ii=1:size(fields1,1) + try temp_vector = IN.atlas_readouts.(fields{i}).cerebellum.(fields1{ii}){:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage_weighted')>0)}; + for iii=1:size(temp_vector,1) + [row, col] = find(ismember(vermis_indices, temp_vector{iii,1})); + atlas_table_readouts.table_collection.(['LV_', num2str(i)]).vermis_matrix(row, ii) = temp_vector{iii,2}; + end + end + end + + + end + + % get absolute voxel values for cerebellum + temp_atlas = load(atlases{a}); + temp_atlas_names = fieldnames(temp_atlas); + atlas_for_analysis = round(temp_atlas.(temp_atlas_names{1})); + + [C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); + counts_atlas = accumarray(ic_atlas, 1); + + test = 
ismember(cerebellumhemispheres_indices, C_atlas); + counts_atlas(1)=[]; + nn=1; + for i=1:size(cerebellumhemispheres_indices,1) + for ii=1:2 + if test(i,ii) + atlas_table_readouts.complete_voxels_cerebellum(i,ii)=counts_atlas(nn); + nn=nn+1; + else + atlas_table_readouts.complete_voxels_cerebellum(i,ii)=NaN; + end + end + end + + atlas_table_readouts.complete_voxels_cerebellum_mean = nanmean(atlas_table_readouts.complete_voxels_cerebellum,2); + + % get absolute voxel values for vermis + test = ismember(vermis_indices, C_atlas); + try counts_atlas(1)=[]; +% catch +% counts_atlas(1)=[]; + end + nn=1; + for i=1:size(vermis_indices,1) + % for ii=1:2 + if test(i) + atlas_table_readouts.complete_voxels_vermis(i,1)=counts_atlas(nn); + nn=nn+1; + else + atlas_table_readouts.complete_voxels_vermis(i,1)=NaN; + end + % end + end + end +end + +LV_temp = fieldnames(atlas_table_readouts.table_collection); +temp_coll_bl = [round(atlas_table_readouts.complete_voxels_regions_mean); round(atlas_table_readouts.complete_voxels_cerebellum_mean)]; +temp_coll_ul = atlas_table_readouts.complete_voxels_vermis; +temp_coll_bl_names = {'Total voxels'}; +temp_coll_ul_names = {'Total voxels'}; +bl_names = {'left', 'right'}; +pn_names = {'pos', 'neg'}; +bl_pn_names = {}; +for i=1:size(bl_names,2) + for ii=1:size(pn_names,2) + bl_pn_names = [bl_pn_names, [bl_names{i}, '_', pn_names{ii}]]; + end +end + +for i=1:size(LV_temp,1) + temp_coll_bl = [temp_coll_bl, [atlas_table_readouts.table_collection.(LV_temp{i}).cerebrum_matrix; atlas_table_readouts.table_collection.(LV_temp{i}).cerebellum_matrix]]; + temp_coll_ul = [temp_coll_ul, atlas_table_readouts.table_collection.(LV_temp{i}).vermis_matrix]; + temp_coll_bl_names = [temp_coll_bl_names, cellfun(@(x) append(x, ['_', LV_temp{i}]), bl_pn_names, 'UniformOutput', false)]; + temp_coll_ul_names = [temp_coll_ul_names, cellfun(@(x) append(x, ['_', LV_temp{i}]), pn_names, 'UniformOutput', false)]; +end + +load('/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/cerebellum_names.mat'); + +reduced_labels_regions = strrep(labels_regions(1:2:end, 2), ' L', ''); + +atlas_table_readouts.complete_table_cerebrum_cerebellum = array2table(temp_coll_bl, 'VariableNames', temp_coll_bl_names, 'RowNames', [reduced_labels_regions; cerebellum_hemispheres_names]); +atlas_table_readouts.complete_table_vermis = array2table(temp_coll_ul, 'VariableNames', temp_coll_ul_names, 'RowNames', cerebellum_vermis_names); + +writetable(atlas_table_readouts.complete_table_cerebrum_cerebellum, [IN.a_folder, '/table_readouts.xlsx'], 'WriteRowNames', true, 'Sheet', 'Cerebrum_Cerebellum'); +writetable(atlas_table_readouts.complete_table_vermis, [IN.a_folder, '/table_readouts.xlsx'], 'WriteRowNames', true, 'Sheet', 'Vermis'); + +end + diff --git a/Visualization_Module/dp_auc_testing.m b/Visualization_Module/dp_auc_testing.m new file mode 100644 index 0000000..6fa5b00 --- /dev/null +++ b/Visualization_Module/dp_auc_testing.m @@ -0,0 +1,37 @@ +%% DP function to compute AUC and test whether one value belongs to a distribution + +function [p_val, h] = dp_auc_testing(IN) + +dist = IN.dist; +val = IN.val; +if isfield(IN, 'bin_width') + bin_width = IN.bin_width; +else + bin_width = 0.001; +end + +if isfield(IN, 'testing_precision') + testing_precision = IN.testing_precision; +else + testing_precision = 'lenient'; +end + +[N, edges] = histcounts(dist, 'BinWidth', bin_width); +x = 0:bin_width:((size(edges,2)-2)*bin_width); +y = N; + +[~, index] = min(abs(x-val)); +switch testing_precision + case 'lenient' + p_val = 
sum(cumtrapz(x(index:end), y(index:end)))/sum(cumtrapz(x,y)); + case 'conservative' + p_val = trapz(x(index:end), y(index:end))/trapz(x,y); +end + +if p_val<0.05 + h=true; +else + h=false; +end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_bootstrap_pruning.m b/Visualization_Module/dp_bootstrap_pruning.m new file mode 100644 index 0000000..25c35a4 --- /dev/null +++ b/Visualization_Module/dp_bootstrap_pruning.m @@ -0,0 +1,39 @@ +%% DP function for bootstrap sampling + +function dp_bootstrap_pruning(results_file, log_application) + +load(results_file); + +output.old_final_parameters = output.final_parameters; + +for i=1:size(output.final_parameters,1) + lv_name = ['LV_', num2str(i)]; + u = output.final_parameters{i, matches(output.opt_parameters_names, 'u')}; + switch log_application + case 'CI' + log_ci_u = output.bootstrap_results.(lv_name).log_ci_u; + u(log_ci_u) = 0; + case 'BS' + log_bs_u = output.bootstrap_results.(lv_name).log_bs_u; + u(log_bs_u) = 0; + end + output.final_parameters{i, matches(output.opt_parameters_names, 'u')} = u; + + v = output.final_parameters{i, matches(output.opt_parameters_names, 'v')}; + switch log_application + case 'CI' + log_ci_v = output.bootstrap_results.(lv_name).log_ci_v; + v(log_ci_v) = 0; + case 'BS' + log_bs_v = output.bootstrap_results.(lv_name).log_bs_v; + v(log_bs_v) = 0; + end + output.final_parameters{i, matches(output.opt_parameters_names, 'v')} = v; + +end + +input.name = [input.name, 'boot_', log_application]; + +save(strrep(results_file, '.mat', ['_', log_application, '.mat']), 'input', 'output', 'setup'); + +end diff --git a/Visualization_Module/dp_bootstrap_sampling b/Visualization_Module/dp_bootstrap_sampling new file mode 100644 index 0000000..5c85c6e --- /dev/null +++ b/Visualization_Module/dp_bootstrap_sampling @@ -0,0 +1,2 @@ +%% DP function for bootstrap sampling + diff --git a/Visualization_Module/dp_bootstrap_sampling.m b/Visualization_Module/dp_bootstrap_sampling.m new file mode 100644 index 0000000..9bb9b71 --- /dev/null +++ b/Visualization_Module/dp_bootstrap_sampling.m @@ -0,0 +1,122 @@ +%% DP function for bootstrap sampling + +function OUT = dp_bootstrap_sampling(results_file) + +load(results_file); + +boot_size = inp; +[~,bootsam] = bootstrp(boot_size,[],input.final_PSN); + +try temp = load(input.X); + fieldnames_temp = fields(temp); + X = temp.(fieldnames_temp{1}); +catch + X = input.X; +end + +for i=1:size(bootsam,2) + X_boot = X(bootsam(:,i),:); + Y_boot = input.Y(bootsam(:,i),:); + labels_boot = input.DiagNames(bootsam(:,i),:); + + for ii=1:size(output.final_parameters,1) + if ii == 1 + covars_boot = input.covariates(bootsam(:,i),:); + else + covars_boot = nan(size(input.covariates,1),1); + end + input.cs_method.subgroup_train = matches(labels_boot, input.cs_method.correction_subgroup); + input.cs_method.subgroup_test = matches(labels_boot, input.cs_method.correction_subgroup); + [OUT_x, OUT_y] = dp_master_correctscale(X_boot, Y_boot, covars_boot, input.cs_method, input.correction_target); + cu = output.final_parameters{ii, matches(output.opt_parameters_names, 'cu')}; + cv = output.final_parameters{ii, matches(output.opt_parameters_names, 'cv')}; + + [RHO_boot.(['LV_', num2str(ii)])(1,i), u_boot.(['LV_', num2str(ii)])(:,i), v_boot.(['LV_', num2str(ii)])(:,i), ~, ~, ~] = dp_spls_full(OUT_x,OUT_y,OUT_x, OUT_y, cu, cv, input.correlation_method); + % [u_boot.(['LV_', num2str(ii)])(:,i), v_boot.(['LV_', num2str(ii)])(:,i), ~, ~, ~, ~] = dp_spls(OUT_x, OUT_y, cu, cv); + + end + i +end + 
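+
+% For each LV, the block below summarizes the bootstrap samples: the
+% standard error is std(sample)/sqrt(boot_size), the 95% confidence
+% interval is mean +/- 1.96*SE, and the bootstrap ratio divides the
+% original weight by its SE; weights with |ratio| <= 2 ('BS') or with a
+% CI spanning zero ('CI') are treated as unstable and set to zero.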
+output.old_final_parameters = output.final_parameters; + +log_application = 'BS'; + +for i=1:size(output.final_parameters,1) + lv_name = ['LV_', num2str(i)]; +% RHO_boot.(lv_name) = output.bootsampling_results.(lv_name).RHO_sample; + RHO = output.final_parameters{i, matches(output.opt_parameters_names, 'RHO')}; + RHO_sample = RHO_boot.(lv_name); + RHO_mean = mean(RHO_sample); + RHO_SE = std(RHO_sample)/(sqrt(boot_size)); + ci_RHO = [RHO_mean - 1.96 * RHO_SE, RHO_mean + 1.96 * RHO_SE]; + bs_ratio_RHO = RHO/RHO_SE; + output.bootsampling_results.(lv_name).ci_RHO = ci_RHO; + output.bootsampling_results.(lv_name).bs_ratio_RHO = bs_ratio_RHO; + output.bootsampling_results.(lv_name).RHO_sample = RHO_sample; + +% u_boot.(lv_name) = output.bootsampling_results.(lv_name).u_sample; + u_sample = u_boot.(lv_name); + u_analysis = output.final_parameters{i, matches(output.opt_parameters_names, 'u')}; + u_mean = mean(u_sample,2); + u_SE = std(u_sample,0,2)/(sqrt(boot_size)); + bs_ratio_u = u_analysis./u_SE; + bs_ratio_u(isnan(bs_ratio_u)) = 0; + bs_ratio_u(bs_ratio_u == Inf) = 0; + log_bs_u = abs(bs_ratio_u)<=2; + ci_u = [u_mean - 1.96 * u_SE, u_mean + 1.96 * u_SE]; + log_ci_u = ((sum(ci_u>0, 2) == 2) + (sum(ci_u<0, 2) == 2)) == 0; + u = output.final_parameters{i, matches(output.opt_parameters_names, 'u')}; + switch log_application + case 'CI' + u(log_ci_u) = 0; + case 'BS' + u(log_bs_u) = 0; + end + output.final_parameters{i, matches(output.opt_parameters_names, 'u')} = u; + output.bootsampling_results.(lv_name).ci_u = ci_u; + output.bootsampling_results.(lv_name).bs_ratio_u = bs_ratio_u; + output.bootsampling_results.(lv_name).u_sample = u_sample; + output.bootsampling_results.(lv_name).log_bs_u = log_bs_u; + output.bootsampling_results.(lv_name).sum_bs_u = sum(log_bs_u); + output.bootsampling_results.(lv_name).log_ci_u = log_ci_u; + output.bootsampling_results.(lv_name).sum_ci_u = sum(log_ci_u); + +% v_boot.(lv_name) = output.bootsampling_results.(lv_name).v_sample; + v_sample = v_boot.(lv_name); + v_analysis = output.final_parameters{i, matches(output.opt_parameters_names, 'v')}; + v_mean = mean(v_sample,2); + v_SE = std(v_sample,0,2)/(sqrt(boot_size)); + bs_ratio_v = v_analysis./v_SE; + bs_ratio_v(isnan(bs_ratio_v)) = 0; + bs_ratio_v(bs_ratio_v == Inf) = 0; + log_bs_v = abs(bs_ratio_v)<=2; + ci_v = [v_mean - 1.96 * v_SE, v_mean + 1.96 * v_SE]; + log_ci_v = ((sum(ci_v>0, 2) == 2) + (sum(ci_v<0, 2) == 2)) == 0; + v = output.final_parameters{i, matches(output.opt_parameters_names, 'v')}; + switch log_application + case 'CI' + v(log_ci_v) = 0; + case 'BS' + v(log_bs_v) = 0; + end + output.final_parameters{i, matches(output.opt_parameters_names, 'v')} = v; + output.bootsampling_results.(lv_name).ci_v = ci_v; + output.bootsampling_results.(lv_name).bs_ratio_v = bs_ratio_v; + output.bootsampling_results.(lv_name).v_sample = v_sample; + output.bootsampling_results.(lv_name).log_bs_v = log_bs_v; + output.bootsampling_results.(lv_name).sum_bs_v = sum(log_bs_v); + output.bootsampling_results.(lv_name).log_ci_v = log_ci_v; + output.bootsampling_results.(lv_name).sum_ci_v = sum(log_ci_v); + + % testing + output.testing.(lv_name).u_comparison = sum(log_bs_u == log_ci_u)/size(log_bs_u,1); + output.testing.(lv_name).v_comparison = sum(log_bs_v == log_ci_v)/size(log_bs_v,1); +end + +input.name = [input.name, 'boot_', log_application]; + +save(['result_bootstrapping_', log_application, '.mat'], 'input', 'output', 'setup'); + + +end diff --git a/Visualization_Module/dp_chi2.m b/Visualization_Module/dp_chi2.m new file 
mode 100644 index 0000000..21e45a9 --- /dev/null +++ b/Visualization_Module/dp_chi2.m @@ -0,0 +1,92 @@ +%% playground + +function [collection, collection_names, h, p, stats]=dp_chi2(obs, type) + +switch type + case 'absolute' + sum_temp=sum(obs,2); + size_temp=size(obs,1); + + bins = 1:size(sum_temp,1); + [h.all,p.all,stats.all]=chi2gof(bins, 'Frequency', sum_temp, 'Expected', ones(size_temp,1)*(1/size_temp)*sum(sum_temp)); + + h.detailed=nan(size_temp,size_temp); + p.detailed=nan(size_temp,size_temp); + + for i=1:size_temp + for ii=(i+1):size_temp + temp_vector = [sum_temp(i); sum_temp(ii)]; + bins = 1:size(temp_vector,1); + [~,p.detailed(i,ii),~]=chi2gof(bins, 'Frequency',temp_vector, 'Expected', ones(size(temp_vector,1),1)*(1/size(temp_vector,1))*sum(temp_vector)); + end + end + + p.p_value_FDR = dp_FDR(p.detailed(~isnan(p.detailed)), 0.05); + + h.detailed=nan(size_temp,size_temp); + p.detailed=nan(size_temp,size_temp); + + for i=1:size_temp + for ii=(i+1):size_temp + temp_vector = [sum_temp(i); sum_temp(ii)]; + bins = 1:size(temp_vector,1); + [h.detailed(i,ii),p.detailed(i,ii),stats.detailed(i,ii)]=chi2gof(bins, 'Frequency',temp_vector, 'Expected', ones(size(temp_vector,1),1)*(1/size(temp_vector,1))*sum(temp_vector), 'Alpha', p.p_value_FDR); + end + end + + nn=1; + for i=1:size(p.detailed,1) + for ii=1:size(p.detailed,2) + if ~isnan(p.detailed(i,ii)) + collection(nn,:) = [i, ii, p.detailed(i,ii), stats.detailed(i,ii).chi2stat]; + nn=nn+1; + end + end + end + + collection_names = {'group 1', 'group 2', 'p', 'chi2'}; + case 'ratio' + + sum_temp=sum(obs,2)/sum(sum(obs,2)); + size_temp=size(obs,2); + + bins = 1:size(sum_temp,1); + [h.all,p.all,stats.all]=chi2gof(bins, 'Frequency', sum_temp, 'Expected', ones(size_temp,1)*(1/size_temp)*sum(sum_temp)); + + h.detailed=nan(size_temp,size_temp); + p.detailed=nan(size_temp,size_temp); + + for i=1:size_temp + for ii=(i+1):size_temp + temp_vector = [sum_temp(i); sum_temp(ii)]; + bins = 1:size(temp_vector,1); + [~,p.detailed(i,ii),~]=chi2gof(bins, 'Frequency',temp_vector, 'Expected', ones(size(temp_vector,1),1)*(1/size(temp_vector,1))*sum(temp_vector)); + end + end + + p.p_value_FDR = dp_FDR(p.detailed(~isnan(p.detailed)), 0.05); + + h.detailed=nan(size_temp,size_temp); + p.detailed=nan(size_temp,size_temp); + + for i=1:size_temp + for ii=(i+1):size_temp + temp_vector = [sum_temp(i); sum_temp(ii)]; + bins = 1:size(temp_vector,1); + [h.detailed(i,ii),p.detailed(i,ii),stats.detailed(i,ii)]=chi2gof(bins, 'Frequency',temp_vector, 'Expected', ones(size(temp_vector,1),1)*(1/size(temp_vector,1))*sum(temp_vector), 'Alpha', p.p_value_FDR); + end + end + + nn=1; + for i=1:size(p.detailed,1) + for ii=1:size(p.detailed,2) + if ~isnan(p.detailed(i,ii)) + collection(nn,:) = [i, ii, p.detailed(i,ii), stats.detailed(i,ii).chi2stat]; + nn=nn+1; + end + end + end + +end + +end diff --git a/Visualization_Module/dp_ciss_groups.m b/Visualization_Module/dp_ciss_groups.m new file mode 100644 index 0000000..d2b8220 --- /dev/null +++ b/Visualization_Module/dp_ciss_groups.m @@ -0,0 +1,101 @@ +%% DP script for CISS differences + +%% new script to compute CISS group differences + +m_discovery = matfile('/volume/HCStress/Analysis/02-Jul-2020/CISS_636_IQRadd_NCV55_single_folds_bestmerge_noval_min10_2020_5000AUC_Dev/final_results/result_final_vis.mat', 'Writable', true); +% m_replication = matfile('/volume/HCStress/Analysis/25-Feb-2020_CTQ_627_replication_sample_CTQ_fixed_datafile.mat', 'Writable', true); + +input_discovery = m_discovery.input; +% input_replication = 
m_replication.input; + +output_discovery = m_discovery.output; + +% load('/volume/HCStress/Analysis/replication_results_SPLS_Stress/validation_replication_detailed_results_comb_app_1.mat') + +% get CTQ data and compute total and subscale scores +CISS=struct; +% CISS.total = 1:24; +CISS.task = [5,9,12,14,15,19,23,24]; +CISS.emotion = [2,3,7,8,11,13,16,21]; +CISS.avoidance_distraction = [4,6,10,22]; +CISS.social_diversion = [1,17,18,20]; +CISS_collection_discovery = input_discovery.behavior; +CISS_collection_discovery_names = input_discovery.behavior_names; +CISS_collection_discovery(:, [CISS.task, CISS.emotion, CISS.avoidance_distraction, CISS.social_diversion]) = input_discovery.behavior(:, 1:24); +CISS_collection_discovery_names(:, [CISS.task, CISS.emotion, CISS.avoidance_distraction, CISS.social_diversion]) = input_discovery.behavior_names(1:24); +CISS_discovery=[];CISS_replication=[]; +fields = fieldnames(CISS); +for i=1:size(fields,1) + CISS_discovery(:,i) = sum(CISS_collection_discovery(:, CISS.(fields{i})),2); + CISS_discovery_names{1,i} = fields{i}; +% CISS_replication(:,i) = sum(CTQ_collection_replication(:, CTQ.(fields{i})),2); +end + +% input_replication.CISS_collection = CISS_replication; +% input_replication.CISS_collection_names = fieldnames(CTQ)'; +output_discovery.CISS_collection = CISS_discovery; +output_discovery.CISS_collection_names = fieldnames(CISS)'; + +m_discovery.input = input_discovery; +% m_replication.input = input_replication; +m_discovery.output = output_discovery; + + +labels= {'CHR', 'HC', 'ROD', 'ROP'}; +labels_disc = zeros(size(input_discovery.data_collection.Labels,1),1); +% labels_rep = ones(size(input_replication.data_collection.Labels,1),1); +CISS_results=[];temp_results=[];mean_std_results=[];temp_mean_std=[]; + +for i=1:size(CISS_discovery,2) +% temp_results=[];temp_mean_std=[]; +% temp_data = [CISS_discovery(:,i); CISS_replication(:,i)]; +% [p,tbl,stats] = kruskalwallis(temp_data, [labels_disc; labels_rep]); +% temp_results = [temp_results, p]; + temp_mean_std = [nanmean(CISS_discovery(:,i)); nanstd(CISS_discovery(:,i))]; +% close all + for ii=1:size(labels,2) + log_temp_d = contains(input_discovery.data_collection.Labels, labels{ii}); +% log_temp_r = contains(input_replication.data_collection.Labels, labels{ii}); + temp_data = CISS_discovery(log_temp_d,i); + temp_mean_std = [temp_mean_std, [nanmean(CISS_discovery(log_temp_d,i)); nanstd(CISS_discovery(log_temp_d,i))]]; +% [p,tbl,stats] = kruskalwallis(temp_data, [labels_disc(log_temp_d); labels_rep(log_temp_r)]); +% temp_results = [temp_results, p]; + end +% CISS_results = [CISS_results; temp_results]; + mean_std_results = [mean_std_results; temp_mean_std]; +% close all +end + +mean_std_results_names = CISS_discovery_names'; +% +% [CISS_results, ~] = dp_FDR_adj(CISS_results); + +groups_to_choose = labels; +log_groups_disc = contains(input_discovery.data_collection.Labels, groups_to_choose); +% log_groups_rep = contains(input_replication.data_collection.Labels, groups_to_choose); +CISS_discovery_results_KW=[];CISS_discovery_results_Dunn=[]; +% CTQ_replication_results_KW=[];CTQ_replication_results_Dunn=[]; + +for i=1:size(CISS_discovery,2) + [p,tbl,stats] = kruskalwallis(CISS_discovery(log_groups_disc,i), input_discovery.data_collection.Labels(log_groups_disc)); + CISS_discovery_results_KW(i,:) = [p, tbl{2,5}, tbl{2,3}]; + CISS_discovery_results_Dunn{i,1} = multcompare(stats, 'Estimate', 'kruskalwallis', 'CType', 'dunn-sidak'); + close all + +% [p,tbl,stats] = 
kruskalwallis(CISS_replication(log_groups_rep,i), input_replication.data_collection.Labels(log_groups_rep)); +% CTQ_replication_results_KW(i,:) = [p, tbl{2,5}, tbl{2,3}]; +% CTQ_replication_results_Dunn{i,1} = multcompare(stats, 'Estimate', 'kruskalwallis', 'CType', 'dunn-sidak'); + +% close all + + % CTQ_discovery_results = [CTQ_discovery_results, +end + +[CISS_discovery_results_KW(:,1), ~] = dp_FDR_adj(CISS_discovery_results_KW(:,1)); +% [CISS_replication_results_KW(:,1), ~] = dp_FDR_adj(CISS_replication_results_KW(:,1)); + + + + + + diff --git a/Visualization_Module/dp_ciss_groups.m~ b/Visualization_Module/dp_ciss_groups.m~ new file mode 100644 index 0000000..cd980fd --- /dev/null +++ b/Visualization_Module/dp_ciss_groups.m~ @@ -0,0 +1,97 @@ +%% DP script for CISS differences + +%% new script to compute CTQ group differences + +m_discovery = matfile('/volume/HCStress/Analysis/02-Jul-2020/CISS_636_IQRadd_NCV55_single_folds_bestmerge_noval_min10_2020_5000AUC_Dev/final_results/result_final_vis.mat', 'Writable', true); +% m_replication = matfile('/volume/HCStress/Analysis/25-Feb-2020_CTQ_627_replication_sample_CTQ_fixed_datafile.mat', 'Writable', true); + +input_discovery = m_discovery.input; +% input_replication = m_replication.input; + +output_discovery = m_discovery.output; + +% load('/volume/HCStress/Analysis/replication_results_SPLS_Stress/validation_replication_detailed_results_comb_app_1.mat') + +% get CTQ data and compute total and subscale scores +CISS.total = 1:24; +CISS.task = [5,9,12,14,15,19,23,24]; +CISS.emotion = [2,3,7,8,11,13,16,21]; +CISS.avoidance_distraction = [4,6,10,22]; +CISS.social_diversion = [1,17,18,20]; +CISS_collection_discovery = input_discovery.behavior; +CISS_collection_discovery_names = input_discovery.behavior_names; +CISS_collection_discovery(:, [CISS.task, CISS.emotion, CISS.avoidance_distraction, CISS.social_diversion]) = input_discovery.behavior(:, 1:24); +CISS_collection_discovery_names(:, [CISS.task, CISS.emotion, CISS.avoidance_distraction, CISS.social_diversion]) = input_discovery.behavior_names(1:24); +CISS_discovery=[];CISS_replication=[]; +fields = fieldnames(CISS); +for i=1:size(fields,1) + CISS_discovery(:,i) = sum(CISS_collection_discovery(:, CISS.(fields{i})),2); +% CISS_replication(:,i) = sum(CTQ_collection_replication(:, CTQ.(fields{i})),2); +end + +% input_replication.CISS_collection = CISS_replication; +% input_replication.CISS_collection_names = fieldnames(CTQ)'; +output_discovery.CISS_collection = CISS_discovery; +output_discovery.CISS_collection_names = fieldnames(CISS)'; + +m_discovery.input = input_discovery; +% m_replication.input = input_replication; +m_discovery.output = output_discovery; + + +labels={'HC', 'ROD', 'CHR', 'ROP'}; +labels_disc = zeros(size(input_discovery.data_collection.Labels,1),1); +% labels_rep = ones(size(input_replication.data_collection.Labels,1),1); +CISS_results=[];temp_results=[];mean_std_results=[];temp_mean_std=[]; + +for i=1:size(CISS_discovery,2) +% temp_results=[];temp_mean_std=[]; +% temp_data = [CISS_discovery(:,i); CISS_replication(:,i)]; +% [p,tbl,stats] = kruskalwallis(temp_data, [labels_disc; labels_rep]); +% temp_results = [temp_results, p]; + temp_mean_std = [nanmean(CISS_discovery(:,i)), nanstd(CISS_discovery(:,i))]; +% close all + for ii=1:size(labels,2) + log_temp_d = contains(input_discovery.data_collection.Labels, labels{ii}); +% log_temp_r = contains(input_replication.data_collection.Labels, labels{ii}); + temp_data = CISS_discovery(log_temp_d,i); + temp_mean_std = [temp_mean_std; 
[nanmean(CISS_discovery(log_temp_d,i)), nanstd(CISS_discovery(log_temp_d,i))]]; +% [p,tbl,stats] = kruskalwallis(temp_data, [labels_disc(log_temp_d); labels_rep(log_temp_r)]); +% temp_results = [temp_results, p]; + end +% CISS_results = [CISS_results; temp_results]; + mean_std_results = [mean_std_results; temp_mean_std]; +% close all +end +% +% [CISS_results, ~] = dp_FDR_adj(CISS_results); + +groups_to_choose = {'HC', 'ROD', 'CHR', 'ROP'}; +log_groups_disc = contains(input_discovery.data_collection.Labels, groups_to_choose); +% log_groups_rep = contains(input_replication.data_collection.Labels, groups_to_choose); +CISS_discovery_results_KW=[];CISS_discovery_results_Dunn=[]; +% CTQ_replication_results_KW=[];CTQ_replication_results_Dunn=[]; + +for i=1:size(CISS_discovery,2) + [p,tbl,stats] = kruskalwallis(CISS_discovery(log_groups_disc,i), input_discovery.data_collection.Labels(log_groups_disc)); + CISS_discovery_results_KW(i,:) = [p, tbl{2,5}, tbl{2,3}]; + CISS_discovery_results_Dunn{i,1} = multcompare(stats, 'Estimate', 'kruskalwallis', 'CType', 'dunn-sidak'); + close all + +% [p,tbl,stats] = kruskalwallis(CISS_replication(log_groups_rep,i), input_replication.data_collection.Labels(log_groups_rep)); +% CTQ_replication_results_KW(i,:) = [p, tbl{2,5}, tbl{2,3}]; +% CTQ_replication_results_Dunn{i,1} = multcompare(stats, 'Estimate', 'kruskalwallis', 'CType', 'dunn-sidak'); + +% close all + + % CTQ_discovery_results = [CTQ_discovery_results, +end + +[CISS_discovery_results_KW(:,1), ~] = dp_FDR_adj(CISS_discovery_results_KW(:,1)); +% [CISS_replication_results_KW(:,1), ~] = dp_FDR_adj(CISS_replication_results_KW(:,1)); + + + + + + diff --git a/Visualization_Module/dp_cleanup_files.m b/Visualization_Module/dp_cleanup_files.m new file mode 100644 index 0000000..ab64dbb --- /dev/null +++ b/Visualization_Module/dp_cleanup_files.m @@ -0,0 +1,12 @@ +%% DP function for cleaning files from a specified folder + +function dp_cleanup_files(folder, file_ID) + +temp_dir = dir([folder, '/', file_ID, '*']); + +for i=1:size(temp_dir,1) + delete(temp_dir(i).name); +end + + +end \ No newline at end of file diff --git a/Visualization_Module/dp_corrections.asv b/Visualization_Module/dp_corrections.asv new file mode 100644 index 0000000..971e7d1 --- /dev/null +++ b/Visualization_Module/dp_corrections.asv @@ -0,0 +1,14 @@ +%% function for correction keep in and hold out data for covariates +% regress out covariate effects from TRAIN and then apply this correction to +% TEST, with betas from TRAIN and covariates from TEST + +function [TRAIN_c, TEST_c] = dp_corrections(TRAIN, TEST, TRAIN_covariates, TEST_covariates, sub) + +IN = struct; +IN.TrCovars = TRAIN_covariates; +IN.subgroup +[TRAIN_c, IN] = nk_PartialCorrelationsObj(TRAIN, IN); +IN.TsCovars = TEST_covariates; +[TEST_c, ~] = nk_PartialCorrelationsObj(TEST, IN); + +end \ No newline at end of file diff --git a/Visualization_Module/dp_corrections.m b/Visualization_Module/dp_corrections.m new file mode 100644 index 0000000..04567e1 --- /dev/null +++ b/Visualization_Module/dp_corrections.m @@ -0,0 +1,15 @@ +%% function for correction keep in and hold out data for covariates +% regress out covariate effects from TRAIN and then apply this correction to +% TEST, with betas from TRAIN and covariates from TEST + +function [TRAIN_c, TEST_c] = dp_corrections(TRAIN, TEST, TRAIN_covariates, TEST_covariates, TRAIN_subgroups, TEST_subgroups) + +IN = struct; +IN.TrCovars = TRAIN_covariates; +IN.subgroup = TRAIN_subgroups; +[TRAIN_c, IN] = nk_PartialCorrelationsObj(TRAIN, IN); 
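+% apply the training-derived correction (betas stored in IN by the call
+% above) to the test data, using the test covariates and subgroups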
+IN.TsCovars = TEST_covariates;
+IN.subgroup = TEST_subgroups;
+[TEST_c, ~] = nk_PartialCorrelationsObj(TEST, IN);
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_corrections_multi.m b/Visualization_Module/dp_corrections_multi.m
new file mode 100644
index 0000000..394c384
--- /dev/null
+++ b/Visualization_Module/dp_corrections_multi.m
@@ -0,0 +1,25 @@
+%% function for correcting keep-in and hold-out data for covariates
+% regress out covariate effects from TRAIN and then apply this correction to
+% TEST, with betas from TRAIN and covariates from TEST
+
+function [TRAIN_c, TEST_c] = dp_corrections_multi(TRAIN, TEST, TRAIN_covariates, TEST_covariates, corr_option)
+
+% corr_option selects the correction backend:
+% 1 = partial correlations, 2 = PCA-based adjustment
+switch corr_option
+    case 1 % use partial correlations to correct data
+        IN = struct;
+        IN.TrCovars = TRAIN_covariates;
+        [TRAIN_c, IN] = nk_PartialCorrelationsObj(TRAIN, IN);
+        IN.TsCovars = TEST_covariates;
+        [TEST_c, ~] = nk_PartialCorrelationsObj(TEST, IN);
+
+    case 2 % use PCA correction
+        IN.S = TRAIN;
+        IN.G = TRAIN_covariates;
+        [TRAIN_c, IN, ~] = nk_PerfAdjForCovarsUsingPCAObj(TRAIN, IN);
+
+        IN.S = TRAIN;
+        IN.G = TEST_covariates;
+        [TEST_c, ~, ~] = nk_PerfAdjForCovarsUsingPCAObj(TEST, IN);
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_correctscale.m b/Visualization_Module/dp_correctscale.m
new file mode 100644
index 0000000..374e787
--- /dev/null
+++ b/Visualization_Module/dp_correctscale.m
@@ -0,0 +1,65 @@
+%% DP function for combined correction and scaling of training and testing data
+
+function OUT = dp_correctscale(DAT, COV, MET)
+
+IN_s = struct;
+IN_s.method = MET.method;
+if ~isempty(MET.correction_subgroup)
+    IN_s.subgroup_train = MET.subgroup_train;
+    IN_s.subgroup_test = MET.subgroup_test;
+else
+    IN_s.subgroup_train = [];
+    IN_s.subgroup_test = [];
+end
+
+if isnumeric(DAT) % only operate on one dataset
+    if ~any(isnan(COV))
+        % drop empty covariate columns
+        if any(sum(COV,1)==0) || any(range(COV)==0)
+            log_remove = sum(COV,1)==0;
+            COV(:,log_remove) = [];
+        end
+        % drop empty or constant data columns, remembering their positions
+        if any(sum(DAT,1)==0) || any(range(DAT)==0)
+            log_remove = double(sum(DAT,1)==0) + double(range(DAT)==0)>0;
+            DAT_save = DAT;
+            DAT(:,log_remove) = [];
+        end
+        [DAT_s, ~] = dp_standardize(DAT, IN_s);
+        [COV_s, ~] = dp_standardize(COV, IN_s);
+
+        IN_c.TrCovars = COV_s;
+        [DAT_sc, ~] = nk_PartialCorrelationsObj(DAT_s, IN_c);
+        [OUT, ~] = dp_standardize(DAT_sc, IN_s);
+        if exist('DAT_save', 'var')
+            DAT_save(:,~log_remove) = OUT;
+            OUT = DAT_save;
+        end
+    else
+        if any(sum(DAT,1)==0) || any(range(DAT)==0)
+            log_remove = double(sum(DAT,1)==0) + double(range(DAT)==0)>0;
+            DAT_save = DAT;
+            DAT(:,log_remove) = [];
+        end
+        [OUT, ~] = dp_standardize(DAT, IN_s);
+        if exist('DAT_save', 'var')
+            DAT_save(:,~log_remove) = OUT;
+            OUT = DAT_save;
+        end
+    end
+
+elseif isstruct(DAT) % perform on train and apply to test dataset
+    if any(~any(isnan(COV.test)))
+        % standardization of data and covariates
+        [COV.train_s, COV.test_s] = dp_standardize_comb(COV.train, COV.test, IN_s);
+        [DAT.train_s, DAT.test_s] = dp_standardize_comb(DAT.train, DAT.test, IN_s);
+
+        % correction of data
+        [DAT.train_sc, DAT.test_sc] = dp_corrections(DAT.train_s, DAT.test_s, COV.train_s, COV.test_s, IN_s.subgroup_train, IN_s.subgroup_test);
+
+        % standardization of data
+        [OUT.train, OUT.test] = dp_standardize_comb(DAT.train_sc, DAT.test_sc, IN_s);
+    else
+        [OUT.train, OUT.test] = dp_standardize_comb(DAT.train, DAT.test, IN_s);
+    end
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_correctscale_extract.m b/Visualization_Module/dp_correctscale_extract.m
new file mode 100644
index 0000000..7ce3411
--- /dev/null
+++ b/Visualization_Module/dp_correctscale_extract.m
@@ -0,0 +1,65 @@
+%% DP function for combined correction and scaling of training and testing data
+
+function [OUT, IN_c] = dp_correctscale_extract(DAT, COV, MET)
+
+IN_s = struct;
+IN_s.method = MET.method;
+if ~isempty(MET.correction_subgroup)
+    IN_s.subgroup_train = MET.subgroup_train;
+    IN_s.subgroup_test = MET.subgroup_test;
+else
+    IN_s.subgroup_train = [];
+    IN_s.subgroup_test = [];
+end
+
+if isnumeric(DAT) % only operate on one dataset
+    if ~any(isnan(COV))
+        if any(sum(COV,1)==0) || any(range(COV)==0)
+            log_remove = sum(COV,1)==0;
+            COV(:,log_remove) = [];
+        end
+        if any(sum(DAT,1)==0) || any(range(DAT)==0)
+            log_remove = double(sum(DAT,1)==0) + double(range(DAT)==0)>0;
+            DAT_save = DAT;
+            DAT(:,log_remove) = [];
+        end
+        [DAT_s, ~] = dp_standardize(DAT, IN_s);
+        [COV_s, ~] = dp_standardize(COV, IN_s);
+
+        IN_c.TrCovars = COV_s;
+        [DAT_sc, IN_c] = nk_PartialCorrelationsObj(DAT_s, IN_c);
+        [OUT, ~] = dp_standardize(DAT_sc, IN_s);
+        if exist('DAT_save', 'var')
+            DAT_save(:,~log_remove) = OUT;
+            OUT = DAT_save;
+        end
+    else
+        if any(sum(DAT,1)==0) || any(range(DAT)==0)
+            log_remove = double(sum(DAT,1)==0) + double(range(DAT)==0)>0;
+            DAT_save = DAT;
+            DAT(:,log_remove) = [];
+        end
+        [OUT, ~] = dp_standardize(DAT, IN_s);
+        if exist('DAT_save', 'var')
+            DAT_save(:,~log_remove) = OUT;
+            OUT = DAT_save;
+        end
+    end
+
+elseif isstruct(DAT) % perform on train and apply to test dataset
+    if any(~any(isnan(COV.test)))
+        % standardization of data and covariates
+        [COV.train_s, COV.test_s] = dp_standardize_comb(COV.train, COV.test, IN_s);
+        [DAT.train_s, DAT.test_s] = dp_standardize_comb(DAT.train, DAT.test, IN_s);
+
+        % correction of data
+        [DAT.train_sc, DAT.test_sc] = dp_corrections(DAT.train_s, DAT.test_s, COV.train_s, COV.test_s, IN_s.subgroup_train, IN_s.subgroup_test);
+
+        % standardization of data
+        [OUT.train, OUT.test] = dp_standardize_comb(DAT.train_sc, DAT.test_sc, IN_s);
+    else
+        [OUT.train, OUT.test] = dp_standardize_comb(DAT.train, DAT.test, IN_s);
+    end
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_correctscale_mult.m b/Visualization_Module/dp_correctscale_mult.m
new file mode 100644
index 0000000..7fbc795
--- /dev/null
+++ b/Visualization_Module/dp_correctscale_mult.m
@@ -0,0 +1,65 @@
+%% DP function for combined correction and scaling of training and testing data
+
+function OUT = dp_correctscale_mult(DAT, COV, MET, correction_algorithm)
+
+IN_s = struct;
+IN_s.method = MET;
+
+% correction_algorithm selects the correction backend:
+% 1 = partial correlations, 2 = PCA-based adjustment
+switch correction_algorithm
+    case 1
+        correction_function = @nk_PartialCorrelationsObj;
+    case 2
+        correction_function = @nk_PerfAdjForCovarsUsingPCAObj;
+end
+
+if isnumeric(DAT) % only operate on one dataset
+    if ~any(isnan(COV))
+        if any(sum(COV,1)==0) || any(range(COV)==0)
+            log_remove = sum(COV,1)==0;
+            COV(:,log_remove) = [];
+        end
+        if any(sum(DAT,1)==0) || any(range(DAT)==0)
+            log_remove = double(sum(DAT,1)==0) + double(range(DAT)==0)>0;
+            DAT_save = DAT;
+            DAT(:,log_remove) = [];
+        end
+        [DAT_s, ~] = dp_standardize(DAT, IN_s);
+        [COV_s, ~] = dp_standardize(COV, IN_s);
+
+        IN_c.TrCovars = COV_s;
+        [DAT_sc, ~] = nk_PartialCorrelationsObj(DAT_s, IN_c);
+        [OUT, ~] = dp_standardize(DAT_sc, IN_s);
+        if exist('DAT_save', 'var')
+            DAT_save(:,~log_remove) = OUT;
+            OUT = DAT_save;
+        end
+    else
+        if any(sum(DAT,1)==0) || any(range(DAT)==0)
+            log_remove = double(sum(DAT,1)==0) + double(range(DAT)==0)>0;
+            DAT_save = DAT;
+            DAT(:,log_remove) = [];
+        end
+        [OUT, ~] = dp_standardize(DAT, IN_s);
+        if exist('DAT_save', 'var')
+            DAT_save(:,~log_remove) = OUT;
+            OUT = DAT_save;
+        end
+    end
+
+elseif isstruct(DAT) % perform on train and apply to test dataset
+    if ~any(isnan(COV.test))
+        % standardization of data and covariates
+        [COV.train_s, COV.test_s] = dp_standardize_comb(COV.train, COV.test, IN_s);
+        [DAT.train_s, DAT.test_s] = dp_standardize_comb(DAT.train, DAT.test, IN_s);
+
+        % correction of data (no correction subgroups in this variant)
+        [DAT.train_sc, DAT.test_sc] = dp_corrections(DAT.train_s, DAT.test_s, COV.train_s, COV.test_s, [], []);
+
+        % standardization of data
+        [OUT.train, OUT.test] = dp_standardize_comb(DAT.train_sc, DAT.test_sc, IN_s);
+    else
+        [OUT.train, OUT.test] = dp_standardize_comb(DAT.train, DAT.test, IN_s);
+    end
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_correctscale_multi.m b/Visualization_Module/dp_correctscale_multi.m
new file mode 100644
index 0000000..f82c01c
--- /dev/null
+++ b/Visualization_Module/dp_correctscale_multi.m
@@ -0,0 +1,67 @@
+%% DP function for combined correction and scaling of training and testing data
+
+function OUT = dp_correctscale_multi(DAT, COV, MET, corr_option)
+
+IN_s = struct;
+IN_s.method = MET;
+
+if isnumeric(DAT) % only operate on one dataset
+    if ~any(isnan(COV))
+        if any(sum(COV,1)==0) || any(range(COV)==0)
+            log_remove = sum(COV,1)==0;
+            COV(:,log_remove) = [];
+        end
+        if any(sum(DAT,1)==0) || any(range(DAT)==0)
+            log_remove = double(sum(DAT,1)==0) + double(range(DAT)==0)>0;
+            DAT_save = DAT;
+            DAT(:,log_remove) = [];
+        end
+        [DAT_s, ~] = dp_standardize(DAT, IN_s);
+        [COV_s, ~] = dp_standardize(COV, IN_s);
+
+        % corr_option selects the correction backend:
+        % 1 = partial correlations, 2 = PCA-based adjustment
+        switch corr_option
+            case 1 % use partial correlations to correct data
+                IN_c.TrCovars = COV_s;
+                [DAT_sc, ~] = nk_PartialCorrelationsObj(DAT_s, IN_c);
+
+            case 2
+                IN_c.S = DAT_s;
+                IN_c.G = COV_s;
+                [DAT_sc, ~, ~] = nk_PerfAdjForCovarsUsingPCAObj(DAT_s, IN_c);
+        end
+
+        [OUT, ~] = dp_standardize(DAT_sc, IN_s);
+        if exist('DAT_save', 'var')
+            DAT_save(:,~log_remove) = OUT;
+            OUT = DAT_save;
+        end
+    else
+        if any(sum(DAT,1)==0) || any(range(DAT)==0)
+            log_remove = double(sum(DAT,1)==0) + double(range(DAT)==0)>0;
+            DAT_save = DAT;
+            DAT(:,log_remove) = [];
+        end
+        [OUT, ~] = dp_standardize(DAT, IN_s);
+        if exist('DAT_save', 'var')
+            DAT_save(:,~log_remove) = OUT;
+            OUT = DAT_save;
+        end
+    end
+
+elseif isstruct(DAT) % perform on train and apply to test dataset
+    if ~any(isnan(COV.test))
+        % standardization of data and covariates
+        [COV.train_s, COV.test_s] = dp_standardize_comb(COV.train, COV.test, IN_s);
+        [DAT.train_s, DAT.test_s] = dp_standardize_comb(DAT.train, DAT.test, IN_s);
+
+        % correction of data (no correction subgroups in this variant)
+        [DAT.train_sc, DAT.test_sc] = dp_corrections(DAT.train_s, DAT.test_s, COV.train_s, COV.test_s, [], []);
+
+        % standardization of data
+        [OUT.train, OUT.test] = dp_standardize_comb(DAT.train_sc, DAT.test_sc, IN_s);
+    else
+        [OUT.train, OUT.test] = dp_standardize_comb(DAT.train, DAT.test, IN_s);
+    end
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_create_folders.m b/Visualization_Module/dp_create_folders.m
new file mode 100644
index 0000000..915dfd7
--- /dev/null
+++ b/Visualization_Module/dp_create_folders.m
@@ -0,0 +1,17 @@
+%% DP function to create folders for analysis
+
+function [permutation_folder, hyperopt_folder, bootstrap_folder, detailed_results, final_results] = dp_create_folders(temp_folder, final_folder, analysis_name)
+
+permutation_folder = [temp_folder, '/' analysis_name, '/permutation']; % folder for permutation testing
+mkdir(permutation_folder);
+hyperopt_folder = [temp_folder, '/' analysis_name, '/hyperopt']; % folder for hyperparameter optimization
+mkdir(hyperopt_folder);
+bootstrap_folder = [temp_folder, '/' analysis_name, '/bootstrap']; % folder for bootstrapping
+mkdir(bootstrap_folder);
+
+detailed_results = [final_folder '/detailed_results'];
+mkdir(detailed_results);
+final_results = [final_folder '/final_results'];
+mkdir(final_results);
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_ctq_groups.m b/Visualization_Module/dp_ctq_groups.m
new file mode 100644
index 0000000..890e5ec
--- /dev/null
+++ b/Visualization_Module/dp_ctq_groups.m
@@ -0,0 +1,153 @@
+%% new script to compute CTQ group differences
+
+m_discovery = matfile('/volume/HCStress/Analysis/27-Mar-2019/DP_CTQ_allgroups_649_GM_80PI_12GO_110X060_40GD_110Y010_40GD_correct10_10_XY_SC_3_Diag/final_results/result_new.mat');
+m_replication = matfile('/volume/HCStress/Analysis/25-Feb-2020_CTQ_627_replication_sample_CTQ_fixed_datafile.mat');
+
+input_discovery = m_discovery.input;
+input_replication = m_replication.input;
+
+% get CTQ data and compute total and subscale scores
+CTQ.total = 1:25;
+CTQ.emotional_abuse = 1:5; %[3,8,14,18,25];
+CTQ.physical_abuse = 6:10; %[9,11,12,15,17];
+CTQ.sexual_abuse = 11:15; %[20,21,23,24,27];
+CTQ.emotional_neglect = 16:20; %[5,7,13,19,28];
+CTQ.physical_neglect = 21:25; %[1,2,4,6,26];
+CTQ.denial = 26:28; %[10,16,22];
+CTQ_discovery=[];CTQ_replication=[];
+fields = fieldnames(CTQ);
+for i=1:size(fields,1)
+    CTQ_discovery(:,i) = sum(input_discovery.data_collection.data(:, CTQ.(fields{i})),2);
+    CTQ_replication(:,i) = sum(input_replication.data_collection.data(:, CTQ.(fields{i})),2);
+end
+
+labels={'HC', 'ROD', 'CHR', 'ROP'};
+labels_disc = zeros(size(input_discovery.data_collection.Labels,1),1);
+labels_rep = ones(size(input_replication.data_collection.Labels,1),1);
+CTQ_results=[];temp_results=[];
+
+for i=1:size(CTQ_discovery,2)
+    temp_results=[];
+    temp_data = [CTQ_discovery(:,i); CTQ_replication(:,i)];
+    [p,tbl,stats] = kruskalwallis(temp_data, [labels_disc; labels_rep]);
+    temp_results = [temp_results, p];
+    close all
+    for ii=1:size(labels,2)
+        log_temp_d = contains(input_discovery.data_collection.Labels, labels{ii});
+        log_temp_r = contains(input_replication.data_collection.Labels, labels{ii});
+        temp_data = [CTQ_discovery(log_temp_d,i); CTQ_replication(log_temp_r,i)];
+        [p,tbl,stats] = kruskalwallis(temp_data, [labels_disc(log_temp_d); labels_rep(log_temp_r)]);
+        temp_results = [temp_results, p];
+    end
+    CTQ_results = [CTQ_results; temp_results];
+    close all
+end
+
+
+
+temp = input.data_collection.data;
+for i=1:size(temp,1)
+    CTQ_data_total(i,1) = sum(temp(i,CTQ.total));
+    CTQ_data_subscales(i,:) = [sum(temp(i,CTQ.emotional_abuse)), sum(temp(i,CTQ.physical_abuse)), sum(temp(i,CTQ.sexual_abuse)), sum(temp(i,CTQ.emotional_neglect)), sum(temp(i,CTQ.physical_neglect)), sum(temp(i,CTQ.denial))];
+end
+
+CTQ_data = [CTQ_data_subscales, CTQ_data_total];
+CTQ_data_names = {'emotional_abuse', 'physical_abuse', 'sexual_abuse', 'emotional_neglect', 'physical_neglect', 'denial', 'total'};
+groups_to_choose = {'HC', 'ROD', 'CHR', 'ROP'};
+log_groups_to_choose = ismember(input.data_collection.Labels, groups_to_choose);
+
+s_folder = '/volume/HCStress/Analysis/27-Mar-2019/DP_CTQ_allgroups_649_GM_80PI_12GO_110X060_40GD_110Y010_40GD_correct10_10_XY_SC_3_Diag/final_results';
+
+for i=1:size(CTQ_data,2)
+    [p,tbl,stats] = kruskalwallis(CTQ_data(log_groups_to_choose,i), input.data_collection.Labels(log_groups_to_choose));
+    output.CTQ.KW_results(i,:) = [tbl{2,5}, p];
+    output.CTQ.KW_results_ext(i,:) = {p,tbl,stats};
+    
[output.CTQ.Dunn_results{i,1}, ~, ~, output.CTQ.Dunn_results_labels] = multcompare(stats, 'CType', 'dunn-sidak', 'Estimate', 'kruskalwallis'); + close all +% dp_txt_write(s_folder, ['Dunn_results_CTQ_', num2str(i)], output.CTQ.Dunn_results{i,1}', '%.3f \t %.3f \t %.3f \t %.3f \t %.3f \t %.3f \t \n \n'); +end + +% dp_txt_write(s_folder, 'KW_CTQ_results', output.CTQ.KW_results', '%.3f \t %.3f \n \n'); +% dp_txt_write(s_folder, 'Dunn_CTQ_labels', '', '%s'); +% for i=1:size(output.CTQ.Dunn_results_labels,1) +% FID = fopen([s_folder, '/Dunn_CTQ_labels.txt'], 'a'); +% fprintf(FID, '%s\n', output.CTQ.Dunn_results_labels{i,1}); +% fclose(FID); +% dp_txt_write(s_folder, 'Dunn_CTQ_labels', [output.CTQ.Dunn_results_labels{:}], '%s \n'); +% end + +% adjust p values for multiple testing +[output.CTQ.KW_results_FDR_threshold,~,output.CTQ.KW_results(:,2)] = fdr(output.CTQ.KW_results(:,2)); + +% get mean and SD values for CTQ data +descriptive_groups = {'HC', 'ROD', 'CHR', 'ROP'}; +log_groups_to_choose = ismember(input.data_collection.Labels, descriptive_groups); +temp_descriptive_stats=[]; +temp_all_descriptive_stats=[]; +for i=1:size(CTQ_data,2) + temp_mean_all = mean(CTQ_data(:,i)); + temp_std_all = std(CTQ_data(:,i)); + temp_all_descriptive_stats=[temp_all_descriptive_stats;temp_mean_all; temp_std_all]; + for ii=1:size(descriptive_groups,2) + temp_means(1,ii) = mean(CTQ_data(ismember(input.data_collection.Labels, descriptive_groups{ii}),i)); + temp_std(1,ii) = std(CTQ_data(ismember(input.data_collection.Labels, descriptive_groups{ii}),i)); + end + temp_descriptive_stats=[temp_descriptive_stats;temp_means; temp_std]; +end + +output.CTQ.descriptive_stats = [temp_all_descriptive_stats, temp_descriptive_stats]; + +save(results_path, 'input', 'setup', 'output'); + +% [pthr,pcor,padj] = fdr(pvals) +% +% pvals=[0.127357313939555;0.223286174661224;0.987758230437541; 0.203652541916409;0.255893623925517;0.998512378851066;... +% 0.994306327317029;0.608632053506984;0.767610213762934;0.997076468205630;0.558523555856795;0.691558985229927;... +% 0.0583756086858885;0.609101209753551;0.511036107759435;0.0412122143925899;0.0374061120934872;0.999999972063131;... 
+% 0.170652060156671;0.0674167894612940;0.977288374963093] +% padj' + +log_groups_to_choose = input.data_collection.sex>0; + +s_folder = '/volume/HCStress/Analysis/27-Mar-2019/DP_CTQ_allgroups_649_GM_80PI_12GO_110X060_40GD_110Y010_40GD_correct10_10_XY_SC_3_Diag/final_results'; + +for i=1:size(CTQ_data,2) + [p,h,stats] = ranksum(CTQ_data(log_groups_to_choose(:,1),i), CTQ_data(log_groups_to_choose(:,2),i)); + output.CTQ.male_female_MW_results(i,:) = [h, p]; + output.CTQ.male_female_MW_results_ext(i,:) = {p,h,stats}; +% [output.CTQ.Dunn_results{i,1}, ~, ~, output.CTQ.Dunn_results_labels] = multcompare(stats, 'CType', 'dunn-sidak', 'Estimate', 'kruskalwallis'); + close all +% dp_txt_write(s_folder, ['Dunn_results_CTQ_', num2str(i)], output.CTQ.Dunn_results{i,1}', '%.3f \t %.3f \t %.3f \t %.3f \t %.3f \t %.3f \t \n \n'); +end + +dp_txt_write(s_folder, 'male_female_MW_results', output.CTQ.male_female_MW_results', '%.3f \t %.3f \n \n'); +% dp_txt_write(s_folder, 'Dunn_CTQ_labels', '', '%s'); +% for i=1:size(output.CTQ.Dunn_results_labels,1) +% FID = fopen([s_folder, '/Dunn_CTQ_labels.txt'], 'a'); +% fprintf(FID, '%s\n', output.CTQ.Dunn_results_labels{i,1}); +% fclose(FID); +% dp_txt_write(s_folder, 'Dunn_CTQ_labels', [output.CTQ.Dunn_results_labels{:}], '%s \n'); +% end + +% adjust p values for multiple testing +[output.CTQ.male_female_MW_results_FDR_threshold,~,output.CTQ.male_female_MW_results(:,2)] = fdr(output.CTQ.male_female_MW_results(:,2)); + +% get mean and SD values for CTQ data +descriptive_groups = {'male', 'female'}; +log_groups_to_choose = input.data_collection.sex>0; +temp_descriptive_stats=[]; +temp_all_descriptive_stats=[]; +for i=1:size(CTQ_data,2) + temp_mean_all = mean(CTQ_data(:,i)); + temp_std_all = std(CTQ_data(:,i)); + temp_all_descriptive_stats=[temp_all_descriptive_stats;temp_mean_all; temp_std_all]; + for ii=1:size(descriptive_groups,2) + temp_means(1,ii) = mean(CTQ_data(log_groups_to_choose(:,ii),i)); + temp_std(1,ii) = std(CTQ_data(log_groups_to_choose(:,ii),i)); + end + temp_descriptive_stats=[temp_descriptive_stats;temp_means; temp_std]; +end + + + +save(results_path, 'input', 'setup', 'output'); + diff --git a/Visualization_Module/dp_ctq_groups_new.m b/Visualization_Module/dp_ctq_groups_new.m new file mode 100644 index 0000000..aae86f3 --- /dev/null +++ b/Visualization_Module/dp_ctq_groups_new.m @@ -0,0 +1,101 @@ +%% new script to compute CTQ group differences + +% m_discovery = matfile('/volume/HCStress/Analysis/27-Mar-2019/DP_CTQ_allgroups_649_GM_80PI_12GO_110X060_40GD_110Y010_40GD_correct10_10_XY_SC_3_Diag/final_results/result_new.mat', 'Writable', true); +% m_replication = matfile('/volume/HCStress/Analysis/25-Feb-2020_CTQ_627_replication_sample_CTQ_fixed_datafile.mat', 'Writable', true); +% +% input_discovery = m_discovery.input; +% input_replication = m_replication.input; +% +% output_discovery = m_discovery.output; + +load('/volume/HCStress/Analysis/replication_results_SPLS_Stress/validation_replication_detailed_results_comb_app_1.mat') + +% update neglect subscales +to_add1 = {'CTQ_02', 'CTQ_05', 'CTQ_07', 'CTQ_13', 'CTQ_19', 'CTQ_26', 'CTQ_28'}; +CTQ_collection_discovery = input_discovery.data_collection.data; +CTQ_collection_discovery(:, contains(input_discovery.data_collection.names, to_add1)) = CTQ_collection_discovery(:, contains(input_discovery.data_collection.names, to_add1))+1; + +CTQ_collection_replication = input_replication.data_collection.data; +CTQ_collection_replication(:, contains(input_replication.data_collection.names, to_add1)) = 
CTQ_collection_replication(:, contains(input_replication.data_collection.names, to_add1))+1; + +% get CTQ data and compute total and subscale scores +CTQ.total = 1:25; +CTQ.emotional_abuse = 1:5; %[3,8,14,18,25]; +CTQ.physical_abuse = 6:10; %[9,11,12,15,17]; +CTQ.sexual_abuse = 11:15; %[20,21,23,24,27]; +CTQ.emotional_neglect = 16:20; %[5,7,13,19,28]; +CTQ.physical_neglect = 21:25; %[1,2,4,6,26]; +CTQ.denial = 26:28; %[10,16,22]; +CTQ_discovery=[];CTQ_replication=[]; +fields = fieldnames(CTQ); +for i=1:size(fields,1) + CTQ_discovery(:,i) = sum(CTQ_collection_discovery(:, CTQ.(fields{i})),2); + CTQ_replication(:,i) = sum(CTQ_collection_replication(:, CTQ.(fields{i})),2); +end + +input_replication.CTQ_collection = CTQ_replication; +input_replication.CTQ_collection_names = fieldnames(CTQ)'; +output_discovery.CTQ_collection = CTQ_discovery; +output_discovery.CTQ_collection_names = fieldnames(CTQ)'; + +m_discovery.input = input_discovery; +m_replication.input = input_replication; +m_discovery.output = output_discovery; + + +labels={'HC', 'ROD', 'CHR', 'ROP'}; +labels_disc = zeros(size(input_discovery.data_collection.Labels,1),1); +labels_rep = ones(size(input_replication.data_collection.Labels,1),1); +CTQ_results=[];temp_results=[];mean_std_results=[];temp_mean_std=[]; + +for i=1:size(CTQ_discovery,2) + temp_results=[];temp_mean_std=[]; + temp_data = [CTQ_discovery(:,i); CTQ_replication(:,i)]; + [p,tbl,stats] = kruskalwallis(temp_data, [labels_disc; labels_rep]); + temp_results = [temp_results, p]; + temp_mean_std = [nanmean(CTQ_discovery(:,i)), nanstd(CTQ_discovery(:,i)); nanmean(CTQ_replication(:,i)), nanstd(CTQ_replication(:,i))]; + close all + for ii=1:size(labels,2) + log_temp_d = contains(input_discovery.data_collection.Labels, labels{ii}); + log_temp_r = contains(input_replication.data_collection.Labels, labels{ii}); + temp_data = [CTQ_discovery(log_temp_d,i); CTQ_replication(log_temp_r,i)]; + temp_mean_std = [temp_mean_std, [nanmean(CTQ_discovery(log_temp_d,i)), nanstd(CTQ_discovery(log_temp_d,i)); nanmean(CTQ_replication(log_temp_r,i)), nanstd(CTQ_replication(log_temp_r,i))]]; + [p,tbl,stats] = kruskalwallis(temp_data, [labels_disc(log_temp_d); labels_rep(log_temp_r)]); + temp_results = [temp_results, p]; + end + CTQ_results = [CTQ_results; temp_results]; + mean_std_results = [mean_std_results; temp_mean_std]; + close all +end + +[CTQ_results, ~] = dp_FDR_adj(CTQ_results); + +groups_to_choose = {'ROD', 'CHR', 'ROP'}; +log_groups_disc = contains(input_discovery.data_collection.Labels, groups_to_choose); +log_groups_rep = contains(input_replication.data_collection.Labels, groups_to_choose); +CTQ_discovery_results_KW=[];CTQ_discovery_results_Dunn=[]; +CTQ_replication_results_KW=[];CTQ_replication_results_Dunn=[]; + +for i=1:size(CTQ_discovery,2) + [p,tbl,stats] = kruskalwallis(CTQ_discovery(log_groups_disc,i), input_discovery.data_collection.Labels(log_groups_disc)); + CTQ_discovery_results_KW(i,:) = [p, tbl{2,5}, tbl{2,3}]; + CTQ_discovery_results_Dunn{i,1} = multcompare(stats, 'Estimate', 'kruskalwallis', 'CType', 'dunn-sidak'); + close all + + [p,tbl,stats] = kruskalwallis(CTQ_replication(log_groups_rep,i), input_replication.data_collection.Labels(log_groups_rep)); + CTQ_replication_results_KW(i,:) = [p, tbl{2,5}, tbl{2,3}]; + CTQ_replication_results_Dunn{i,1} = multcompare(stats, 'Estimate', 'kruskalwallis', 'CType', 'dunn-sidak'); + + close all + + % CTQ_discovery_results = [CTQ_discovery_results, +end + +[CTQ_discovery_results_KW(:,1), ~] = dp_FDR_adj(CTQ_discovery_results_KW(:,1)); 
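+% apply the same FDR adjustment to the replication-sample p-values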
+[CTQ_replication_results_KW(:,1), ~] = dp_FDR_adj(CTQ_replication_results_KW(:,1));
+
diff --git a/Visualization_Module/dp_cu_cv.m b/Visualization_Module/dp_cu_cv.m
new file mode 100644
index 0000000..d8cf2c3
--- /dev/null
+++ b/Visualization_Module/dp_cu_cv.m
@@ -0,0 +1,95 @@
+%% DP function for cu and cv setup
+
+function [cu_cv_combination, hyperopt_sets] = dp_cu_cv(X, Y, grid_x, grid_y, size_sets_hyperopt)
+
+% % check if previous cu/cv files exist and clear the hyperopt folder
+% cu_dir = dir([hyperopt_folder, '/cu*']);
+% cv_dir = dir([hyperopt_folder, '/cv*']);
+% if size(cu_dir,1)>0
+%     for i=1:size(cu_dir,1)
+%         delete([hyperopt_folder, '/' cu_dir(i).name]);
+%     end
+% end
+%
+% if size(cv_dir,1)>0
+%     for i=1:size(cv_dir,1)
+%         delete([hyperopt_folder, '/' cv_dir(i).name]);
+%     end
+% end
+
+% cu_range and cv_range define the range for the grid search. The grid
+% search is performed along 20 points from 1 to sqrt(number of variables),
+% as proposed in Monteiro et al. 2016.
+
+% create the original 100-point grid between 1 and sqrt(size(X,2))
+cu_range_temp_1 = linspace(1,sqrt(size(X,2)),20);
+cu_range_temp = linspace(cu_range_temp_1(2),cu_range_temp_1(end),100);
+cv_range_temp_1 = linspace(1,sqrt(size(Y,2)),20);
+cv_range_temp = linspace(cv_range_temp_1(2),cv_range_temp_1(end),100);
+
+% check if there are new start and end points for the grid; if not, then
+% use defaults
+if ~isfield(grid_x, 'start')
+    grid_x.start = 1;
+end
+
+if ~isfield(grid_x, 'end')
+    grid_x.end = 0;
+end
+
+if ~isfield(grid_x, 'density')
+    grid_x.density = 20;
+end
+
+if ~isfield(grid_y, 'start')
+    grid_y.start = 1;
+end
+
+if ~isfield(grid_y, 'end')
+    grid_y.end = 0;
+end
+
+if ~isfield(grid_y, 'density')
+    grid_y.density = 20;
+end
+
+% apply start and end points to generic grid
+cu_range_points = linspace(cu_range_temp(grid_x.start),cu_range_temp(end-grid_x.end),grid_x.density);
+cv_range_points = linspace(cv_range_temp(grid_y.start),cv_range_temp(end-grid_y.end),grid_y.density);
+
+% compile a matrix with a separate row for every possible cu/cv
+% combination by pairing each cu grid point with each cv grid point
+cu_cv_combination = zeros(size(cu_range_points,2)*size(cv_range_points,2),2);
+nn=1;
+for i=1:size(cu_range_points,2)
+    for ii=1:size(cv_range_points,2)
+        cu_cv_combination(nn,:) = [cu_range_points(i), cv_range_points(ii)];
+        nn=nn+1;
+    end
+end
+
+% set up cu and cv for hyperparameter optimization
+rest_hyperopt = mod(size(cu_cv_combination,1),size_sets_hyperopt);
+if rest_hyperopt>0
+    hyperopt_sets = ((size(cu_cv_combination,1) - rest_hyperopt)/size_sets_hyperopt)+1;
+else
+    hyperopt_sets = ((size(cu_cv_combination,1) - rest_hyperopt)/size_sets_hyperopt);
+end
+
+% nn=1;
+% for i=1:hyperopt_sets
+%     for ii=1:size_sets_hyperopt
+%         try
+%             temp1 = cu_cv_combination(nn,1);
+%             dp_txt_write(hyperopt_folder, ['cu_', num2str(i), '_', num2str(ii)], temp1, '%.4f');
+%             temp2 = cu_cv_combination(nn,2);
+%             dp_txt_write(hyperopt_folder, ['cv_', num2str(i), '_', num2str(ii)], temp2, '%.4f');
+%             nn=nn+1;
+%         catch
+%             break
+%         end
+%     end
+% end
+
+end
\ No newline at end of file
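For orientation, a minimal usage sketch of dp_cu_cv as defined above; the data matrices and the set size are made-up placeholders, not values from this repository:

%% sketch: building the cu/cv grid for the SPLS hyperparameter search
X = randn(50, 400);                % n x p brain matrix (placeholder)
Y = randn(50, 30);                 % n x q behavior matrix (placeholder)
grid_x = struct('start', 1, 'end', 0, 'density', 20);  % defaults used above
grid_y = struct('start', 1, 'end', 0, 'density', 20);
size_sets_hyperopt = 25;           % cu/cv pairs bundled into one job
[cu_cv_combination, hyperopt_sets] = dp_cu_cv(X, Y, grid_x, grid_y, size_sets_hyperopt);
% cu_cv_combination is (20*20) x 2; each row is one [cu, cv] sparsity pair,
% with cu <= sqrt(size(X,2)) and cv <= sqrt(size(Y,2)).
% hyperopt_sets = 400/25 = 16 batches for the parallelized optimization.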
diff --git a/Visualization_Module/dp_cu_cv_ext.m b/Visualization_Module/dp_cu_cv_ext.m
new file mode 100644
index 0000000..03f6675
--- /dev/null
+++ b/Visualization_Module/dp_cu_cv_ext.m
@@ -0,0 +1,73 @@
+%% DP function for cu and cv setup (extended version)
+
+function [cu_cv_combination, hyperopt_sets] = dp_cu_cv_ext(X, Y, grid_x, grid_y, size_sets_hyperopt)
+
+% cu_range and cv_range define the range for the grid search. The grid
+% search is performed along 20 points from 1 to sqrt(number of variables),
+% as proposed in Monteiro et al. 2016.
+
+% create the original 100-point grid between 1 and sqrt(size(X,2))
+cu_range_temp_1 = linspace(1,sqrt(size(X,2)),20);
+cu_range_temp = linspace(cu_range_temp_1(2),cu_range_temp_1(end),100);
+cv_range_temp_1 = linspace(1,sqrt(size(Y,2)),20);
+cv_range_temp = linspace(cv_range_temp_1(2),cv_range_temp_1(end),100);
+
+% check if there are new start and end points for the grid; if not, then
+% use defaults
+if ~isfield(grid_x, 'start')
+    grid_x.start = 1;
+end
+
+if ~isfield(grid_x, 'end')
+    grid_x.end = 0;
+end
+
+if ~isfield(grid_x, 'density')
+    grid_x.density = 20;
+end
+
+if ~isfield(grid_y, 'start')
+    grid_y.start = 1;
+end
+
+if ~isfield(grid_y, 'end')
+    grid_y.end = 0;
+end
+
+if ~isfield(grid_y, 'density')
+    grid_y.density = 20;
+end
+
+% apply start and end points to generic grid
+cu_range_points = linspace(cu_range_temp(grid_x.start),cu_range_temp(end-grid_x.end),grid_x.density);
+cv_range_points = linspace(cv_range_temp(grid_y.start),cv_range_temp(end-grid_y.end),grid_y.density);
+
+% if the grid was truncated at the upper end, snap the last grid point
+% back to the theoretical maximum sqrt(number of variables)
+if grid_x.end ~= 0
+    cu_range_points(end) = sqrt(size(X,2));
+end
+
+if grid_y.end ~= 0
+    cv_range_points(end) = sqrt(size(Y,2));
+end
+
+% compile a matrix with a separate row for every possible cu/cv
+% combination by pairing each cu grid point with each cv grid point
+cu_cv_combination = zeros(size(cu_range_points,2)*size(cv_range_points,2),2);
+nn=1;
+for i=1:size(cu_range_points,2)
+    for ii=1:size(cv_range_points,2)
+        cu_cv_combination(nn,:) = [cu_range_points(i), cv_range_points(ii)];
+        nn=nn+1;
+    end
+end
+
+% set up cu and cv for hyperparameter optimization
+rest_hyperopt = mod(size(cu_cv_combination,1),size_sets_hyperopt);
+if rest_hyperopt>0
+    hyperopt_sets = ((size(cu_cv_combination,1) - rest_hyperopt)/size_sets_hyperopt)+1;
+else
+    hyperopt_sets = ((size(cu_cv_combination,1) - rest_hyperopt)/size_sets_hyperopt);
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_decision_voxel.m b/Visualization_Module/dp_decision_voxel.m
new file mode 100644
index 0000000..4804a21
--- /dev/null
+++ b/Visualization_Module/dp_decision_voxel.m
@@ -0,0 +1,35 @@
+%% DP decision script for finding the optimal threshold for voxels
+
+decision = 'weights';
+switch decision
+    case 'voxels'
+        n=2;
+    case 'weights'
+        n=4;
+end
+
+decision1 = 'negative';
+switch decision1
+    case 'positive'
+        nn=1;
+    case 'negative'
+        nn=2;
+end
+
+f=figure();
+bar(abs(cell2mat(output.regions.count.(decision){3,nn}(:,n))));
+set(gca, 'XTickLabel', output.regions.count.(decision){3,nn}(:,1), 'XTick', 1:numel(output.regions.count.(decision){3,nn}(:,n)));
+title([decision, ' ', decision1]);
+
+f=figure();
+bar(abs(cell2mat(output.regions.count.weights{3,1}(:,4))));
+set(gca, 'XTickLabel', output.regions.count.weights{3,1}(:,1), 'XTick', 1:numel(output.regions.count.weights{3,1}(:,4)));
+
+% relative drop between consecutive region values; the threshold is placed
+% where the proportional drop to the next region is largest
+temp = cell2mat(output.regions.count.(decision){3,nn}(:,n));
+deltas = zeros(1, size(temp,1)-1);
+for i=1:(size(temp,1)-1)
+    deltas(i)=(abs(temp(i))-abs(temp(i+1)))/abs(temp(i));
+end
+
+[val, ind] = max(deltas);
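A worked toy example of the largest-relative-drop rule used above (the weight values are made up):

%% sketch: largest relative drop between consecutive sorted weights
temp = [0.90; 0.85; 0.80; 0.30; 0.25];  % placeholder weights, sorted by size
% vectorized form of the loop above
deltas = (abs(temp(1:end-1)) - abs(temp(2:end))) ./ abs(temp(1:end-1));
[val, ind] = max(deltas);  % ind = 3: the cut-off falls after the 3rd region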
diff --git a/Visualization_Module/dp_deflatescale.m b/Visualization_Module/dp_deflatescale.m
new file mode 100644
index 0000000..fc89028
--- /dev/null
+++ b/Visualization_Module/dp_deflatescale.m
@@ -0,0 +1,13 @@
+%% DP function for scaling and deflating
+
+function [IN_x, IN_y] = dp_deflatescale(IN_x, IN_y, u_collection, v_collection, scaling_method)
+IN_s.method = scaling_method;
+% standardize, then deflate train and test data with the dd-th weight pair
+for dd=1:size(u_collection,2)
+    [IN_x.train, IN_x.test] = dp_standardize_comb(IN_x.train, IN_x.test, IN_s);
+    [IN_y.train, IN_y.test] = dp_standardize_comb(IN_y.train, IN_y.test, IN_s);
+
+    [IN_x.train,IN_y.train] = proj_def(IN_x.train, IN_y.train, u_collection(:,dd), v_collection(:,dd));
+    [IN_x.test,IN_y.test] = proj_def(IN_x.test, IN_y.test, u_collection(:,dd), v_collection(:,dd));
+end
+
+end
\ No newline at end of file
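proj_def itself is not part of this patch; what follows is a minimal sketch of what a projection-deflation step of this kind typically does in SPLS pipelines, under the assumption that u and v are unit-norm weight vectors. Treat it as an illustration, not the module's actual implementation:

%% sketch (assumed): projection deflation of X and Y along u and v
function [X, Y] = proj_def_sketch(X, Y, u, v)
    % remove the variance captured by the current latent dimension so the
    % next [u, v] pair is estimated on the residual matrices
    X = X - (X*u)*u';
    Y = Y - (Y*v)*v';
end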
diff --git a/Visualization_Module/dp_fdr_posthoc_adjust.m b/Visualization_Module/dp_fdr_posthoc_adjust.m
new file mode 100644
index 0000000..4c1e909
--- /dev/null
+++ b/Visualization_Module/dp_fdr_posthoc_adjust.m
@@ -0,0 +1,53 @@
+%% additional FDR testing
+
+function output = dp_fdr_posthoc_adjust(results_path)
+
+load(results_path);
+
+% walk through all LV x subgroup x score hold-out correlations, pool the
+% p values per cell and FDR-adjust them
+fields1 = fieldnames(output.hold_out_correlations);
+output.adj_tables_holdout_RHO_p = output.tables_hold_out_Rho_p;
+output.adj_FDR_values = output.hold_out_FDR_values;
+for i=1:size(fields1,1)
+    fields2 = fieldnames(output.hold_out_correlations.(['LV_', num2str(i)]));
+    for ii=1:size(fields2,1)
+        fields3 = fieldnames(output.hold_out_correlations.(['LV_', num2str(i)]).(fields2{ii}));
+        for iii=1:size(fields3,1)
+            fields4 = fieldnames(output.hold_out_correlations.(['LV_', num2str(i)]).(fields2{ii}).(fields3{iii}));
+            temp_p_list.(fields1{i}).(fields2{ii}).(fields3{iii}) = [];
+            for iiii=1:size(fields4,1)
+                temp_p = output.hold_out_correlations.(['LV_', num2str(i)]).(fields2{ii}).(fields3{iii}).(fields4{iiii})(1,2);
+                temp_p_list.(fields1{i}).(fields2{ii}).(fields3{iii}) = [temp_p_list.(fields1{i}).(fields2{ii}).(fields3{iii}); temp_p];
+            end
+            [output.adj_FDR_values.(fields1{i}).(fields2{ii}).(fields3{iii}),~,output.adj_tables_holdout_RHO_p.(fields1{i}).(fields2{ii})(2:2:end,iii)] = fdr(temp_p_list.(fields1{i}).(fields2{ii}).(fields3{iii}));
+        end
+    end
+end
+
+save(results_path, 'setup', 'input', 'output');
+
+if any(strfind(results_path, 'CTQ'))
+    overall_folder = '/volume/HCStress/Analysis/Stress';
+elseif any(strfind(results_path, 'CISS'))
+    overall_folder = '/volume/HCStress/Analysis/Resilience';
+end
+
+folder_name = results_path(5+strfind(results_path, '2019'):(strfind(results_path, '/final_results')-1));
+collection_folder = [overall_folder, '/', folder_name];
+corr_folder = [collection_folder, '/correlations'];
+mkdir(corr_folder);
+
+% write the adjusted correlation/p tables to text files
+fields_adj=fieldnames(output.adj_tables_holdout_RHO_p);
+for i=1:size(fields_adj,1)
+    latent_scores = fieldnames(output.adj_tables_holdout_RHO_p.(fields_adj{i}));
+    for ii=1:size(latent_scores,1)
+        % dp_txt_write(s_folder, ['/corr_RHO_', fields{i}, '_', latent_scores{ii}], output.tables_ho_RHO.(fields{i}).(latent_scores{ii})', '%.3f \t %.3f \t %.3f \t %.3f \t %.3f \n');
+        % dp_txt_write(s_folder, ['/corr_p_', fields{i}, '_', latent_scores{ii}], output.tables_ho_p.(fields{i}).(latent_scores{ii})', '%.3f \t %.3f \t %.3f \t %.3f \t %.3f \n');
+        dp_txt_write(corr_folder, ['adj_corr_all_', fields_adj{i}, '_', latent_scores{ii}], output.adj_tables_holdout_RHO_p.(fields_adj{i}).(latent_scores{ii})', '%.3f \t %.3f \t %.3f \t %.3f \t %.3f \n');
+    end
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_fitlm.m b/Visualization_Module/dp_fitlm.m
new file mode 100644
index 0000000..3356e11
--- /dev/null
+++ b/Visualization_Module/dp_fitlm.m
@@ -0,0 +1,17 @@
+%% DP function to quickly do fitlm and extract relevant features
+
+function [result] = dp_fitlm(X, Y)
+
+% fit one linear model per column of Y; collect omnibus p values,
+% adjusted R^2 and the model objects
+for v=1:size(Y,2)
+    mdl = fitlm(X, Y(:,v));
+    result.p(v,1) = mdl.coefTest;
+    result.rsquared(v,1) = mdl.Rsquared.Adjusted;
+    result.mdl{v,1} = mdl;
+end
+
+% FDR-adjust the omnibus p values across outcomes
+[result.p, ~] = dp_FDR_adj(result.p);
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_get_latent_scores.m b/Visualization_Module/dp_get_latent_scores.m
new file mode 100644
index 0000000..f1c2d5a
--- /dev/null
+++ b/Visualization_Module/dp_get_latent_scores.m
@@ -0,0 +1,78 @@
+%% DP function to get latent scores from SPLS results
+function latent_scores_table = dp_get_latent_scores(results_file, group_select, correct)
+load(results_file);
+% group_select = 'all'; % all OR fold
+% correct = 1; % 1=yes, 2=no
+latent_scores_all =[];
+latent_scores_names = [];
+RHO_all=[];
+
+switch group_select
+    case 'all'
+        temp = load(input.MRI_path);
+        name_temp = fieldnames(temp);
+        X = temp.(name_temp{1});
+        Y = input.Y;
+
+        for i=1:(size(output.final_parameters,1)-1)
+
+            IN_x.train = X;
+            IN_x.test = X;
+            IN_y.train = Y;
+            IN_y.test = Y;
+
+            switch correct
+                case 1 % with site correction
+                    if i==1
+                        COV.train = input.sites;
+                        COV.test = input.sites;
+                    else
+                        COV.train = nan(size(input.sites,1),1);
+                        COV.test = nan(size(input.sites,1),1);
+                        input.correction_target = 3;
+                    end
+                case 2 % without site correction
+                    COV.train = nan(size(input.sites,1),1);
+                    COV.test = nan(size(input.sites,1),1);
+                    input.correction_target = 3;
+            end
+
+            if ~isempty(input.cs_method.correction_subgroup)
+                labels_temp = input.data_complete.foranalysis.basic{input.Y_final.Properties.RowNames, 'Labels'};
+                cs_method.correction_subgroup = input.cs_method.correction_subgroup;
+                cs_method.method = input.cs_method.method;
+                cs_method.subgroup_train = contains(labels_temp, cs_method.correction_subgroup);
+                cs_method.subgroup_test = contains(labels_temp, cs_method.correction_subgroup);
+            else
+                cs_method.correction_subgroup = [];
+                cs_method.method = 'mean-centering';
+                cs_method.subgroup_train = [];
+                cs_method.subgroup_test = [];
+            end
+
+            [OUT_x, OUT_y] = dp_master_correctscale(IN_x, IN_y, COV, cs_method, input.correction_target);
+
+            log_u = matches(output.parameters_names, 'u');
+            log_v = matches(output.parameters_names, 'v');
+            u = output.final_parameters{i,log_u};
+            v = output.final_parameters{i,log_v};
+
+            [RHO, epsilon, omega, u, v] = dp_projection(OUT_x.train, OUT_y.train, u, v, input.correlation_method);
+
+            % deflate before moving on to the next LV
+            [X,Y] = proj_def(X, Y, u, v);
+
+            RHO_all = [RHO_all, RHO];
+            latent_scores_all = [latent_scores_all, omega, epsilon];
+            latent_scores_names = [latent_scores_names, {['omega_LV', num2str(i)]}, {['epsilon_LV', num2str(i)]}];
+
+        end
+
+        latent_scores_table = array2table(latent_scores_all, 'RowNames', input.final_PSN, 'VariableNames', latent_scores_names);
+
+    case 'fold'
+        % NOTE: omega_all/epsilon_all are expected to come from the loaded
+        % results file; this branch only covers LV 3 and LV 4
+        latent_scores_table = array2table([omega_all{3}, epsilon_all{3}, omega_all{4}, epsilon_all{4}], 'RowNames', input.final_PSN, 'VariableNames', {'omega_LV3', 'epsilon_LV3', 'omega_LV4', 'epsilon_LV4'});
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_gm_volume.m b/Visualization_Module/dp_gm_volume.m
new file mode 100644
index 0000000..0cc7c7d
--- /dev/null
+++ b/Visualization_Module/dp_gm_volume.m
@@ -0,0 +1,62 @@
+%% testing for correlations between GM volume and clinical pattern
+
+results_file = 
'/volume/HCStress/Analysis/27-Mar-2019/DP_CTQ_allgroups_649_GM_80PI_12GO_110X060_40GD_110Y010_40GD_correct10_10_XY_SC_3_Diag/final_results/result.mat'; + +load(results_file); + +variables = {'male', 'female'}; +number_brain_regions = 1; +colorpattern_LS = hsv(number_brain_regions); + +% first_line = strrep([input.name, ' grid_density_x=' num2str(input.grid_x.density), ' grid_density_x=' num2str(input.grid_x.density), ' grid_density_y=' num2str(input.grid_y.density), ', LV ',num2str(i)], '_', ' '); +% second_line = strrep([input.selected_studygroups{1,ii}, ', epsilon x ', fields{iii}, ', p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], '_', ' '); +% if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) +% third_line1 = 'LV significant'; +% else +% third_line1 = 'LV not significant'; +% end + +% marker_size = 10; +% font_size = 26; +% axis([0 3 0 1]); +nn=1; +for i=1:number_brain_regions + for ii=1:2 + subplot(1,2,nn); + GM_data_all = output.volumes.brainnetome.raw{4,ii}(:,i); + GM_data_male = GM_data_all(input.behavior(:,strcmp(input.behavior_names, 'male'))>0); + GM_data_female = GM_data_all(input.behavior(:,strcmp(input.behavior_names, 'female'))>0); + boxplot([GM_data_all;GM_data_all], [ones(size(GM_data_all,1),1); (input.behavior(:,strcmp(input.behavior_names, 'male'))+2)]); + title(output.volumes.brainnetome.names{4,ii}{2,i}); + xticklabels({'all', 'female', 'male'}); + ylabel('GM volume'); + set(gcf,'Position', get(0,'Screensize')); + set(gcf,'PaperPositionMode','auto') + nn=nn+1; + end +end + + +nn=1; +for i=1:number_brain_regions + for ii=1:2 + subplot(1,2,nn); + GM_data_all = output.volumes.brainnetome.raw{1,ii}(:,i); + age_data_all = input.behavior(:, strcmp(input.behavior_names, 'Age')); + scatter(age_data_all, GM_data_all); + title(output.volumes.brainnetome.names{1,ii}{2,i}); + xlabel('Age'); + ylabel('GM volume'); + set(gcf,'Position', get(0,'Screensize')); + set(gcf,'PaperPositionMode','auto') + nn=nn+1; + end +end + + + + + + + + diff --git a/Visualization_Module/dp_gm_volume.m~ b/Visualization_Module/dp_gm_volume.m~ new file mode 100644 index 0000000..4f4b6d2 --- /dev/null +++ b/Visualization_Module/dp_gm_volume.m~ @@ -0,0 +1,48 @@ +%% testing for correlations between GM volume and clinical pattern + +results_file = '/volume/HCStress/Analysis/27-Mar-2019/DP_CTQ_allgroups_649_GM_80PI_12GO_110X060_40GD_110Y010_40GD_correct10_10_XY_SC_3_Diag/final_results/result.mat'; + +load(results_file); + +variables = {'male', 'female'}; +number_brain_regions = 2; +colorpattern_LS = hsv(number_brain_regions); + +first_line = strrep([input.name, ' grid_density_x=' num2str(input.grid_x.density), ' grid_density_x=' num2str(input.grid_x.density), ' grid_density_y=' num2str(input.grid_y.density), ', LV ',num2str(i)], '_', ' '); +second_line = strrep([input.selected_studygroups{1,ii}, ', epsilon x ', fields{iii}, ', p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], '_', ' '); +if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + third_line1 = 'LV significant'; +else + third_line1 = 'LV not significant'; +end + +marker_size = 10; +font_size = 26; +axis([0 3 0 1]); +nn=1; +for i=1:number_brain_regions + for ii=1:2 + subplot(2,2,nn); + GM_data_all = output.volumes.brainnetome.raw{4,ii}(:,i); + GM_data_male = GM_data_all(input.behavior(:,strcmp(input.behavior_names, 'male'))>0); + GM_data_female = GM_data_all(input.behavior(:,strcmp(input.behavior_names, 'female'))>0); + boxplot([GM_data_all;GM_data_all], [ones(size(GM_data_all,1),1); 
(input.behavior(:,strcmp(input.behavior_names, 'male'))+2)]);
+        title(output.volumes.brainnetome.names{4,ii}{2,i});
+        xticklabels({'all', 'female', 'male'});
+        ylabel('GM volume');
+        set(gcf,'Position', get(0,'Screensize'));
+        set(gcf,'PaperPositionMode','auto')
+        nn=nn+1;
+    end
+end
+
diff --git a/Visualization_Module/dp_hyperopt_ICV.m b/Visualization_Module/dp_hyperopt_ICV.m
new file mode 100644
index 0000000..6a6af3f
--- /dev/null
+++ b/Visualization_Module/dp_hyperopt_ICV.m
@@ -0,0 +1,37 @@
+%% function for hyperparameter optimization within the inner cross-validation
+
+function dp_hyperopt_ICV(i, size_sets_str, analysis_folder)
+
+% cv_inner, keep_in_data_x/y and correlation_method are provided by the
+% partition file
+load([analysis_folder '/keep_in_partition.mat']);
+
+size_sets = str2double(size_sets_str);
+RHO_collection = nan(size(cv_inner.TestInd,2),size_sets);
+
+for ii=1:size_sets
+    if exist([analysis_folder '/cu_' i '_' num2str(ii) '.txt'],'file')
+        cu = dp_txtscan([analysis_folder '/cu_', i, '_', num2str(ii), '.txt'], '%f');
+        cv = dp_txtscan([analysis_folder '/cv_', i, '_', num2str(ii), '.txt'], '%f');
+
+        % cap cu/cv at the theoretical maximum sqrt(number of features)
+        if cu > sqrt(size(keep_in_data_x,2))
+            cu = sqrt(size(keep_in_data_x,2));
+        end
+
+        if cv > sqrt(size(keep_in_data_y,2))
+            cv = sqrt(size(keep_in_data_y,2));
+        end
+
+        for k=1:size(cv_inner.TestInd,2)
+            test_data_x = keep_in_data_x(cv_inner.TestInd{k},:);
+            test_data_y = keep_in_data_y(cv_inner.TestInd{k},:);
+            training_data_x = keep_in_data_x(cv_inner.TrainInd{k},:);
+            training_data_y = keep_in_data_y(cv_inner.TrainInd{k},:);
+            RHO_collection(k,ii) = dp_k_split(training_data_x,training_data_y,test_data_x, test_data_y, cu, cv, correlation_method);
+        end
+    end
+end
+
+try
+    save([analysis_folder, '/RHO_', i, '.mat'], 'RHO_collection');
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_hyperopt_ICV.m~ b/Visualization_Module/dp_hyperopt_ICV.m~
new file mode 100644
index 0000000..b9e26a5
--- /dev/null
+++ b/Visualization_Module/dp_hyperopt_ICV.m~
@@ -0,0 +1,49 @@
+%% new function for permutation testing
+
+function dp_hyperopt_ICV(i, size_sets_str, analysis_folder)
+
+load([analysis_folder '/keep_in_partition.mat']);
+% dp_txt_write(analysis_folder, ['init_' i],'initialized','%s \n');
+
+size_sets = str2double(size_sets_str);
+RHO_median_collection = nan(size_sets,1);
+
+for ii=1:size_sets
+    if exist([analysis_folder '/cu_' i '_' num2str(ii) '.txt'],'file')
+        RHO_collection = nan(size(cv_inner.TestInd,2),1);
+        cu = dp_txtscan([analysis_folder '/cu_', i, '_', num2str(ii), '.txt'], '%f');
+        cv = dp_txtscan([analysis_folder '/cv_', i, '_', num2str(ii), '.txt'], '%f');
+
+        if cu > sqrt(size(keep_in_data_x,2))
+            cu = sqrt(size(keep_in_data_x,2));
+        end
+
+        if cv > sqrt(size(keep_in_data_y,2))
+            cv = sqrt(size(keep_in_data_y,2));
+        end
+
+        for k=1:size(cv_inner.TestInd,2)
+            test_data_x = keep_in_data_x(cv_inner.TestInd{k},:);
+            test_data_y = keep_in_data_y(cv_inner.TestInd{k},:);
+            training_data_x = keep_in_data_x(cv_inner.TrainInd{k},:);
+            training_data_y = keep_in_data_y(cv_inner.TrainInd{k},:);
+            RHO_collection(k,1) = dp_k_split(training_data_x,training_data_y,test_data_x, test_data_y, cu, cv, correlation_method);
+        end
+
+        RHO_median_collection(ii,1) = median(RHO_collection);
+%         FID = fopen([analysis_folder, '/init_' i '.txt'], 'a');
+%         fprintf(FID, '%d \n', ii);
+%         fclose(FID);
+    end
+
+end
+
+% RHO_median_collection(isnan(RHO_median_collection))=[];
+try
+    dp_txt_write(analysis_folder, ['RHO_' i], RHO_median_collection, '%.4f\n');
+catch ME
+    ME.message
+    save([analysis_folder, '/RHO_', i, '.mat'], 'RHO_median_collection');
+end
+
+end
\ No newline at end of
file diff --git a/Visualization_Module/dp_k_split.m b/Visualization_Module/dp_k_split.m new file mode 100644 index 0000000..42eb91a --- /dev/null +++ b/Visualization_Module/dp_k_split.m @@ -0,0 +1,20 @@ +%% DP function for one k split + +function [RHO] = dp_k_split(training_data_x,training_data_y,test_data_x, test_data_y, cu, cv, correlation_method) + +% % initialize variables +% test_data_x = []; training_data_x=[]; test_data_y=[]; training_data_y=[]; +% u=[]; v=[]; RHO=[]; +% +% % separate the keep_in data into training and test data according to +% % the chosen test percentage tp +% [test_data_x, training_data_x, test_data_y, training_data_y] = dp_partition_holdout(tp, keep_in_data_x, keep_in_data_y); + +%perform SPLS on the training data using the current cu/cv combination +[u, v, ~] = spls_suppressed_display(training_data_x,training_data_y,cu,cv); + +%compute the correlation between the projections of the training and +%test matrices onto the SPLS latent space spanned by the weight vectors +RHO = abs(corr(test_data_x*u,test_data_y*v, 'Type', correlation_method)); + +end \ No newline at end of file diff --git a/Visualization_Module/dp_master_correctscale.m b/Visualization_Module/dp_master_correctscale.m new file mode 100644 index 0000000..44e9ca9 --- /dev/null +++ b/Visualization_Module/dp_master_correctscale.m @@ -0,0 +1,27 @@ +%% DP master function for correcting and scaling matrices + +function [OUT_x, OUT_y] = dp_master_correctscale(IN_x, IN_y, COV, cs_method, correction_target) + +switch correction_target + case 1 + OUT_x = dp_correctscale(IN_x,COV,cs_method); + try COV.test = nan(size(COV.test,1),1); + COV.train = nan(size(COV.train,1),1); + catch + COV = nan(size(COV,1),1); + end + OUT_y = dp_correctscale(IN_y,COV,cs_method); + case 2 + OUT_y = dp_correctscale(IN_y,COV,cs_method); + try COV.test = nan(size(COV.test,1),1); + COV.train = nan(size(COV.train,1),1); + catch + COV = nan(size(COV,1),1); + end + OUT_x = dp_correctscale(IN_x,COV,cs_method); + case 3 + OUT_x = dp_correctscale(IN_x,COV,cs_method); + OUT_y = dp_correctscale(IN_y,COV,cs_method); +end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_projection.m b/Visualization_Module/dp_projection.m new file mode 100644 index 0000000..8a58b81 --- /dev/null +++ b/Visualization_Module/dp_projection.m @@ -0,0 +1,24 @@ +%% DP function for projection of u and v onto test data +function [RHO, epsilon, omega, u, v] = dp_projection(data_x, data_y, u, v, correlation_method) + +epsilon = data_x*u; +omega = data_y*v; +RHO = corr(epsilon, omega, 'Type', correlation_method); + +f_invert = @(x)(-1*x); + +if RHO<0 + v = f_invert(v); + omega = data_y*v; + RHO = corr(epsilon, omega, 'Type', correlation_method); +end + +if sum(v)<0 + u = f_invert(u); + v = f_invert(v); + epsilon = data_x*u; + omega = data_y*v; + RHO = corr(epsilon, omega, 'Type', correlation_method); +end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_projection_ext.m b/Visualization_Module/dp_projection_ext.m new file mode 100644 index 0000000..de5b58c --- /dev/null +++ b/Visualization_Module/dp_projection_ext.m @@ -0,0 +1,24 @@ +%% DP function for projection of u and v onto test data +function [RHO, p, epsilon, omega, u, v] = dp_projection_ext(data_x, data_y, u, v, correlation_method) + +epsilon = data_x*u; +omega = data_y*v; +[RHO, p] = corr(epsilon, omega, 'Type', correlation_method); + +f_invert = @(x)(-1*x); + +if RHO<0 + v = f_invert(v); + omega = data_y*v; + [RHO, p] = corr(epsilon, omega, 'Type', correlation_method); 
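+        % convention: v is flipped so that the latent correlation is
+        % reported with a positive sign; the joint (u,v) flip below leaves
+        % RHO unchanged while fixing the sign of sum(v)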
+end + +if sum(v)<0 + u = f_invert(u); + v = f_invert(v); + epsilon = data_x*u; + omega = data_y*v; + [RHO, p] = corr(epsilon, omega, 'Type', correlation_method); +end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_regions_table.m b/Visualization_Module/dp_regions_table.m new file mode 100644 index 0000000..ebcbaf6 --- /dev/null +++ b/Visualization_Module/dp_regions_table.m @@ -0,0 +1,164 @@ +%% DP script to write out voxels as table for paper + +% load output file +results_path = '/volume/HCStress/Analysis/02-Jul-2020/CISS_636_IQRadd_NCV55_single_folds_bestmerge_noval_min10_2020_5000AUC_Dev/final_results/result_final_vis.mat'; +load(results_path); + +readouts = {'brainnetome', 'cerebellum'}; + +for r=1:size(readouts,2) + + switch readouts{r} + case 'brainnetome' + fields = load('/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/brainnetome_indices.mat'); + atlas_path_full = '/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/brainnetome_3mm_636_CISS_NM_X.mat'; + case 'cerebellum' + load('/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/cerebellum_vermis_indices.mat'); + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_649_CTQ_X.mat'; + + end + + % create template to sort voxels in + fields_temp = fieldnames(fields); + labels_regions = fields.(fields_temp{1}); + + numbers_region_matrix = [[1:2:size(labels_regions,1)]',[2:2:size(labels_regions,1)]']; + + % create algorithm to sort all LV voxels in final_matrix template + fields = fieldnames(output.atlas_readouts); + temp_names = output.atlas_readouts.vector_1.(readouts{r}).positive.Properties.VariableNames; + + for i=1:(size(fields,1)-1) + % create empty double template + output.regions.table_collection.(['LV_', num2str(i)]).final_matrix = nan(size(numbers_region_matrix,1), 2*size(numbers_region_matrix,2)); + fields1 = fieldnames(output.atlas_readouts.(fields{i}).(readouts{r})); + for ii=1:size(fields1,1) + temp_vector = output.atlas_readouts.(fields{i}).(readouts{r}).(fields1{ii}){:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage_weighted')>0)}; + for iii=1:size(temp_vector,1) + [row, col] = find(ismember(numbers_region_matrix, temp_vector{iii,1})); + switch col + case 1 + col_final = ii*col; + case 2 + col_final = ii+col; + end + output.regions.table_collection.(['LV_', num2str(i)]).final_matrix(row, col_final) = temp_vector{iii,2}; + end + end + end + + % get absolute voxel values + load(atlas_path_full); + a_number = 1; + atlas_for_analysis = round(MRI_for_analysis(a_number,:)); + [C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); + counts_atlas = accumarray(ic_atlas, 1); + + test = ismember(numbers_region_matrix, C_atlas); + counts_atlas(1)=[]; + nn=1; + for i=1:size(numbers_region_matrix,1) + for ii=1:2 + if test(i,ii) + output.regions.complete_voxels_regions(i,ii)=counts_atlas(nn); + nn=nn+1; + else + output.regions.complete_voxels_regions(i,ii)=NaN; + end + end + end + + output.regions.complete_voxels_regions_mean = nanmean(output.regions.complete_voxels_regions,2); + + % get extra cerebellum data + % create template to sort voxels in + fields = output.regions.cerebellum.count.voxels; + temp_names = output.regions.cerebellum.count_names; + + % first compute the main cerebellum hemispheres using + % cerebellumhemispheres_indices + + for i=1:(size(fields,1)-1) + % create empty double template + output.regions.table_collection.(['LV_', num2str(i)]).cerebellum_matrix = nan(size(cerebellumhemispheres_indices,1), 
2*size(cerebellumhemispheres_indices,2)); + + for ii=1:(size(fields,2)-1) + try temp_vector = fields{i,ii}(:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage')>0)); + for iii=1:size(temp_vector,1) + [row, col] = find(ismember(cerebellumhemispheres_indices, temp_vector{iii,1})); + switch col + case 1 + col_final = ii*col; + case 2 + col_final = ii+col; + end + output.regions.table_collection.(['LV_', num2str(i)]).cerebellum_matrix(row, col_final) = temp_vector{iii,2}; + end + end + end + + output.regions.table_collection.(['LV_', num2str(i)]).vermis_matrix = nan(size(vermis_indices,1), 2*size(vermis_indices,2)); + + for ii=1:(size(fields,2)-1) + try temp_vector = fields{i,ii}(:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage')>0)); + for iii=1:size(temp_vector,1) + [row, ~] = find(ismember(vermis_indices, temp_vector{iii,1})); + output.regions.table_collection.(['LV_', num2str(i)]).vermis_matrix(row, ii) = temp_vector{iii,2}; + end + end + end + + + end + + + % get absolute voxel values for cerebellum + load(atlas_path_full); + a_number = 1; + atlas_for_analysis = round(MRI_for_analysis(a_number,:)); + [C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); + counts_atlas = accumarray(ic_atlas, 1); + + test = ismember(cerebellumhemispheres_indices, C_atlas); + counts_atlas(1)=[]; + nn=1; + for i=1:size(cerebellumhemispheres_indices,1) + for ii=1:2 + if test(i,ii) + output.regions.complete_voxels_cerebellum(i,ii)=counts_atlas(nn); + nn=nn+1; + else + output.regions.complete_voxels_cerebellum(i,ii)=NaN; + end + end + end + + output.regions.complete_voxels_cerebellum_mean = nanmean(output.regions.complete_voxels_cerebellum,2); + + % get absolute voxel values for vermis + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_649_CTQ_X.mat'; + load(atlas_path_full); + a_number = 1; + atlas_for_analysis = round(MRI_for_analysis(a_number,:)); + [C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); + counts_atlas = accumarray(ic_atlas, 1); + + test = ismember(vermis_indices, C_atlas); + counts_atlas(1)=[]; + nn=1; + for i=1:size(vermis_indices,1) + % for ii=1:2 + if test(i) + output.regions.complete_voxels_vermis(i,1)=counts_atlas(nn); + nn=nn+1; + else + output.regions.complete_voxels_vermis(i,1)=NaN; + end + % end + end + + save(results_path, 'input', 'setup', 'output'); + + +end + diff --git a/Visualization_Module/dp_regions_table_new.m b/Visualization_Module/dp_regions_table_new.m new file mode 100644 index 0000000..20582b1 --- /dev/null +++ b/Visualization_Module/dp_regions_table_new.m @@ -0,0 +1,154 @@ +%% DP script to write out voxels as table for paper +function OUT = dp_atlas_table_readout(IN); + +% load output file +results_path = '/volume/HCStress/Analysis/02-Jul-2020/CISS_636_IQRadd_NCV55_single_folds_bestmerge_noval_min10_2020_5000AUC_Dev/final_results/result_final_vis.mat'; +load(results_path); + +% create template to sort voxels in +fields = load('/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/brainnetome_indices.mat'); +temp = fieldnames(fields); +labels_regions = fields.(temp{1}); + +numbers_region_matrix = [[1:2:size(labels_regions,1)]',[2:2:size(labels_regions,1)]']; + +% create algorithm to sort all LV voxels in final_matrix template +fields = fieldnames(output.atlas_readouts); +temp_names = output.atlas_readouts.vector_1.brainnetome.positive.Properties.VariableNames; + +for i=1:(size(fields,1)-1) + % create empty double template + output.regions.table_collection.(['LV_', 
num2str(i)]).final_matrix = nan(size(numbers_region_matrix,1), 2*size(numbers_region_matrix,2)); + fields1 = fieldnames(output.atlas_readouts.(fields{i}).brainnetome); + for ii=1:size(fields1,1) + temp_vector = output.atlas_readouts.(fields{i}).brainnetome.(fields1{ii}){:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage_weighted')>0)}; + for iii=1:size(temp_vector,1) + [row, col] = find(ismember(numbers_region_matrix, temp_vector{iii,1})); + switch col + case 1 + col_final = ii*col; + case 2 + col_final = ii+col; + end + output.regions.table_collection.(['LV_', num2str(i)]).final_matrix(row, col_final) = temp_vector{iii,2}; + end + end +end + +% get absolute voxel values +atlas_path_full = '/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/brainnetome_3mm_636_CISS_NM_X.mat'; +load(atlas_path_full); +a_number = 1; +atlas_for_analysis = round(MRI_for_analysis(a_number,:)); +[C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); +counts_atlas = accumarray(ic_atlas, 1); + +test = ismember(numbers_region_matrix, C_atlas); +counts_atlas(1)=[]; +nn=1; +for i=1:size(numbers_region_matrix,1) + for ii=1:2 + if test(i,ii) + output.regions.complete_voxels_regions(i,ii)=counts_atlas(nn); + nn=nn+1; + else + output.regions.complete_voxels_regions(i,ii)=NaN; + end + end +end + +output.regions.complete_voxels_regions_mean = nanmean(output.regions.complete_voxels_regions,2); + +% get extra cerebellum data +% create template to sort voxels in +load('/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/cerebellum_vermis_indices.mat'); +fields = fieldnames(output.atlas_readouts); +temp_names = output.atlas_readouts.vector_1.cerebellum.positive.Properties.VariableNames; + +% first compute the main cerebellum hemispheres using +% cerebellumhemispheres_indices + +for i=1:(size(fields,1)-1) + % create empty double template + output.regions.table_collection.(['LV_', num2str(i)]).cerebellum_matrix = nan(size(cerebellumhemispheres_indices,1), 2*size(cerebellumhemispheres_indices,2)); + fields1 = fieldnames(output.atlas_readouts.(fields{i}).cerebellum); + for ii=1:size(fields1,1) + try temp_vector = output.atlas_readouts.(fields{i}).cerebellum.(fields1{ii}){:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage_weighted')>0)}; + for iii=1:size(temp_vector,1) + [row, col] = find(ismember(cerebellumhemispheres_indices, temp_vector{iii,1})); + switch col + case 1 + col_final = ii*col; + case 2 + col_final = ii+col; + end + output.regions.table_collection.(['LV_', num2str(i)]).cerebellum_matrix(row, col_final) = temp_vector{iii,2}; + end + end + end + + output.regions.table_collection.(['LV_', num2str(i)]).vermis_matrix = nan(size(vermis_indices,1), 2*size(vermis_indices,2)); + fields1 = fieldnames(output.atlas_readouts.(fields{i}).cerebellum); + for ii=1:size(fields1,1) + try temp_vector = output.atlas_readouts.(fields{i}).cerebellum.(fields1{ii}){:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage_weighted')>0)}; + for iii=1:size(temp_vector,1) + [row, col] = find(ismember(vermis_indices, temp_vector{iii,1})); + output.regions.table_collection.(['LV_', num2str(i)]).vermis_matrix(row, ii) = temp_vector{iii,2}; + end + end + end + + +end + +% get absolute voxel values for cerebellum +atlas_path_full = '/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_636_CISS_X.mat'; +load(atlas_path_full); +a_number = 1; +atlas_for_analysis = round(MRI_for_analysis(a_number,:)); +[C_atlas, ~, ic_atlas] = 
unique(atlas_for_analysis); +counts_atlas = accumarray(ic_atlas, 1); + +test = ismember(cerebellumhemispheres_indices, C_atlas); +counts_atlas(1)=[]; +nn=1; +for i=1:size(cerebellumhemispheres_indices,1) + for ii=1:2 + if test(i,ii) + output.regions.complete_voxels_cerebellum(i,ii)=counts_atlas(nn); + nn=nn+1; + else + output.regions.complete_voxels_cerebellum(i,ii)=NaN; + end + end +end + +output.regions.complete_voxels_cerebellum_mean = nanmean(output.regions.complete_voxels_cerebellum,2); + +% get absolute voxel values for vermis +atlas_path_full = '/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_636_CISS_X.mat'; +load(atlas_path_full); +a_number = 1; +atlas_for_analysis = round(MRI_for_analysis(a_number,:)); +[C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); +counts_atlas = accumarray(ic_atlas, 1); + +test = ismember(vermis_indices, C_atlas); +counts_atlas(1)=[]; +nn=1; +for i=1:size(vermis_indices,1) +% for ii=1:2 + if test(i) + output.regions.complete_voxels_vermis(i,1)=counts_atlas(nn); + nn=nn+1; + else + output.regions.complete_voxels_vermis(i,1)=NaN; + end +% end +end + +% save(results_path, 'input', 'setup', 'output'); + + + + diff --git a/Visualization_Module/dp_regions_table_new.m~ b/Visualization_Module/dp_regions_table_new.m~ new file mode 100644 index 0000000..b2d296a --- /dev/null +++ b/Visualization_Module/dp_regions_table_new.m~ @@ -0,0 +1,153 @@ +%% DP script to write out voxels as table for paper + +% load output file +results_path = '/volume/HCStress/Analysis/02-Jul-2020/CISS_636_IQRadd_NCV55_single_folds_bestmerge_noval_min10_2020_5000AUC_Dev/final_results/result_final_vis.mat'; +load(results_path); + +% create template to sort voxels in +fields = load('/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/brainnetome_indices.mat'); +temp = fieldnames(fields); +labels_regions = fields.(temp{1}); + +numbers_region_matrix = [[1:2:size(labels_regions,1)]',[2:2:size(labels_regions,1)]']; + +% create algorithm to sort all LV voxels in final_matrix template +fields = fieldnames(output.atlas_readouts); +temp_names = output.atlas_readouts.vector_1.brainnetome.positive.Properties.VariableNames; + +for i=1:(size(fields,1)-1) + % create empty double template + output.regions.table_collection.(['LV_', num2str(i)]).final_matrix = nan(size(numbers_region_matrix,1), 2*size(numbers_region_matrix,2)); + fields1 = fieldnames(output.atlas_readouts.(fields{i}).brainnetome); + for ii=1:size(fields1,1) + temp_vector = output.atlas_readouts.(fields{i}).brainnetome.(fields1{ii}){:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage_weighted')>0)}; + for iii=1:size(temp_vector,1) + [row, col] = find(ismember(numbers_region_matrix, temp_vector{iii,1})); + switch col + case 1 + col_final = ii*col; + case 2 + col_final = ii+col; + end + output.regions.table_collection.(['LV_', num2str(i)]).final_matrix(row, col_final) = temp_vector{iii,2}; + end + end +end + +% get absolute voxel values +atlas_path_full = '/volume/HCStress/Data/MRI/Atlases/Brainnetome_Atlas/brainnetome_3mm_636_CISS_NM_X.mat'; +load(atlas_path_full); +a_number = 1; +atlas_for_analysis = round(MRI_for_analysis(a_number,:)); +[C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); +counts_atlas = accumarray(ic_atlas, 1); + +test = ismember(numbers_region_matrix, C_atlas); +counts_atlas(1)=[]; +nn=1; +for i=1:size(numbers_region_matrix,1) + for ii=1:2 + if test(i,ii) + output.regions.complete_voxels_regions(i,ii)=counts_atlas(nn); + nn=nn+1; + else + 
output.regions.complete_voxels_regions(i,ii)=NaN; + end + end +end + +output.regions.complete_voxels_regions_mean = nanmean(output.regions.complete_voxels_regions,2); + +% get extra cerebellum data +% create template to sort voxels in +load('/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/cerebellum_vermis_indices.mat'); +fields = fieldnames(output.atlas_readouts); +temp_names = output.atlas_readouts.vector_1.cerebellum.positive.Properties.VariableNames; + +% first compute the main cerebellum hemispheres using +% cerebellumhemispheres_indices + +for i=1:(size(fields,1)-1) + % create empty double template + output.regions.table_collection.(['LV_', num2str(i)]).cerebellum_matrix = nan(size(cerebellumhemispheres_indices,1), 2*size(cerebellumhemispheres_indices,2)); + fields1 = fieldnames(output.atlas_readouts.(fields{i}).cerebellum); + for ii=1:size(fields1,1) + try temp_vector = output.atlas_readouts.(fields{i}).brainnetome.(fields1{ii}){:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage_weighted')>0)}; + for iii=1:size(temp_vector,1) + [row, col] = find(ismember(cerebellumhemispheres_indices, temp_vector{iii,1})); + switch col + case 1 + col_final = ii*col; + case 2 + col_final = ii+col; + end + output.regions.table_collection.(['LV_', num2str(i)]).cerebellum_matrix(row, col_final) = temp_vector{iii,2}; + end + end + end + + output.regions.table_collection.(['LV_', num2str(i)]).vermis_matrix = nan(size(vermis_indices,1), 2*size(vermis_indices,2)); + + for ii=1:(size(fields,2)-1) + try temp_vector = output.atlas_readouts.(fields{i}).brainnetome.(fields1{ii}){:, (strcmp(temp_names, 'region_number')+strcmp(temp_names, 'voxel_percentage_weighted')>0)}; + for iii=1:size(temp_vector,1) + [row, ~] = find(ismember(vermis_indices, temp_vector{iii,1})); + output.regions.table_collection.(['LV_', num2str(i)]).vermis_matrix(row, ii) = temp_vector{iii,2}; + end + end + end + + +end + +% get absolute voxel values for cerebellum +atlas_path_full = '/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_636_CISS_X.mat'; +load(atlas_path_full); +a_number = 1; +atlas_for_analysis = round(MRI_for_analysis(a_number,:)); +[C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); +counts_atlas = accumarray(ic_atlas, 1); + +test = ismember(cerebellumhemispheres_indices, C_atlas); +counts_atlas(1)=[]; +nn=1; +for i=1:size(cerebellumhemispheres_indices,1) + for ii=1:2 + if test(i,ii) + output.regions.complete_voxels_cerebellum(i,ii)=counts_atlas(nn); + nn=nn+1; + else + output.regions.complete_voxels_cerebellum(i,ii)=NaN; + end + end +end + +output.regions.complete_voxels_cerebellum_mean = nanmean(output.regions.complete_voxels_cerebellum,2); + +% get absolute voxel values for vermis +atlas_path_full = '/volume/HCStress/Data/MRI/Atlases/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_636_CISS_X.mat'; +load(atlas_path_full); +a_number = 1; +atlas_for_analysis = round(MRI_for_analysis(a_number,:)); +[C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); +counts_atlas = accumarray(ic_atlas, 1); + +test = ismember(vermis_indices, C_atlas); +counts_atlas(1)=[]; +nn=1; +for i=1:size(vermis_indices,1) +% for ii=1:2 + if test(i) + output.regions.complete_voxels_vermis(i,1)=counts_atlas(nn); + nn=nn+1; + else + output.regions.complete_voxels_vermis(i,1)=NaN; + end +% end +end + +% save(results_path, 'input', 'setup', 'output'); + + + + diff --git a/Visualization_Module/dp_resample_image.m b/Visualization_Module/dp_resample_image.m new file mode 100644 index 
0000000..98ebe3c --- /dev/null +++ b/Visualization_Module/dp_resample_image.m @@ -0,0 +1,19 @@ +%% script to resample images + +path_brains = pwd; %'/volume/HCStress/Analysis/Resilience/DP_CISS_RSA_HC_ROD_CHR_ROP_Diag_634_GM_80PI_20GD_correct10_10_XY_SC_3/images'; + +brains_selected = 2; +voxsiz=[1 1 1]; +% name = '/volume/HCStress/Data/MRI/DP_CISS_636_average_optthr.nii'; +for b=1:brains_selected + V = spm_vol([path_brains, '/brain_LV_final_', num2str(b), '.nii']); +% V = spm_vol(name); + for i=1:numel(V) + bb = spm_get_bbox(V(i)); + VV(1:2) = V(i); + VV(1).mat = spm_matrix([bb(1,:) 0 0 0 voxsiz])*spm_matrix([-1 -1 -1]); + VV(1).dim = ceil(VV(1).mat \ [bb(2,:) 1]' - 0.1)'; + VV(1).dim = VV(1).dim(1:3); + spm_reslice(VV,struct('mean',false,'which',1,'interp',1)); % 1 for linear + end +end \ No newline at end of file diff --git a/Visualization_Module/dp_resample_image.m~ b/Visualization_Module/dp_resample_image.m~ new file mode 100644 index 0000000..8b9b393 --- /dev/null +++ b/Visualization_Module/dp_resample_image.m~ @@ -0,0 +1,18 @@ +%% script to resample images + +path_brains = pwd; %'/volume/HCStress/Analysis/Resilience/DP_CISS_RSA_HC_ROD_CHR_ROP_Diag_634_GM_80PI_20GD_correct10_10_XY_SC_3/images'; + +brains_selected = 1:7; +voxsiz=[1 1 1]; + +for b=1:numel(brains_selected) + V = spm_vol([path_brains, '/brain_LV_final_', num2str(brains_selected(b)), '.nii']); + for i=1:numel(V) + bb = spm_get_bbox(V(i)); + VV(1:2) = V(i); + VV(1).mat = spm_matrix([bb(1,:) 0 0 0 voxsiz])*spm_matrix([-1 -1 -1]); + VV(1).dim = ceil(VV(1).mat \ [bb(2,:) 1]' - 0.1)'; + VV(1).dim = VV(1).dim(1:3); + spm_reslice(VV,struct('mean',false,'which',1,'interp',1)); % 1 for linear + end +end \ No newline at end of file diff --git a/Visualization_Module/dp_results_collection_new.m b/Visualization_Module/dp_results_collection_new.m new file mode 100644 index 0000000..f9cb645 --- /dev/null +++ b/Visualization_Module/dp_results_collection_new.m @@ -0,0 +1,290 @@ +%% DP function to analyze results in varying grid densities +%% prepare indices + +output.parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop +opt_u = strcmp(output.parameters_names,'u'); +opt_v = strcmp(output.parameters_names,'v'); +opt_p = strcmp(output.parameters_names,'p'); +opt_RHO = strcmp(output.parameters_names,'RHO'); +opt_cu = strcmp(output.parameters_names,'cu'); +opt_cv = strcmp(output.parameters_names,'cv'); + +opt_parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'success', 'RHO', 'p'}; +opt_param_p = strcmp(opt_parameters_names, 'p'); +opt_param_v = strcmp(opt_parameters_names, 'v'); +opt_param_u = strcmp(opt_parameters_names, 'u'); +opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); +opt_param_cu = strcmp(opt_parameters_names, 'cu'); +opt_param_cv = strcmp(opt_parameters_names, 'cv'); + +load('/volume/HCStress/Doc/Stress_Resilience_questionnaires.mat'); + +%% % variables to assess: significant_LVs, range_RHO, range_p +results_paths_stress = {'/volume/HCStress/Analysis/20-Sep-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_626_GM_80PI_4GD_correct_1/final_results/result.mat',... + '/volume/HCStress/Analysis/22-Oct-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_627_GM_80PI_3GD_correct_1/final_results/result.mat',... + '/volume/HCStress/Analysis/20-Sep-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_626_GM_80PI_6GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/22-Oct-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_627_GM_80PI_5GD_correct/final_results/result.mat',... 
+ '/volume/HCStress/Analysis/22-Sep-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_626_GM_80PI_8GD_correct_2/final_results/result.mat',... + '/volume/HCStress/Analysis/23-Oct-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_627_GM_80PI_7GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/24-Sep-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_626_GM_80PI_8GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/23-Oct-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_627_GM_80PI_9GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/25-Sep-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_626_GM_80PI_10GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/27-Sep-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_626_GM_80PI_12GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/01-Oct-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_626_GM_80PI_14GD_correct/final_results/result.mat'}; + +results_paths_resilience = {'/volume/HCStress/Analysis/21-Sep-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_631_GM_80PI_4GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/23-Oct-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_634_GM_80PI_3GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/22-Sep-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_631_GM_80PI_6GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/23-Oct-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_634_GM_80PI_5GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/22-Sep-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_631_GM_80PI_8GD_correct_3/final_results/result.mat',... + '/volume/HCStress/Analysis/26-Oct-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_634_GM_80PI_7GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/24-Sep-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_631_GM_80PI_8GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/26-Oct-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_634_GM_80PI_9GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/25-Sep-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_631_GM_80PI_10GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/27-Sep-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_631_GM_80PI_12GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/01-Oct-2018/DP_CISS_RSA_HC_ROD_CHR_ROP_631_GM_80PI_14GD_correct/final_results/result.mat'}; + +results_paths = {'/volume/HCStress/Analysis/12-Nov-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_627_GM_80PI_10GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/12-Nov-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_627_GM_80PI_10GD_correct/final_results/result_pruned_0_5onlysig_top10.mat',... + '/volume/HCStress/Analysis/12-Nov-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_627_GM_80PI_10GD_correct/final_results/result_top10.mat',... + '/volume/HCStress/Analysis/09-Nov-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_627_GM_80PI_20GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/09-Nov-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_627_GM_80PI_20GD_correct/final_results/result_pruned_0_5onlysig.mat',... + '/volume/HCStress/Analysis/09-Nov-2018/DP_CTQ_BS_HC_ROD_CHR_ROP_627_GM_80PI_20GD_correct/final_results/result_top10.mat'}; + +load(results_paths{i}); +input.name=[input.name, '_new']; +save(results_paths{1}, 'input', 'output', 'setup'); + +% CTQ and BS combined +results_paths = {'/volume/HCStress/Analysis/14-Feb-2019/DP_CTQ_BS_allgroups_627_GM_80PI_1GO_10X10_20GD_10Y10_20GD_correct10_10_XY_SC_3_Diag/final_results/result.mat',... + '/volume/HCStress/Analysis/25-Feb-2019/DP_CTQ_BS_allgroups_627_GM_80PI_1GO_10X15_20GD_10Y10_20GD_correct10_10_XY_SC_3_Diag/final_results/result.mat',... 
+ '/volume/HCStress/Analysis/26-Feb-2019/DP_CTQ_BS_allgroups_627_GM_80PI_12GO_520X_03020GD_110Y_01020GD_correct10_10_XY_SC_3_Diag/final_results/result.mat',... + '/volume/HCStress/Analysis/26-Feb-2019/DP_CTQ_BS_allgroups_627_GM_80PI_1GO_10X20_20GD_10Y10_20GD_correct10_10_XY_SC_3_Diag/final_results/result.mat',... + '/volume/HCStress/Analysis/27-Feb-2019/DP_CTQ_BS_allgroups_627_GM_80PI_12GO_5010X_06010GD_110Y_01010GD_correct10_10_XY_SC_3_Diag_1/final_results/result.mat',... + '/volume/HCStress/Analysis/28-Feb-2019/DP_CTQ_BS_allgroups_627_GM_80PI_12GO_110X_06020GD_110Y_01020GD_correct10_5_XY_SC_3_Diag/final_results/result.mat',... + '/volume/HCStress/Analysis/28-Feb-2019/DP_CTQ_BS_allgroups_627_GM_80PI_1GO_1X_020GD_1Y_020GD_correct10_5_XY_SC_3_Diag/final_results/result.mat',... + '/volume/HCStress/Analysis/28-Feb-2019/DP_CTQ_BS_allgroups_627_GM_80PI_1GO_1X_020GD_1Y_020GD_correct5_5_XY_SC_3_Diag/final_results/result.mat',... + '/volume/HCStress/Analysis/28-Feb-2019/DP_CTQ_BS_allgroups_627_GM_80PI_12GO_15X_06020GD_15Y_01020GD_correct5_10_XY_SC_3_Diag/final_results/result.mat',... + '/volume/HCStress/Analysis/01-Mar-2019/DP_CTQ_BS_allgroups_627_GM_80PI_12GO_110X_07020GD_110Y_01020GD_correct10_5_XY_SC_3_Diag/final_results/result.mat',... + '/volume/HCStress/Analysis/04-Mar-2019/DP_CTQ_BS_allgroups_627_GM_80PI_12GO_11X_07020GD_11Y_01020GD_correct10_5_XY_SC_3_Diag/final_results/result.mat'}; +% only CTQ +results_paths = {'/volume/HCStress/Analysis/15-Feb-2019/DP_CTQ_allgroups_649_GM_80PI_1GO_1X0_20GD_1Y0_20GD_correct10_10_XY_SC_3_Diag_1/final_results/result.mat',... + '/volume/HCStress/Analysis/17-Feb-2019/DP_CTQ_allgroups_649_GM_80PI_1GO_5X5_20GD_5Y5_20GD_correct10_10_XY_SC_3_Diag/final_results/result.mat',... + '/volume/HCStress/Analysis/17-Feb-2019/DP_CTQ_allgroups_649_GM_80PI_2GO_5X5_20GD_5Y5_20GD_correct10_10_XY_SC_3_Diag/final_results/result.mat',... + '/volume/HCStress/Analysis/17-Feb-2019/DP_CTQ_allgroups_649_GM_80PI_1GO_5X5_20GD_5Y5_20GD_correct10_10_XY_SC_3_Diag/final_results/result.mat'}; + +% single +results_paths = {'/volume/HCStress/Analysis/17-Feb-2020/DP_CTQ_allgroups_649_GM_80PI_1GO_1X0_20GD_1Y0_20GD_correct5_5_XY_SC_3_Diag/final_results/result.mat'}; + +% try rmdir('/home/dpopovic/.mcrCache8.5', 's') +% catch ME +% end + +IN.specific = {'correlation', 'atlas', 'behavior', 'images', 'sociodemographic'}; % 'correlation', 'atlas', 'behavior', 'images', 'sociodemographic', 'outcome' +IN.SD_selection = {'BDI', 'GAF', 'NEO', 'QOL'}; %'BDI', 'GAF', 'NEO', 'QOL' +IN.type = 1; % 1:new, 2:old + +for i=1:numel(results_paths) + IN.results_path = results_paths{i}; + if any(strfind(IN.results_path, 'CTQ')) + IN.overall_analysis = 'Stress'; + elseif any(strfind(IN.results_path, 'CISS')) + IN.overall_analysis = 'Resilience'; + else + disp('Something''s wrong!'); + end + dp_visualize_data_multi_2020(IN); +% dp_brain_regions(results_paths{i}, 'Stress'); +end + +% compute adjusted p values (FDR) +output = dp_fdr_posthoc_adjust(results_paths{1}); + +%% single groups 4GD +detailed_paths=[]; +detailed_paths.GD_4 = {'/volume/HCStress/Analysis/05-Oct-2018/DP_CTQ_BS_HC_251_GM_80PI_4GD_correct_2/final_results/result.mat',... + '/volume/HCStress/Analysis/05-Oct-2018/DP_CTQ_BS_ROD_123_GM_80PI_4GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/05-Oct-2018/DP_CTQ_BS_CHR_115_GM_80PI_4GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/05-Oct-2018/DP_CTQ_BS_ROP_124_GM_80PI_4GD_correct/final_results/result.mat',... 
+ '/volume/HCStress/Analysis/06-Oct-2018/DP_CTQ_BS_CHR_ROD_ROP_373_GM_80PI_4GD_correct/final_results/result.mat'}; + +%% single groups 2GD +detailed_paths.GD_2 = {'/volume/HCStress/Analysis/07-Oct-2018/DP_CTQ_BS_HC_251_GM_80PI_2GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/07-Oct-2018/DP_CTQ_BS_ROD_123_GM_80PI_2GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/07-Oct-2018/DP_CTQ_BS_CHR_115_GM_80PI_2GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/07-Oct-2018/DP_CTQ_BS_ROP_124_GM_80PI_2GD_correct/final_results/result.mat'}; + +%% single groups 6GD +detailed_paths.GD_6 = {'/volume/HCStress/Analysis/07-Oct-2018/DP_CTQ_BS_HC_251_GM_80PI_6GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/07-Oct-2018/DP_CTQ_BS_ROD_123_GM_80PI_6GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/07-Oct-2018/DP_CTQ_BS_CHR_115_GM_80PI_6GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/08-Oct-2018/DP_CTQ_BS_ROP_124_GM_80PI_6GD_correct/final_results/result.mat'}; + +%% single groups 8GD +detailed_paths.GD_8={'/volume/HCStress/Analysis/09-Oct-2018/DP_CTQ_BS_HC_251_GM_80PI_8GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/09-Oct-2018/DP_CTQ_BS_ROD_123_GM_80PI_8GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/12-Oct-2018/DP_CTQ_BS_CHR_115_GM_80PI_8GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/12-Oct-2018/DP_CTQ_BS_ROP_124_GM_80PI_8GD_correct/final_results/result.mat'}; + +%% single groups 10GD +detailed_paths.GD_10={'/volume/HCStress/Analysis/12-Oct-2018/DP_CTQ_BS_HC_251_GM_80PI_10GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/12-Oct-2018/DP_CTQ_BS_ROD_123_GM_80PI_10GD_correct/final_results/result.mat',... + '/volume/HCStress/Analysis/13-Oct-2018/DP_CTQ_BS_CHR_115_GM_80PI_10GD_correct/final_results/result.mat',... 
+ '/volume/HCStress/Analysis/13-Oct-2018/DP_CTQ_BS_ROP_124_GM_80PI_10GD_correct/final_results/result.mat'}; + +fields = fieldnames(detailed_paths); + +for i=1:numel(fields) + for ii=1:numel(detailed_paths.(fields{i})) + dp_visualize_data(detailed_paths.(fields{i}){ii}, 'Stress'); + end +end + +for i=1:size(results_paths,2) + load(results_paths{i}); + results_collection.grid_density(i) = input.grid_density(1); + detailed_results_folder = [results_paths{i}(1:(strfind(results_paths{i}, 'final')-1)), 'detailed_results']; + for o=1:size(output.final_parameters,1) + load([detailed_results_folder, '/opt_parameters_' num2str(o), '.mat']); + if exist('opt_parameters', 'var') + if size(opt_parameters,2) == 11 + index_RHO = opt_RHO; + index_p = opt_p; + elseif size(opt_parameters,2) == 8 + index_RHO = opt_param_RHO; + index_p = opt_param_p; + else + disp('Something''s wrong'); + end + results_collection.range_RHO.(['GD_',num2str(input.grid_density(1,1))])(:,o) = [opt_parameters{:,index_RHO}]; + results_collection.range_p.(['GD_',num2str(input.grid_density(1,1))])(:,o) = [opt_parameters{:,index_p}]; + elseif exist('opt_parameters_temp', 'var') + if size(opt_parameters_temp,2) == 11 + index_RHO = opt_RHO; + index_p = opt_p; + elseif size(opt_parameters_temp,2) == 8 + index_RHO = opt_param_RHO; + index_p = opt_param_p; + else + disp('Something''s wrong'); + end + results_collection.range_RHO.(['GD_',num2str(input.grid_density(1,1))])(:,o) = [opt_parameters_temp{:,index_RHO}]; + results_collection.range_p.(['GD_',num2str(input.grid_density(1,1))])(:,o) = [opt_parameters_temp{:,index_p}]; + end + clear opt_parameters opt_parameters_temp; + end + + if size(output.final_parameters,2) == 11 + index_RHO = opt_RHO; + index_p = opt_p; + index_cu = opt_cu; + index_cv = opt_cv; + elseif size(output.final_parameters,2) == 8 + index_RHO = opt_param_RHO; + index_p = opt_param_p; + index_cu = opt_param_cu; + index_cv = opt_param_cv; + else + disp('Something''s wrong'); + end + + if size(output.pvalue_FDR,2)>1 + output.pvalue_FDR = output.pvalue_FDR'; + end + + try log_sig = [output.final_parameters{:,index_p}]<=output.pvalue_FDR; + catch + log_sig = [output.final_parameters{:,index_p}]'<=output.pvalue_FDR; + end + + results_collection.sig_LV(i) = sum(log_sig); + + temp_cu = cell2mat(output.final_parameters(:, index_cu)); + temp_cv = cell2mat(output.final_parameters(:, index_cv)); + + results_collection.cu_mean(i) = mean(temp_cu(log_sig)); + results_collection.cv_mean(i) = mean(temp_cv(log_sig)); + +end + +fields = fieldnames(results_collection); + +% results_collection_temp.cu_mean = results_collection.cu_mean/sqrt(size(output.final_parameters{1,index_u},1)); +% results_collection_temp.cv_mean = results_collection.cv_mean/sqrt(size(output.final_parameters{1,index_v},1)); + +for r=1:size(fields,1) + temp = results_collection.(fields{r}); + if ~ strcmp(fields{r},'grid_density') + try results_collection_temp.(fields{r}) = temp/(max(temp)); + catch + results_collection_temp.(fields{r}) = temp; + end + else + results_collection_temp.(fields{r}) = temp; + end +end + +%% plot results +collection_folder = '/volume/HCStress/Analysis/Resilience'; +if ~exist(collection_folder) + mkdir(collection_folder); +end +f = figure(); +plot(results_collection.grid_density, results_collection.sig_LV, '--o'); +name = strrep(input.name(1:(strfind(input.name, 'GD')-3)), '_', ' '); +title([name, ' grid density plot']); % add third line +axis([0 max(results_collection.grid_density)+2 0 max(results_collection.sig_LV)+2]); +set(gca, 'XTick', [0 : 
20])%1 : results_collection.grid_density(end)]); +set(gca, 'YTick', [0 : 20]);% : results_collection.sig_LV(end)]); +xlabel('grid density'); +ylabel('number of significant LV'); +set(gcf, 'Position', get(0, 'Screensize')); +set(gcf,'PaperPositionMode','auto') +print(f, [collection_folder, '/sig_LV_GD'], '-dpng', '-r0'); +saveas(f, [collection_folder, '/sig_LV_GD.fig']); + +% range p +f = figure(); +fields = fieldnames(results_collection.range_p); +for i=1:size(fields,1) + subplot(round(size(fields,1)/2), 2, i); + boxplot(results_collection.range_p.(fields{i})); + xlabel('LV No.'); + ylabel('p value'); + title(strrep(fields{i},'_',' ')); +end +set(gcf, 'Position', get(0, 'Screensize')); +set(gcf,'PaperPositionMode','auto') +print([collection_folder, '/range_p_GD'], '-dpng', '-r0'); +saveas(f, [collection_folder, '/range_p_GD.fig']); + +% range RHO +f = figure(); +fields = fieldnames(results_collection.range_RHO); +for i=1:size(fields,1) + subplot(round(size(fields,1)/2), 2, i); + boxplot(results_collection.range_RHO.(fields{i})); + xlabel('LV No.'); + ylabel('RHO value'); + title(strrep(fields{i},'_',' ')); +end +set(gcf, 'Position', get(0, 'Screensize')); +set(gcf,'PaperPositionMode','auto') +print([collection_folder, '/range_RHO_GD'], '-dpng', '-r0'); +saveas(f, [collection_folder, '/range_RHO_GD.fig']); + +% plotting GD, sig LV, cu and cv means as a decision function +f = figure(); +temp = results_collection_temp; +x = temp.grid_density; +y1 = temp.sig_LV; +y2 = temp.cu_mean; +y3 = temp.cv_mean; +plot(x,y1, 'b--o', x,y2, 'g--o', x,y3, 'r--o'); +legend('significant LVs','cu mean','cv mean'); +set(gcf, 'Position', get(0, 'Screensize')); +set(gcf,'PaperPositionMode','auto') +print(f, [collection_folder, '/sig_LV_cu_cv'], '-dpng', '-r0'); +saveas(f, [collection_folder, '/sig_LV_cu_cv.fig']); + +close all; diff --git a/Visualization_Module/dp_setup_framework.m b/Visualization_Module/dp_setup_framework.m new file mode 100644 index 0000000..6abe5ac --- /dev/null +++ b/Visualization_Module/dp_setup_framework.m @@ -0,0 +1,68 @@ +%% DP function to create ML framework + +function CV = dp_setup_framework(IN) + +switch IN.type % 1 = nested cross-validation, 2 = random hold-out splits, 3 = LSOV, 4 = random split-half + case 1 + try + CV.cv_outer_indices = struct; + CV.cv_outer_indices = nk_CVpartition2(IN.OB, IN.OF, IN.labels); + catch + disp(['Not enough subjects for nested cross-validation with ', num2str(IN.OF), ' outer folds']); + end + for ob=1:IN.OB + for w=1:IN.OF + CV.cv_outer_test_labels{ob,w} = IN.labels(CV.cv_outer_indices.TestInd{ob,w},:); + CV.cv_outer_train_labels{ob,w} = IN.labels(CV.cv_outer_indices.TrainInd{ob,w},:); + CV.cv_inner_indices{ob,w} = nk_CVpartition2(IN.IB, IN.IF, CV.cv_outer_train_labels{ob,w}); + end + end + + case 2 + try + CV.cv_outer_indices = struct; + CV.cv_outer_indices = dp_HOpartition(IN.OF, IN.labels); + catch + disp('Something went wrong with the hold-out partitions in the outer fold. Please check your label data.'); + end + try + for w=1:IN.OF + CV.cv_outer_test_labels{1,w} = IN.labels(CV.cv_outer_indices.TestInd{1,w},:); + CV.cv_outer_train_labels{1,w} = IN.labels(CV.cv_outer_indices.TrainInd{1,w},:); + CV.cv_inner_indices{1,w} = dp_HOpartition(IN.IF, CV.cv_outer_train_labels{1,w}); + end + catch + disp('Something went wrong with the hold-out partitions in the inner fold. 
Please check your label data.'); + end + case 3 + CV.cv_outer_indices = struct; + CV.cv_outer_indices = dp_LSOVpartition(IN.labels); + % if ~isfield(IN, 'additional_NCV') + % IN.additional_NCV = 1; + % end + if isempty(IN.sublabels) + % LSOV on inner loop + for w=1:size(IN.labels,2) + CV.cv_outer_test_labels{1,w} = IN.labels(CV.cv_outer_indices.TestInd{1,w},:); + CV.cv_outer_train_labels{1,w} = IN.labels(CV.cv_outer_indices.TrainInd{1,w},:); + CV.cv_inner_indices{1,w} = dp_LSOVpartition(CV.cv_outer_train_labels{1,w}); + end + else% CV on inner loop + for w=1:size(IN.labels,2) + CV.cv_outer_test_labels{1,w} = IN.sublabels(CV.cv_outer_indices.TestInd{1,w},:); + CV.cv_outer_train_labels{1,w} = IN.sublabels(CV.cv_outer_indices.TrainInd{1,w},:); + CV.cv_inner_indices{1,w} = nk_CVpartition2(IN.IB, IN.IF, CV.cv_outer_train_labels{1,w}); + end + end + case 4 + % use randperm to split the sample in half, then do NCV in the half + CV.cv_outer_indices = struct; + CV.cv_outer_indices = dp_RSHpartition(1, IN.labels); + + for w=1:IN.OF + CV.cv_outer_train_labels{1,w} = IN.labels(CV.cv_outer_indices.TrainInd{1,w},:); + CV.cv_inner_indices{1,w} = nk_CVpartition2(IN.IB, IN.IF, CV.cv_outer_train_labels{1,w}); + end +end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_setup_framework_draper.m b/Visualization_Module/dp_setup_framework_draper.m new file mode 100644 index 0000000..bbbcbae --- /dev/null +++ b/Visualization_Module/dp_setup_framework_draper.m @@ -0,0 +1,75 @@ +%% DP function to create ML framework + +function CV = dp_setup_framework_draper(IN) + +switch IN.type % 1 = nested cross-validation, 2 = random hold-out splits, 3 = LSOV, 4 = random split-half + case 1 + + for ob=1:IN.OB + outer_indices = crossvalind('Kfold',IN.labels,IN.OF); + for oo=1:IN.OF + CV.cv_outer_indices.TrainInd{ob,oo} = find(outer_indices~=oo); + CV.cv_outer_indices.TestInd{ob,oo} = find(outer_indices==oo); + end + end + for ob=1:IN.OB + for w=1:IN.OF + CV.cv_outer_test_labels{ob,w} = IN.labels(CV.cv_outer_indices.TestInd{ob,w},:); + CV.cv_outer_train_labels{ob,w} = IN.labels(CV.cv_outer_indices.TrainInd{ob,w},:); + for ib=1:IN.IB + inner_indices = crossvalind('Kfold',CV.cv_outer_train_labels{ob,w},IN.IF); + for ii=1:IN.IF + CV.cv_inner_indices{ob,w}.TrainInd{ib,ii} = find(inner_indices~=ii); + CV.cv_inner_indices{ob,w}.TestInd{ib,ii} = find(inner_indices==ii); + end + end + end + end + case 2 + try + CV.cv_outer_indices = struct; + CV.cv_outer_indices = dp_HOpartition(IN.OF, IN.labels); + catch + disp('Something went wrong with the hold-out partitions in the outer fold. Please check your label data.'); + end + try + for w=1:IN.OF + CV.cv_outer_test_labels{1,w} = IN.labels(CV.cv_outer_indices.TestInd{1,w},:); + CV.cv_outer_train_labels{1,w} = IN.labels(CV.cv_outer_indices.TrainInd{1,w},:); + CV.cv_inner_indices{1,w} = dp_HOpartition(IN.IF, CV.cv_outer_train_labels{1,w}); + end + catch + disp('Something went wrong with the hold-out partitions in the inner fold. 
Please check your label data.'); + end + case 3 + CV.cv_outer_indices = struct; + CV.cv_outer_indices = dp_LSOVpartition(IN.labels); + % if ~isfield(IN, 'additional_NCV') + % IN.additional_NCV = 1; + % end + if isempty(IN.sublabels) + % LSOV on inner loop + for w=1:size(IN.labels,2) + CV.cv_outer_test_labels{1,w} = IN.labels(CV.cv_outer_indices.TestInd{1,w},:); + CV.cv_outer_train_labels{1,w} = IN.labels(CV.cv_outer_indices.TrainInd{1,w},:); + CV.cv_inner_indices{1,w} = dp_LSOVpartition(CV.cv_outer_train_labels{1,w}); + end + else% CV on inner loop + for w=1:size(IN.labels,2) + CV.cv_outer_test_labels{1,w} = IN.sublabels(CV.cv_outer_indices.TestInd{1,w},:); + CV.cv_outer_train_labels{1,w} = IN.sublabels(CV.cv_outer_indices.TrainInd{1,w},:); + CV.cv_inner_indices{1,w} = nk_CVpartition2(IN.IB, IN.IF, CV.cv_outer_train_labels{1,w}); + end + end + case 4 + % use randperm to split the sample in half, then do NCV in the half + CV.cv_outer_indices = struct; + CV.cv_outer_indices = dp_RSHpartition(1, IN.labels); + + for w=1:IN.OF + CV.cv_outer_train_labels{1,w} = IN.labels(CV.cv_outer_indices.TrainInd{1,w},:); + CV.cv_inner_indices{1,w} = nk_CVpartition2(IN.IB, IN.IF, CV.cv_outer_train_labels{1,w}); + end +end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_setup_parameters.m b/Visualization_Module/dp_setup_parameters.m new file mode 100644 index 0000000..19b19c5 --- /dev/null +++ b/Visualization_Module/dp_setup_parameters.m @@ -0,0 +1,148 @@ +%% DP function to set up parameters + +function [input, setup, X, Y, B, K, W, OB, IB, size_sets_permutation, size_sets_bootstrap, correlation_method, cs_method, selection_train, selection_retrain, correction_target] = dp_setup_parameters(input, setup) + +if isfield(input, 'X') + try temp = load(input.X); + field = fieldnames(temp); + X = temp.(field{1}); + clear('temp'); + catch + X = input.X; + end +end + +if isfield(input, 'Y') + try temp = load(input.Y); + field = fieldnames(temp); + Y = temp.(field{1}); + clear('temp'); + catch + Y = input.Y; + end +end + +if ~isfield(input, 'correct_limit') + input.correct_limit=1; +end + +if ~isfield(setup, 'max_sim_jobs') + setup.max_sim_jobs = 40; +end + +if ~isfield(input, 'permutation_testing') + input.permutation_testing = 5000; +end +B = input.permutation_testing; + +if ~isfield(input, 'inner_folds') + input.inner_folds = 10; +end +K = input.inner_folds; + +if ~isfield(input, 'outer_folds') + input.outer_folds = 10; +end +W = input.outer_folds; + +if ~isfield(input, 'outer_permutations') + input.outer_permutations = 1; +end +OB = input.outer_permutations; + +if ~isfield(input, 'inner_permutations') + input.inner_permutations = 1; +end +IB = input.inner_permutations; + +if ~isfield(input, 'validation_train') + input.validation_train = 1; +end + +if ~isfield(input, 'size_sets_permutation') + input.size_sets_permutation = round(input.permutation_testing/setup.max_sim_jobs); +end +size_sets_permutation = input.size_sets_permutation; + +if ~isfield(input, 'permutation_testing_precision') + input.permutation_testing_precision = 'lenient'; +end + +if ~isfield(input, 'size_sets_bootstrap') + input.size_sets_bootstrap = round(input.bootstrap_testing/setup.max_sim_jobs); +end +size_sets_bootstrap = input.size_sets_bootstrap; + +if ~isfield(input, 'statistical_testing') + input.statistical_testing = 1; +end + +if ~isfield(input, 'correlation_method') + input.correlation_method = 'Spearman'; +end +correlation_method = input.correlation_method; + +if ~isfield(input, 'cs_method') + 
input.cs_method.method = 'mean-centering';
+    input.cs_method.correction_subgroup = '';
+end
+cs_method = input.cs_method;
+
+if ~isfield(input, 'selection_train')
+    input.selection_train = 1;
+end
+selection_train = input.selection_train;
+
+if ~isfield(input, 'selection_retrain')
+    input.selection_retrain = 1;
+end
+selection_retrain = input.selection_retrain;
+
+if ~isfield(input, 'type_correction')
+    input.type_correction = 'correct';
+end
+
+if ~isfield(input, 'merge_train')
+    input.merge_train = 'median';
+end
+
+if ~isfield(input, 'merge_retrain')
+    input.merge_retrain = 'median';
+end
+
+if ~isfield(input, 'final_merge')
+    input.final_merge.type = 'best';
+end
+
+if ~isfield(input, 'correction_target')
+    input.correction_target = 3;
+end
+correction_target = input.correction_target;
+
+if ~isfield(input, 'coun_ts_limit')
+    input.coun_ts_limit = 1;
+end
+
+if ~isfield(input, 'alpha_value')
+    input.alpha_value = 0.05;
+end
+
+if ~isfield(input, 'grid_static') && ~isfield(input, 'grid_dynamic')
+    input.grid_static = 20;
+end
+
+if isfield(input, 'grid_static')
+    input.grid_dynamic.onset = 1;
+    input.grid_dynamic.LV_1.x = struct('start', 1, 'end', 0, 'density', input.grid_static);
+    input.grid_dynamic.LV_1.y = struct('start', 1, 'end', 0, 'density', input.grid_static);
+end
+
+if ~isfield(input, 'additional_NCV')
+    input.additional_NCV = false;
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_slurm_parallel.m b/Visualization_Module/dp_slurm_parallel.m
new file mode 100644
index 0000000..bae7fc6
--- /dev/null
+++ b/Visualization_Module/dp_slurm_parallel.m
@@ -0,0 +1,38 @@
+%% Script to create bash files
+
+function [output_file] = dp_slurm_parallel(spls_standalone_path, analysis_folder, type_analysis, mem_total, max_sim_jobs, queue_name, total_jobs, size_sets)
+
+switch type_analysis
+    case 'hyperopt'
+        comp_path = [spls_standalone_path '/hyperopt/for_testing/hyperopt ',...
+            '$SLURM_ARRAY_TASK_ID $var_size_sets ' analysis_folder ];
+    case 'permutation'
+        comp_path = [spls_standalone_path '/permutation/for_testing/permutation ',...
+            '$SLURM_ARRAY_TASK_ID $var_size_sets ' analysis_folder ];
+    case 'bootstrap'
+        comp_path = [spls_standalone_path '/bootstrap/for_testing/bootstrap ',...
+            '$SLURM_ARRAY_TASK_ID ' analysis_folder ];
+end
+
+FID = fopen([analysis_folder '/' type_analysis '.sbatch'],'w');
+
+fprintf(FID,['#!/bin/bash -l \n',...
+    '# \n',...
+    '#SBATCH --job-name=' type_analysis ' # Name of the job \n',...
+    '#SBATCH --array=1-' num2str(total_jobs) ' \n',...
+    '#SBATCH --ntasks=1 \n',...
+    '#SBATCH --output=' analysis_folder '/output-' type_analysis '_%%A%%a.out #output directory \n',...
+    '#SBATCH --error=' analysis_folder '/error-' type_analysis '_%%A%%a.err #output directory \n',...
+    '#SBATCH -D ./ \n',...
+    'var_size_sets=' num2str(size_sets) '\n',...
+    comp_path]);
+% '#$ -S /bin/bash \n',...
+% '#$ -l mem_total=' num2str(mem_total) 'G \n',...
+% '#$ -q ' queue_name ' # This is the computer queue. For high RAM jobs use: psy0cf20. \n',...
+% '#$ -tc ' num2str(max_sim_jobs) ' \n',...
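+% For orientation, a sketch of the .sbatch file the fprintf above is meant to
+% emit, assuming hypothetical inputs type_analysis = 'permutation',
+% total_jobs = 50 and size_sets = 100; <...> stands for the corresponding
+% input argument. The %% escapes in the format string are needed so that
+% fprintf writes literal % characters; mem_total, queue_name and max_sim_jobs
+% only appear in the commented-out SGE-style directives above and therefore
+% do not reach the emitted file:
+%
+%   #!/bin/bash -l
+%   #
+%   #SBATCH --job-name=permutation # Name of the job
+%   #SBATCH --array=1-50
+%   #SBATCH --ntasks=1
+%   #SBATCH --output=<analysis_folder>/output-permutation_%A%a.out #output directory
+%   #SBATCH --error=<analysis_folder>/error-permutation_%A%a.err #output directory
+%   #SBATCH -D ./
+%   var_size_sets=100
+%   <spls_standalone_path>/permutation/for_testing/permutation $SLURM_ARRAY_TASK_ID $var_size_sets <analysis_folder>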
+
+
+fclose(FID);
+output_file = [analysis_folder '/' type_analysis '.sbatch'];
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_sociodemographic.m b/Visualization_Module/dp_sociodemographic.m
new file mode 100644
index 0000000..f2d6185
--- /dev/null
+++ b/Visualization_Module/dp_sociodemographic.m
@@ -0,0 +1,755 @@
+%% DP script for retrieving sociodemographic data
+function [input, output, setup] = dp_sociodemographic(IN)
+
+load(IN.results_path);
+
+if IN.type == 1
+    if any(strfind(IN.results_path, 'CTQ'))
+        overall_folder = '/volume/HCStress/Analysis/Stress';
+        load('/volume/HCStress/Data/Stress_SPLS_DP/Stress_SPLS_DP/DATA/17-Jul-2018/Stress_SPLS_DP_data_table_NM_17-Jul-2018.mat')
+    elseif any(strfind(IN.results_path, 'CISS'))
+        overall_folder = '/volume/HCStress/Analysis/Resilience';
+        load('/volume/HCStress/Data/BHAM/All_Birmingham_IncludingBrain_data_table_NM_01-Mar-2018.mat')
+        load([overall_folder, '/education_years.mat']);
+        for i=1:size(ID_name,1)
+            try
+                education_collection(i,1) = education_file{strcmp(education_file(:,1), ID_name{i}),2};
+            catch
+                education_collection(i,1) = NaN;
+            end
+        end
+        data_table_names_NM = [data_table_names_NM, 'DEMOG_T0T1T2_31AA_EducationYears_T0'];
+        data_table_NM = [data_table_NM, education_collection];
+    else
+        disp('Results path matches neither CTQ nor CISS; no data table loaded.');
+    end
+
+    if strfind(IN.results_path, '2018')
+        analysis_date = '2018';
+    elseif strfind(IN.results_path, '2019')
+        analysis_date = '2019';
+    elseif strfind(IN.results_path, '2020')
+        analysis_date = '2020';
+    end
+
+    folder_name = IN.results_path(5+strfind(IN.results_path, analysis_date):(strfind(IN.results_path, '/final_results')-1));
+    collection_folder = [overall_folder, '/', folder_name];
+    mkdir(collection_folder);
+
+    s_folder = [collection_folder, '/sociodemographic'];
+    mkdir(s_folder);
+
+    % IN.results_path = '/volume/HCStress/Analysis/15-Feb-2019/DP_CTQ_allgroups_649_GM_80PI_1GO_1X0_20GD_1Y0_20GD_correct10_10_XY_SC_3_Diag_1/final_results/result.mat';
+    load(IN.results_path);
+    load('/volume/HCStress/Data/EHI_data_for_req_PSN.mat');
+    load('/volume/DP_FEF/WHOQOL_data.mat');
+
+    % load('/volume/data/PRONIA/DataDump/29-Jan-2019/table_export/table_export_pruned/Self_Rating_Instruments/complete_information_T0PAT_EHI_SV_pruned.mat');
+    temp_EHI_01=[];
+
+    for i=1:size(input.data_collection.PSN_BOG,1)
+        try
+            temp_EHI_01(i,1)=cell2mat(data_table.EHI_01(strcmp(data_table.PATIENT_ID, input.data_collection.PSN_BOG{i,1})));
+        catch
+            disp([input.data_collection.PSN_BOG{i}, ' is missing.']);
+            temp_EHI_01(i,1)=NaN;
+        end
+    end
+
+    % temp_EHI_01{cellfun(@isempty, temp_EHI_01)}=NaN;
+    EHI_01 = temp_EHI_01;
+    EHI_01(EHI_01>0)=1;
+    EHI_01(EHI_01<0)=0;
+    data_table_names_NM = [data_table_names_NM, 'EHI_01'];
+    addpath(genpath('/opt/NM/NeuroMiner_Release/'));
+    addpath('/opt/SPM/spm12_v6685_cat12_r1207/');
+    addpath(genpath('/volume/DP_FEF/ScrFun/ScriptsRepository'));
+
+    load(input.MRI);
+
+    % % remove subjects that have less than 80% complete GAF, impute the rest
+    % input.data_collection.PSN_BOG(GAF_T1_toremove&pat_study,:)=[];
+
+    %% get data
+    % for i=1:size(input.data_collection.PSN_BOG,1)
+    data_table_selected=[];
+    for i=1:size(input.data_collection.PSN_BOG,1)
+        try
+            data_table_selected(i,:)=data_table_NM(strcmp(ID_name, input.data_collection.PSN_BOG{i}),:);
+        catch
+            disp([input.data_collection.PSN_BOG{i}, ' is missing.']);
+            data_table_selected(i,:)=NaN;
+        end
+    end
+
+    % data_table_selected = data_table_NM(ismember(ID_name, input.data_collection.PSN_BOG(i,1)), :);
+    % end
+
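+    % The try/catch loop above copies one row of data_table_NM per PSN and
+    % falls back to a NaN row (plus a message) when an ID is absent. A minimal
+    % vectorized sketch of the same lookup, assuming every PSN occurs at most
+    % once in ID_name (the loop is kept because it reports each missing ID):
+    %
+    %   [found, loc] = ismember(input.data_collection.PSN_BOG(:,1), ID_name);
+    %   data_table_selected = nan(numel(found), size(data_table_NM,2));
+    %   data_table_selected(found,:) = data_table_NM(loc(found),:);
+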
+    data_table_selected = [data_table_selected, EHI_01];
+
+    SD_variables1 = {'AGE_T0', 'SEX_T0', 'DEMOG_T0T1T2_31AA_EducationYears_T0', 'GF_S_1_Current_T0',...
+        'GF_R_1_Current_T0', 'GAF_S_PastMonth_Screening', 'GAF_DI_PastMonth_Screening', 'EHI_01'};
+
+    % SD_variables_screen ={'GAF', 'GF'};
+    % temp=[];
+    % for i=1:size(SD_variables_screen,2)
+    % temp1 = data_table_names_NM(~cellfun(@isempty,(strfind(data_table_names_NM, SD_variables_screen{i}))));
+    % temp=[temp,temp1];
+    % end
+
+    % data_table_names_NM_selected = cellfun(data_table_names_NM, @(x) ~isempty(strfind(x, SD_variables{1})));
+
+    temp=[];
+    for i=1:size(SD_variables1,2)
+        temp1 = data_table_names_NM(strcmp(data_table_names_NM, SD_variables1{i}));
+        temp=[temp,temp1];
+    end
+
+    names_selected1 = temp;
+    for i=1:size(names_selected1,2)
+        data_table_selected_1(:,i) = data_table_selected(:,strcmp(data_table_names_NM, names_selected1(i)));
+    end
+
+    SD_variables2 = {'PANSS', 'BDI'};
+    temp=[];
+
+    for i=1:size(SD_variables2,2)
+        temp1 = data_table_names_NM(~cellfun(@isempty,(strfind(data_table_names_NM, SD_variables2{i}))));
+        temp=[temp,temp1];
+    end
+
+    names_selected2 = temp(~cellfun(@isempty,(strfind(temp, 'T0'))));
+
+    names_selected_all = [names_selected1, names_selected2];
+
+    % compute PANSS scores
+    panss_total_names = {'PANSS_P1_T0','PANSS_P2_T0','PANSS_P3_T0','PANSS_P4_T0','PANSS_P5_T0',...
+        'PANSS_P6_T0','PANSS_P7_T0','PANSS_N1_T0','PANSS_N2_T0','PANSS_N3_T0',...
+        'PANSS_N4_T0','PANSS_N5_T0','PANSS_N6_T0','PANSS_N7_T0','PANSS_G01_T0',...
+        'PANSS_G02_T0','PANSS_G03_T0','PANSS_G04_T0','PANSS_G05_T0','PANSS_G06_T0',...
+        'PANSS_G07_T0','PANSS_G08_T0','PANSS_G09_T0','PANSS_G10_T0','PANSS_G11_T0',...
+        'PANSS_G12_T0','PANSS_G13_T0','PANSS_G14_T0','PANSS_G15_T0','PANSS_G16_T0'};
+    panss_pos_names = {'PANSS_P1_T0','PANSS_P2_T0','PANSS_P3_T0','PANSS_P4_T0','PANSS_P5_T0',...
+        'PANSS_P6_T0','PANSS_P7_T0'};
+    panss_neg_names = {'PANSS_N1_T0','PANSS_N2_T0','PANSS_N3_T0',...
+        'PANSS_N4_T0','PANSS_N5_T0','PANSS_N6_T0','PANSS_N7_T0'};
+    panss_gen_names = {'PANSS_G01_T0',...
+        'PANSS_G02_T0','PANSS_G03_T0','PANSS_G04_T0','PANSS_G05_T0','PANSS_G06_T0',...
+        'PANSS_G07_T0','PANSS_G08_T0','PANSS_G09_T0','PANSS_G10_T0','PANSS_G11_T0',...
+        'PANSS_G12_T0','PANSS_G13_T0','PANSS_G14_T0','PANSS_G15_T0','PANSS_G16_T0'};
+
+    % PANSS total and subscale sum scores
+    panss_total = sum(data_table_selected(:,ismember(data_table_names_NM, panss_total_names)),2);
+    panss_pos = sum(data_table_selected(:,ismember(data_table_names_NM, panss_pos_names)),2);
+    panss_neg = sum(data_table_selected(:,ismember(data_table_names_NM, panss_neg_names)),2);
+    panss_gen = sum(data_table_selected(:,ismember(data_table_names_NM, panss_gen_names)),2);
+
+    % compute BDI scores
+    BDI_names = {'BDI2_01_T0','BDI2_02_T0','BDI2_03_T0','BDI2_04_T0','BDI2_05_T0',...
+        'BDI2_06_T0','BDI2_07_T0','BDI2_08_T0','BDI2_09_T0','BDI2_10_T0','BDI2_11_T0',...
+        'BDI2_12_T0','BDI2_13_T0','BDI2_14_T0','BDI2_15_T0','BDI2_16_T0','BDI2_17_T0',...
+ 'BDI2_18_T0','BDI2_19_T0','BDI2_20_T0','BDI2_21_T0'}; + + BDI_scores = sum(data_table_selected(:,ismember(data_table_names_NM, BDI_names)),2); + + data_table_selected_2 = [panss_total, panss_pos, panss_neg, panss_gen, BDI_scores]; + data_table_selected_2_names = {'PANSS_total', 'PANSS_pos', 'PANSS_neg', 'PANSS_gen', 'BDI_scores'}; + + + +output.data_table_study.all = [data_table_selected_1, data_table_selected_2]; +output.data_table_study_names = [names_selected1, data_table_selected_2_names]; + +end + + +% if any(strfind(IN.results_path, 'CTQ')) +% overall_folder = '/volume/HCStress/Analysis/Stress'; +% elseif any(strfind(IN.results_path, 'CISS')) +% overall_folder = '/volume/HCStress/Analysis/Resilience'; +% end +% +% if strfind(IN.results_path, '2018') +% analysis_date = '2018'; +% elseif strfind(IN.results_path, '2019') +% analysis_date = '2019'; +% else +% disp('Something is wrong with the file'); +% end +% +% folder_name = IN.results_path(5+strfind(IN.results_path, analysis_date):(strfind(IN.results_path, '/final_results')-1)); +% collection_folder = [overall_folder, '/', folder_name]; +% mkdir(collection_folder); +% +% s_folder = [collection_folder, '/sociodemographic']; +% mkdir(s_folder); + +% sort according to diagnostic groups +studygroups = {'HC', 'ROD', 'CHR', 'ROP'}; +for i=1:size(studygroups,2) + output.data_table_study.(studygroups{i}) = output.data_table_study.all(strcmp(input.data_collection.Labels, studygroups{i}),:); +end + +fields = fieldnames (output.data_table_study); +output.data_table_final=struct; +for i=1:size(fields,1) + output.data_table_final.means(:,i) = (nanmean(output.data_table_study.(fields{i}),1))'; + output.data_table_final.std(:,i) = (nanstd(output.data_table_study.(fields{i}),1))'; +end + +output.data_table_final.names = output.data_table_study_names'; +output.data_table_final.labels = ['all', studygroups]; + +% get site numbers for diagnoses +studygroups = {'HC', 'ROD', 'CHR', 'ROP'}; +for i=1:size(input.sites,1) + sites_all(i,1) = find(input.sites(i,:)==1); +end + +sex_all = output.data_table_study.all(:,strcmp(output.data_table_study_names, 'SEX_T0')); +hand_all = output.data_table_study.all(:,strcmp(output.data_table_study_names, 'EHI_01')); + +for i=1:size(studygroups,2) + output.sites_numbers.(studygroups{i}).raw = sites_all(strcmp(input.data_collection.Labels, studygroups{i}),:); + output.sites_numbers.(studygroups{i}).raw = output.sites_numbers.(studygroups{i}).raw(~isnan(output.sites_numbers.(studygroups{i}).raw)); + output.sex_numbers.(studygroups{i}).raw = sex_all(strcmp(input.data_collection.Labels, studygroups{i}),:); + output.sex_numbers.(studygroups{i}).raw = output.sex_numbers.(studygroups{i}).raw(~isnan(output.sex_numbers.(studygroups{i}).raw)); + output.hand_numbers.(studygroups{i}).raw = hand_all(strcmp(input.data_collection.Labels, studygroups{i}),:); + output.hand_numbers.(studygroups{i}).raw = output.hand_numbers.(studygroups{i}).raw(~isnan(output.hand_numbers.(studygroups{i}).raw)); +end + +for i=1:size(studygroups,2) + [C, ~, ic] = unique(output.sites_numbers.(studygroups{i}).raw); + counts = accumarray(ic, 1); + output.sites_numbers.(studygroups{i}).count = [C, counts]; +% dp_txt_write(s_folder, ['sites_', studygroups{i}], output.sites_numbers.(studygroups{i}).count', '%d \t %d \n'); + [C, ~, ic] = unique(output.sex_numbers.(studygroups{i}).raw); + counts = accumarray(ic, 1); + output.sex_numbers.(studygroups{i}).count = [C, counts]; + [C, ~, ic] = unique(output.hand_numbers.(studygroups{i}).raw); + counts = accumarray(ic, 
1); + output.hand_numbers.(studygroups{i}).count = [C, counts]; +end + +% test for imbalances +output.collected_sites = [output.sites_numbers.HC.count(:,2), output.sites_numbers.ROD.count(:,2),output.sites_numbers.CHR.count(:,2),output.sites_numbers.ROP.count(:,2)]; +output.collected_sex = [output.sex_numbers.HC.count(:,2), output.sex_numbers.ROD.count(:,2),output.sex_numbers.CHR.count(:,2),output.sex_numbers.ROP.count(:,2)]; +output.collected_hand = [output.hand_numbers.HC.count(:,2), output.hand_numbers.ROD.count(:,2),output.hand_numbers.CHR.count(:,2),output.hand_numbers.ROP.count(:,2)]; + +% [output.sites.collection, output.sites.collection_names, output.sites.h, output.sites.p, output.sites.stats]=dp_chi2(output.collected_sites, 'absolute'); +% [output.sex.collection, output.sex.collection_names, output.sex.h, output.sex.p, output.sex.stats]=dp_chi2(output.collected_sex, 'absolute'); +% [output.hand.collection, output.hand.collection_names, output.hand.h, output.hand.p, output.hand.stats]=dp_chi2(sum(output.collected_hand,2), 'absolute'); + +nn=1; +for i=1:size(output.data_table_final.means,1) + output.data_table_final.complete(nn,:) = output.data_table_final.means(i,:); + nn=nn+1; + output.data_table_final.complete(nn,:) = output.data_table_final.std(i,:); + nn=nn+1; +end + +% dp_txt_write('/volume/HCStress/Analysis/Stress/', 'CTQ_BS_627_means', output.data_table_final.means', '%.1f \t %.1f \t %.1f \t %.1f \t %.1f \t \n'); +% dp_txt_write('/volume/HCStress/Analysis/Stress/', 'CTQ_BS_627_std', output.data_table_final.std', '%.1f \t %.1f \t %.1f \t %.1f \t %.1f \t \n'); +% % dp_txt_write('/volume/HCStress/Analysis/Stress/', 'CTQ_BS_627_names', output.data_table_final.names, '%s \n'); +% % dp_txt_write('/volume/HCStress/Analysis/Stress/', 'CTQ_BS_627_labels', output.data_table_final.labels, '%s'); +dp_txt_write(s_folder, 'complete_table', output.data_table_final.complete', '%.2f \t %.2f \t %.2f \t %.2f \t %.2f \t \n'); + +%% test for significant differences between groups +% first test with ANOVA for overall differences +groups_to_choose = {'ROD', 'CHR', 'ROP'}; +log_groups_to_choose = ismember(input.data_collection.Labels, groups_to_choose); + +for i=1:size(output.data_table_study_names,2) + [p,tbl,stats] = kruskalwallis(output.data_table_study.all(log_groups_to_choose,i), input.data_collection.Labels(log_groups_to_choose)); + output.KW_results(i,:) = [p,tbl{2,5}]; + [output.Dunn_results{i,1}, ~, ~, output.Dunn_results_labels] = multcompare(stats, 'CType', 'dunn-sidak', 'Estimate', 'kruskalwallis'); + close all + dp_txt_write(s_folder, ['Dunn_results_', num2str(i)], output.Dunn_results{i,1}', '%.3f \t %.3f \t %.3f \t %.3f \t %.3f \t %.3f \t \n \n'); +end + +dp_txt_write(s_folder, 'KW_results', output.KW_results', '%.3f \t %.3f \n \n'); +dp_txt_write(s_folder, 'Dunn_labels', '', '%s'); +for i=1:size(output.Dunn_results_labels,1) + FID = fopen([s_folder, '/Dunn_labels.txt'], 'a'); + fprintf(FID, '%s\n', output.Dunn_results_labels{i,1}); + fclose(FID); + dp_txt_write(s_folder, 'Dunn_labels', [output.Dunn_results_labels{:}], '%s \n'); +end + +% compute differences regarding latent scores between study groups +groups_to_choose = {'HC', 'ROD', 'CHR', 'ROP'}; +log_groups_to_choose = ismember(input.data_collection.Labels, groups_to_choose); +output.latent_scores = struct; + +for i=1:size(output.final_parameters,1) +% output.latent_scores.IN.(['LV_', num2str(i)]).epsilon=struct; +% output.latent_scores.data.(['LV_', 
num2str(i)])(:,1)=dp_standardize(output.final_parameters{i,strcmp(output.parameters_names, 'epsilon')}, output.latent_scores.IN.(['LV_', num2str(i)]).epsilon); + output.latent_scores.data.(['LV_', num2str(i)])(:,1)=output.final_parameters{i,strcmp(output.parameters_names, 'epsilon')}; + output.latent_scores.median_iqr.groups = {'HC', 'ROD', 'CHR', 'ROP'}; + for ii=1:size(output.latent_scores.median_iqr.groups,2) + output.latent_scores.median_iqr.(['LV_', num2str(i)]).epsilon(1,ii) = median(output.latent_scores.data.(['LV_', num2str(i)])(strcmp(input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,output.final_parameters{i,1}},1), output.latent_scores.median_iqr.groups{ii}),1)); + output.latent_scores.median_iqr.(['LV_', num2str(i)]).epsilon(2,ii) = iqr(output.latent_scores.data.(['LV_', num2str(i)])(strcmp(input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,output.final_parameters{i,1}},1), output.latent_scores.median_iqr.groups{ii}),1)); + end +% output.latent_scores.IN.(['LV_', num2str(i)]).omega=struct; +% output.latent_scores.data.(['LV_', num2str(i)])(:,2)=dp_standardize(output.final_parameters{i,strcmp(output.parameters_names, 'omega')}, output.latent_scores.IN.(['LV_', num2str(i)]).omega); + output.latent_scores.data.(['LV_', num2str(i)])(:,2)=output.final_parameters{i,strcmp(output.parameters_names, 'omega')}; + for ii=1:size(output.latent_scores.median_iqr.groups,2) + output.latent_scores.median_iqr.(['LV_', num2str(i)]).omega(1,ii) = median(output.latent_scores.data.(['LV_', num2str(i)])(strcmp(input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,output.final_parameters{i,1}},1), output.latent_scores.median_iqr.groups{ii}),2)); + output.latent_scores.median_iqr.(['LV_', num2str(i)]).omega(2,ii) = iqr(output.latent_scores.data.(['LV_', num2str(i)])(strcmp(input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,output.final_parameters{i,1}},1), output.latent_scores.median_iqr.groups{ii}),2)); + end +end + +% output.latent_scores = struct; +for i=1:size(output.final_parameters,1) +% output.latent_scores.data.(['LV_', num2str(i)])(:,1)=output.final_parameters{i,strcmp(output.parameters_names, 'epsilon')}; +% output.latent_scores.data.(['LV_', num2str(i)])(:,2)=output.final_parameters{i,strcmp(output.parameters_names, 'omega')}; + for ii=1:size(output.latent_scores.data.(['LV_', num2str(i)]),2) + [p,tbl,stats] = kruskalwallis(output.latent_scores.data.(['LV_', num2str(i)])(:,ii), input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,output.final_parameters{i,1}},1)); + output.latent_scores.KW_results.(['LV_', num2str(i)])(:,ii) = [p,tbl{2,5}]'; + [output.latent_scores.Dunn_results.(['LV_', num2str(i)]){1,ii}, ~, ~, output.latent_scores.Dunn_results_labels] = multcompare(stats, 'CType', 'dunn-sidak', 'Estimate', 'kruskalwallis'); + close all + end +% dp_txt_write(s_folder, ['Dunn_results_', num2str(i)], output.latent_scores_differences.Dunn_results{i,1}', '%.3f \t %.3f \t %.3f \t %.3f \t %.3f \t %.3f \t \n \n'); +end + +% if ANOVA shows significant differences, then do binary tests for all +% groups, while correcting for multiple testing + +%% look for specific correlations +corr_data=[]; +corr_variables=[]; + +if any(strcmp(IN.SD_selection, 'GAF')) + + % get GAF data + + GAF_names = {'GAF_S_LifeTime_Screening','GAF_S_PastYearT0_Screening',... + 'GAF_S_PastMonth_Screening','GAF_DI_LifeTime_Screening','GAF_DI_PastYear_Screening',... + 'GAF_DI_PastMonth_Screening', 'GF_S_1_Current_T0','GF_S_2_LowPastYearT0_T0',... 
+ 'GF_S_3_HighPastYearT0_T0','GF_S_4_HighLifetimeT0_T0', 'GF_R_1_Current_T0',... + 'GF_R_2_LowPastYearT0_T0','GF_R_3_HighPastYearT0_T0','GF_R_4_HighLifetimeT0_T0'}; + + + % GAF_variables = data_table_names_NM(~cellfun(@isempty,(strfind(data_table_names_NM, 'GAF')))); + GAF_data=[]; + for i=1:size(GAF_names,2) + temp = data_table_selected(:, strcmp(data_table_names_NM, GAF_names{i})); + GAF_data=[GAF_data,temp]; + end + corr_variables = [corr_variables, GAF_names]; + corr_data = [corr_data, GAF_data]; +end + +if any(strcmp(IN.SD_selection, 'BDI')) + + % get BDI data + corr_variables = [corr_variables, 'BDI']; + corr_data = [corr_data, BDI_scores]; + +end + +if any(strcmp(IN.SD_selection, 'NEO')) + + % get NEO FFI data + neo_log = ~cellfun(@isempty,(strfind(data_table_names_NM, 'NEO'))); + neo_names = data_table_names_NM(neo_log); + neo_data_temp = data_table_selected(:, neo_log); + + NEO.neuroticism.negative_affect = [1, 11, 16, 31, 46]; + NEO.neuroticism.self_reproach = [6, 21, 26, 36, 41, 51, 56]; + NEO.extraversion.positive_affect = [7, 12, 37, 42]; + NEO.extraversion.sociability = [2, 17, 27, 57]; + NEO.extraversion.activity = [22, 32, 47, 52]; + NEO.openness.aesthetic_interests = [13, 23, 43]; + NEO.openness.intellectual_interests = [33, 48, 53, 58]; + NEO.openness.unconventionality = [3,8,18, 28, 38]; + NEO.agreeableness.nonantagonistic_orientation = [9,14,19,24,29,44,54,59]; + NEO.agreeableness.prosocial_orientation = [4, 34, 39, 49]; + NEO.conscientiousness.orderliness = [5,10,15,30,55]; + NEO.conscientiousness.goal_striving = [25, 35, 60]; + NEO.conscientiousness.dependability = [20, 40, 45, 50]; + NEO_inverse_questions = [1,16,31,46,12,42,27,57,23,33,48,3,8,18,38,9,14,24,29,44,54,59,39,15,30,55,45]; + NEO_inverse_algorithm = [1:1:5;5:-1:1]; + NEO_poorly_functioning = [6,12,27,42,3,8,28,38,9,19,24,29,34,15]; + + %exclude poorly functioning questions (optional) + fields=fieldnames(NEO); + for i=1:size(fields,1) + fields1=fieldnames(NEO.(fields{i})); + for ii=1:size(fields1,1) + temp = NEO.(fields{i}).(fields1{ii}); + log = ismember(temp, NEO_poorly_functioning); + temp(log)=[]; + NEO.(fields{i}).(fields1{ii})=temp; + end + end + + neo_data_inv=[]; + for i=1:size(neo_data_temp,1) + temp_row = neo_data_temp(i,:); + for ii=1:size(temp_row,2) + if sum(ii==NEO_inverse_questions)>0 + try + temp_row(ii)=NEO_inverse_algorithm(2,temp_row(ii)==NEO_inverse_algorithm(1,:)); + catch + temp_row(ii)=NaN; + end + end + end + neo_data_inv(i,:)=temp_row; + end + + % impute data with at least 80% complete questions + log_impute = sum(isnan(neo_data_inv),2)<=(0.2*size(neo_data_inv,2)); + temp_neo_data_inv = dp_impute(neo_data_inv(log_impute,:), 'euclidean'); + neo_data_inv_imp = neo_data_inv; + neo_data_inv_imp(log_impute,:) = temp_neo_data_inv; + + % compute sum scores + fields = fieldnames(NEO); + for i=1:size(fields,1) + fields1 = fieldnames(NEO.(fields{i})); + temp_collect.(fields{i})=[]; + for ii=1:size(fields1,1) + temp_collect.(fields{i}) = [temp_collect.(fields{i}), NEO.(fields{i}).(fields1{ii})]; + end + end + + fields=fieldnames(temp_collect); + neo_names=[]; + for i=1:size(fields,1) + neo_names=[neo_names, {['NEO_', fields{i}]}]; + end + + neo_data_foranalysis = neo_data_inv_imp; + for i=1:size(neo_data_foranalysis,1) + temp_sum=[]; + for ii=1:size(fields,1) + temp_sum = [temp_sum, sum(neo_data_foranalysis(i,temp_collect.(fields{ii})))]; + end + neo_data_sum(i,:)=temp_sum; + end + + % add NEO FFI data to corr data + + corr_variables = [corr_variables,neo_names]; + corr_data = 
[corr_data,neo_data_sum]; +end + + +% get QUOL data +if any(strcmp(IN.SD_selection, 'QOL')) + WHO_table = [HC;PAT]; + + for i=1:26 + if i<10 + try + WHO_raw(:,i) = WHO_table.(['WHOQOL_0', num2str(i)]); + catch + temp = WHO_table.(['WHOQOL_0', num2str(i)]); + temp(cellfun(@isempty, temp))={'NaN'}; + WHO_raw(:,i)=temp; + end + else + try + WHO_raw(:,i) = WHO_table.(['WHOQOL_', num2str(i)]); + catch + temp = WHO_table.(['WHOQOL_', num2str(i)]); + temp(cellfun(@isempty, temp))={'NaN'}; + WHO_raw(:,i)=temp; + end + end + end + + for i=1:size(WHO_raw,1) + temp=WHO_raw(i,:); + log=cellfun(@isempty, temp); + temp(log)={num2str(NaN)}; + WHO_raw(i,:)=temp; + for ii=1:size(temp,2) + try + temp_new(i,ii)=str2num(temp{ii}); + catch + temp_new(i,ii)=NaN; + end + end + end + + WHO_new=temp_new; + + WHO_ID=[HC.PATIENT_ID; PAT.PATIENT_ID]; + + WHO_selected=[]; + for i=1:size(input.data_collection.PSN_BOG,1) + try + WHO_selected(i,:)=WHO_new(strcmp(WHO_ID, input.data_collection.PSN_BOG{i}),:); + catch + disp([input.data_collection.PSN_BOG{i}, ' is missing.']); + WHO_selected(i,:)=NaN; + end + end + + % impute data with at least 80% complete questions + log_impute = sum(isnan(WHO_selected),2)<=(0.2*size(WHO_selected,2)); + temp_WHO_selected = dp_impute(WHO_selected(log_impute,:), 'euclidean'); + WHO_selected_imp = WHO_selected; + WHO_selected_imp(log_impute,:) = temp_WHO_selected; + + % compute sum scores + WHO.physical = [3, 4, 10, 15, 16, 17, 18]; + WHO.psychosocial = [5, 6, 7, 11, 19, 26]; + WHO.social_relationship = [20, 21, 22]; + WHO.environment = [8, 9, 12, 13, 14, 23, 24, 25]; + + WHO_sum = [sum(WHO_selected_imp(:,WHO.physical),2), sum(WHO_selected_imp(:,WHO.psychosocial),2),... + sum(WHO_selected_imp(:,WHO.social_relationship),2), sum(WHO_selected_imp(:,WHO.environment),2)]; + + WHO_names = {'WHO_physical', 'WHO_psychosocial', 'WHO_social_relationship', 'WHO_environment'}; + + corr_variables = [corr_variables,WHO_names]; + corr_data = [corr_data,WHO_sum]; +end + + + +%% other stuff +% corr_tokeep = sum(isnan(corr_data),2)==0; +% +% MRI_for_analysis_pruned = MRI_for_analysis(corr_tokeep,:); +% fields=fieldnames(input.data_collection); +% for i=1:size(fields,1) +% temp = input.data_collection.(fields{i}); +% try +% input.data_collection.(fields{i}) = temp(corr_tokeep,:); +% end +% end +% behavior_pruned = input.behavior(corr_tokeep,:); +% corr_data_pruned = corr_data(corr_tokeep,:); +% +% % correct MRI data, behavioral data and correlation data for site effects +% % just like in main analysis +% IN = struct; +% IN.TrCovars = input.sites(corr_tokeep,:); +% [MRI_for_analysis_c, ~] = nk_PartialCorrelationsObj(MRI_for_analysis_pruned, IN); +% IN = struct; +% IN.TrCovars = input.sites(corr_tokeep,:); +% [behavior_c, ~] = nk_PartialCorrelationsObj(behavior_pruned, IN); +% IN = struct; +% % IN.TrCovars = input.sites(corr_tokeep,:); +% % [corr_data, ~] = nk_PartialCorrelationsObj(corr_data, IN); +% +% studygroups = {'all' 'HC', 'ROD', 'CHR', 'ROP'}; +% for i=1:size(studygroups,2) +% switch studygroups{i} +% case 'all' +% corr_table_study.(studygroups{i}) = corr_data_pruned; +% X_table_study.(studygroups{i}) = MRI_for_analysis_c; +% Y_table_study.(studygroups{i}) = behavior_c; +% otherwise +% corr_table_study.(studygroups{i}) = corr_data_pruned(strcmp(input.data_collection.Labels, studygroups{i}),:); +% X_table_study.(studygroups{i}) = MRI_for_analysis_c(strcmp(input.data_collection.Labels, studygroups{i}),:); +% Y_table_study.(studygroups{i}) = behavior_c(strcmp(input.data_collection.Labels, 
studygroups{i}),:); +% end +% % corr_table_study.(studygroups{i}) = dp_standardize GAF_data; +% % X_table_study.(studygroups{i}) = MRI_for_analysis; +% % Y_table_study.(studygroups{i}) = input.behavior; +% end + +% % assign X and Y matrices for all subjects +% corr_type = 'Spearman'; +% for i=1:size(output.final_parameters,1) +% % IN=struct; +% % IN.method = 'mean-centering'; +% % X = dp_standardize(X, IN); +% % IN=struct; +% % IN.method = 'mean-centering'; +% % Y = dp_standardize(Y, IN); +% u = output.final_parameters{i,4}; +% v = output.final_parameters{i,5}; +% % epsilon = X*u; +% % omega = Y*v; +% for ii=1:size(studygroups,2) +% corr_scores = corr_table_study.(studygroups{ii}); +% for iii=1:size(corr_variables,2) +% log_corr = strcmp(corr_variables, corr_variables{iii}); +% epsilon = X_table_study.(studygroups{ii})*u; +% omega = Y_table_study.(studygroups{ii})*v; +% corr_data = corr_scores(:,log_corr); +% [RHO, p] = corr(corr_data, epsilon, 'Type', corr_type, 'Rows', 'complete'); +% corr_data_correlation.(['LV_', num2str(i)]).epsilon.(studygroups{ii}).(corr_variables{iii}) = [RHO,p]; +% [RHO, p] = corr(corr_data, omega, 'Type', corr_type, 'Rows', 'complete'); +% corr_data_correlation.(['LV_', num2str(i)]).omega.(studygroups{ii}).(corr_variables{iii}) = [RHO, p]; +% end +% [X_table_study.(studygroups{ii}),Y_table_study.(studygroups{ii})] = proj_def(X_table_study.(studygroups{ii}), Y_table_study.(studygroups{ii}), u, v); +% end +% end + +% test correlations for entire sample and for held out subjects: GAF, GF, +% BDI + + +%% proper correlations +log_epsilon_opt = strcmp(output.parameters_names, 'epsilon'); +log_omega_opt = strcmp(output.parameters_names, 'omega'); +log_epsilon_all = strcmp(output.parameters_names, 'epsilon_all'); +log_omega_all = strcmp(output.parameters_names, 'omega_all'); +output.hold_out_corr_data = []; +output.hold_out_correlations = []; +output.all_corr_data = []; +output.all_correlations=[]; +output.hold_out_sig=[]; +output.hold_out_FDR_values=[]; +output.all_sig=[]; +output.all_FDR_values=[]; + +for i=1:size(output.final_parameters,1) + + hold_out_corr_data = corr_data(output.CV.cv_outer_indices.TestInd{1,output.final_parameters{i,1}},:); + hold_out_epsilon_opt = output.final_parameters{i,log_epsilon_opt}; + hold_out_omega_opt = output.final_parameters{i,log_omega_opt}; + hold_out_labels = input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,output.final_parameters{i,1}},1); + + all_corr_data = corr_data; + all_epsilon = output.final_parameters{i,log_epsilon_all}; + all_omega = output.final_parameters{i,log_omega_all}; + labels_all = input.data_collection.Labels; + + for ii=1:size(corr_variables,2) + + % hold out data + hold_out_corr_temp = hold_out_corr_data(:,ii); + output.hold_out_corr_data.(['LV_', num2str(i)]).(corr_variables{ii}) = hold_out_corr_temp; + [RHO,p] = corr(hold_out_epsilon_opt, hold_out_corr_temp, 'Rows', 'complete', 'Type', 'Spearman'); + output.hold_out_correlations.(['LV_', num2str(i)]).epsilon.all.(corr_variables{ii}) = [RHO, p]; + [RHO,p] = corr(hold_out_omega_opt, hold_out_corr_temp, 'Rows', 'complete', 'Type', 'Spearman'); + output.hold_out_correlations.(['LV_', num2str(i)]).omega.all.(corr_variables{ii}) = [RHO, p]; + + % all data + all_corr_temp = all_corr_data(:,ii); + output.all_corr_data.(['LV_', num2str(i)]).(corr_variables{ii}) = all_corr_temp; + [RHO,p] = corr(all_epsilon, all_corr_temp, 'Rows', 'complete', 'Type', 'Spearman'); + output.all_correlations.(['LV_', num2str(i)]).epsilon.all.(corr_variables{ii}) = [RHO, 
p]; + [RHO,p] = corr(all_omega, all_corr_temp, 'Rows', 'complete', 'Type', 'Spearman'); + output.all_correlations.(['LV_', num2str(i)]).omega.all.(corr_variables{ii}) = [RHO, p]; + + for iii=1:size(input.selected_studygroups,2) + %hold out data + log_group = strcmp(hold_out_labels, input.selected_studygroups{1,iii}); + hold_out_corr_data_group = hold_out_corr_temp(log_group); + hold_out_epsilon_group = hold_out_epsilon_opt(log_group); + hold_out_omega_group = hold_out_omega_opt(log_group); + [RHO,p] = corr(hold_out_epsilon_group, hold_out_corr_data_group, 'Rows', 'complete', 'Type', 'Spearman'); + output.hold_out_correlations.(['LV_', num2str(i)]).epsilon.(input.selected_studygroups{1,iii}).(corr_variables{ii}) = [RHO, p]; + [RHO,p] = corr(hold_out_omega_group, hold_out_corr_data_group, 'Rows', 'complete', 'Type', 'Spearman'); + output.hold_out_correlations.(['LV_', num2str(i)]).omega.(input.selected_studygroups{1,iii}).(corr_variables{ii}) = [RHO, p]; + + % all data + log_group = strcmp(labels_all, input.selected_studygroups{1,iii}); + all_corr_data_group = all_corr_temp(log_group); + all_epsilon_group = all_epsilon(log_group); + all_omega_group = all_omega(log_group); + [RHO,p] = corr(all_epsilon_group, all_corr_data_group, 'Rows', 'complete', 'Type', 'Spearman'); + output.all_correlations.(['LV_', num2str(i)]).epsilon.(input.selected_studygroups{1,iii}).(corr_variables{ii}) = [RHO, p]; + [RHO,p] = corr(all_omega_group, all_corr_data_group, 'Rows', 'complete', 'Type', 'Spearman'); + output.all_correlations.(['LV_', num2str(i)]).omega.(input.selected_studygroups{1,iii}).(corr_variables{ii}) = [RHO, p]; + + end + end +end + +% test for significance +fields1=fieldnames(output.hold_out_correlations); +for i=1:size(fields1,1) + fields2=fieldnames(output.hold_out_correlations.(fields1{i})); + for ii=1:size(fields2,1) + fields3 = fieldnames(output.hold_out_correlations.(fields1{i}).(fields2{ii})); + for iii=1:size(fields3,1) + fields4 = fieldnames(output.hold_out_correlations.(fields1{i}).(fields2{ii}).(fields3{iii})); + temp_p=[]; + for iiii=1:size(fields4,1) + pp=output.hold_out_correlations.(fields1{i}).(fields2{ii}).(fields3{iii}).(fields4{iiii})(1,2); + temp_p=[temp_p;pp]; + end + output.hold_out_FDR_values.(fields1{i}).(fields2{ii}).(fields3{iii}) = dp_FDR(temp_p, 0.05); + if output.hold_out_FDR_values.(fields1{i}).(fields2{ii}).(fields3{iii})>0 + fields4 = fieldnames(output.hold_out_correlations.(fields1{i}).(fields2{ii}).(fields3{iii})); + temp_p=[]; + for iiii=1:size(fields4,1) + pp=output.hold_out_correlations.(fields1{i}).(fields2{ii}).(fields3{iii}).(fields4{iiii})(1,2); + if pp <= output.hold_out_FDR_values.(fields1{i}).(fields2{ii}).(fields3{iii}) + output.hold_out_sig.(fields1{i}).(fields2{ii}).(fields3{iii}).(fields4{iiii}) = output.hold_out_correlations.(fields1{i}).(fields2{ii}).(fields3{iii}).(fields4{iiii}); + end + end + end + end + + end +end + +fields1=fieldnames(output.all_correlations); +for i=1:size(fields1,1) + fields2=fieldnames(output.all_correlations.(fields1{i})); + for ii=1:size(fields2,1) + fields3 = fieldnames(output.all_correlations.(fields1{i}).(fields2{ii})); + for iii=1:size(fields3,1) + fields4 = fieldnames(output.all_correlations.(fields1{i}).(fields2{ii}).(fields3{iii})); + temp_p=[]; + for iiii=1:size(fields4,1) + pp=output.all_correlations.(fields1{i}).(fields2{ii}).(fields3{iii}).(fields4{iiii})(1,2); + temp_p=[temp_p;pp]; + end + output.all_FDR_values.(fields1{i}).(fields2{ii}).(fields3{iii}) = dp_FDR(temp_p, 0.05); + if 
output.all_FDR_values.(fields1{i}).(fields2{ii}).(fields3{iii})>0
+                fields4 = fieldnames(output.all_correlations.(fields1{i}).(fields2{ii}).(fields3{iii}));
+                temp_p=[];
+                for iiii=1:size(fields4,1)
+                    pp=output.all_correlations.(fields1{i}).(fields2{ii}).(fields3{iii}).(fields4{iiii})(1,2);
+                    if pp <= output.all_FDR_values.(fields1{i}).(fields2{ii}).(fields3{iii})
+                        output.all_sig.(fields1{i}).(fields2{ii}).(fields3{iii}).(fields4{iiii}) = output.all_correlations.(fields1{i}).(fields2{ii}).(fields3{iii}).(fields4{iiii});
+                    end
+                end
+            end
+        end
+
+    end
+end
+
+save(IN.results_path, 'input', 'output', 'setup');
+
+%% collect correlations
+
+subsets = {'all', 'hold_out'};
+
+for s=1:size(subsets,2)
+    LVs = fieldnames(output.([subsets{s}, '_correlations']));
+    for i=1:size(LVs,1)
+        latent_scores = fieldnames(output.([subsets{s}, '_correlations']).(LVs{i}));
+        for ii=1:size(latent_scores,1)
+            groups = fieldnames(output.([subsets{s}, '_correlations']).(LVs{i}).(latent_scores{ii}));
+            output.(['tables_', subsets{s}, '_Rho_p']).(LVs{i}).(latent_scores{ii})=[];
+            for iii=1:size(groups,1)
+                subfields = fieldnames(output.([subsets{s}, '_correlations']).(LVs{i}).(latent_scores{ii}).(groups{iii}));
+                nn=1;
+                for iiii=1:size(subfields,1)
+                    output.(['tables_', subsets{s}, '_Rho_p']).(LVs{i}).(latent_scores{ii})(nn,iii) = output.([subsets{s}, '_correlations']).(LVs{i}).(latent_scores{ii}).(groups{iii}).(subfields{iiii})(1);
+                    nn=nn+1;
+                    output.(['tables_', subsets{s}, '_Rho_p']).(LVs{i}).(latent_scores{ii})(nn,iii) = output.([subsets{s}, '_correlations']).(LVs{i}).(latent_scores{ii}).(groups{iii}).(subfields{iiii})(2);
+                    nn=nn+1;
+                end
+            end
+        end
+    end
+    output.(['tables_', subsets{s}, '_Rho_p_labels']) = subfields;
+    output.(['tables_', subsets{s}, '_Rho_p_names']) = groups';
+end
+
+
+save(IN.results_path, 'input', 'output', 'setup');
+
+end
+
+
+
+
diff --git a/Visualization_Module/dp_sociodemographic_2020.m b/Visualization_Module/dp_sociodemographic_2020.m
new file mode 100644
index 0000000..80c0cf9
--- /dev/null
+++ b/Visualization_Module/dp_sociodemographic_2020.m
@@ -0,0 +1,524 @@
+%% DP script for retrieving sociodemographic data
+function [input, output, setup] = dp_sociodemographic_2020(IN)
+
+load(IN.results_path);
+
+addpath(genpath('/opt/NM/NeuroMiner_Release/'));
+addpath('/opt/SPM/spm12_v6685_cat12_r1207/');
+addpath(genpath('/volume/DP_FEF/ScrFun/ScriptsRepository'));
+
+if contains(IN.results_path, 'CTQ')
+    load('/volume/HCStress/Data/Stress_SPLS_DP/Stress_SPLS_DP/DATA/17-Jul-2018/Stress_SPLS_DP_data_table_NM_17-Jul-2018.mat')
+elseif contains(IN.results_path, 'CISS')
+    load('/volume/HCStress/Data/BHAM/All_Birmingham_IncludingBrain_data_table_NM_01-Mar-2018.mat')
+elseif contains(IN.results_path, 'WSS')
+    load('/volume/data/PRONIA/DataDump/03-Apr-2020/QueryData/Munich/PRONIAQueryTemplate_v3_1_MU_DP_WSS_PLS_Disc/PRONIAQueryTemplate_v3_1_MU_DP_WSS_PLS_Disc/DATA/19-May-2020/PRONIAQueryTemplate_v3_1_MU_DP_WSS_PLS_Disc_data_table_NM_19-May-2020.mat')
+else
+    disp('Results path matches neither CTQ, CISS nor WSS; no data table loaded.');
+end
+
+load('/volume/HCStress/Data/EHI_data_for_req_PSN.mat');
+load('/volume/DP_FEF/WHOQOL_data.mat');
+EHI_01=[];
+
+%% solve issues with w iteration for final parameters
+if contains(IN.results_path, 'final_vis')
+    for i=1:size(output.final_parameters,1)
+        log_find = [output.opt_parameters.(['LV_', num2str(i)]){:,7}] == output.final_parameters{i,7};
+        if sum(log_find)>1
+            temp = output.opt_parameters.(['LV_', num2str(i)])(log_find,:);
+            log_find = [temp{:,2}] == output.final_parameters{i,2};
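+            % several opt_parameters rows can share the value matched in
+            % column 7 above, so the match is narrowed down further via
+            % column 2 before the entry in column 1 (the outer CV fold w,
+            % as used for the TestInd lookups later in this script) is
+            % copied back into final_parameters below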
+            output.final_parameters{i,1} = temp{log_find,1};
+        else
+            output.final_parameters{i,1} = output.opt_parameters.(['LV_', num2str(i)]){log_find,1};
+        end
+    end
+end
+
+%% load and add additional handedness data
+for i=1:size(input.data_collection.PSN_BOG,1)
+    try
+        EHI_01(i,1)=cell2mat(data_table.EHI_01(strcmp(data_table.PATIENT_ID, input.data_collection.PSN_BOG{i,1})));
+    catch
+        disp([input.data_collection.PSN_BOG{i}, ' is missing.']);
+        EHI_01(i,1)=NaN; % append EHI at the end
+    end
+end
+
+temp = load(input.MRI);
+field = fieldnames(temp);
+MRI_for_analysis = temp.(field{1});
+
+%% get data
+% for i=1:size(input.data_collection.PSN_BOG,1)
+data_table_selected=[];
+for i=1:size(input.data_collection.PSN_BOG,1)
+    try data_table_selected(i,:)=data_table_NM(contains(ID_name, input.data_collection.PSN_BOG{i,1}),:);
+    catch
+        disp([input.data_collection.PSN_BOG{i}, ' is missing.']);
+        data_table_selected(i,:)=NaN;
+    end
+end
+
+sociodem_variables = {'DEMOG_T0T1T2_31AA_EducationYears_T0', 'GF_S_1_Current_T0',...
+    'GF_R_1_Current_T0', 'GAF_S_PastMonth_Screening', 'GAF_DI_PastMonth_Screening'};
+
+sociodem_collection=[]; sociodem_collection_names={};
+for i=1:size(sociodem_variables,2)
+    temp = data_table_selected(:,contains(data_table_names_NM, sociodem_variables{i}));
+    sociodem_collection=[sociodem_collection,temp];
+    temp_names = data_table_names_NM(contains(data_table_names_NM, sociodem_variables{i}));
+    sociodem_collection_names=[sociodem_collection_names,temp_names];
+end
+
+% clinical_collection = clinical_collection(:,contains(clinical_collection_names, 'T0'));
+% clinical_collection_names = clinical_collection_names(:,contains(clinical_collection_names, 'T0'));
+
+% compute PANSS scores
+panss_total_names = {'PANSS_P1_T0','PANSS_P2_T0','PANSS_P3_T0','PANSS_P4_T0','PANSS_P5_T0',...
+    'PANSS_P6_T0','PANSS_P7_T0','PANSS_N1_T0','PANSS_N2_T0','PANSS_N3_T0',...
+    'PANSS_N4_T0','PANSS_N5_T0','PANSS_N6_T0','PANSS_N7_T0','PANSS_G01_T0',...
+    'PANSS_G02_T0','PANSS_G03_T0','PANSS_G04_T0','PANSS_G05_T0','PANSS_G06_T0',...
+    'PANSS_G07_T0','PANSS_G08_T0','PANSS_G09_T0','PANSS_G10_T0','PANSS_G11_T0',...
+    'PANSS_G12_T0','PANSS_G13_T0','PANSS_G14_T0','PANSS_G15_T0','PANSS_G16_T0'};
+panss_pos_names = {'PANSS_P1_T0','PANSS_P2_T0','PANSS_P3_T0','PANSS_P4_T0','PANSS_P5_T0',...
+    'PANSS_P6_T0','PANSS_P7_T0'};
+panss_neg_names = {'PANSS_N1_T0','PANSS_N2_T0','PANSS_N3_T0',...
+    'PANSS_N4_T0','PANSS_N5_T0','PANSS_N6_T0','PANSS_N7_T0'};
+panss_gen_names = {'PANSS_G01_T0',...
+    'PANSS_G02_T0','PANSS_G03_T0','PANSS_G04_T0','PANSS_G05_T0','PANSS_G06_T0',...
+    'PANSS_G07_T0','PANSS_G08_T0','PANSS_G09_T0','PANSS_G10_T0','PANSS_G11_T0',...
+    'PANSS_G12_T0','PANSS_G13_T0','PANSS_G14_T0','PANSS_G15_T0','PANSS_G16_T0'};
+
+% PANSS total and subscale sum scores
+panss_total = sum(data_table_selected(:,ismember(data_table_names_NM, panss_total_names)),2);
+panss_pos = sum(data_table_selected(:,ismember(data_table_names_NM, panss_pos_names)),2);
+panss_neg = sum(data_table_selected(:,ismember(data_table_names_NM, panss_neg_names)),2);
+panss_gen = sum(data_table_selected(:,ismember(data_table_names_NM, panss_gen_names)),2);
+
+% compute BDI scores
+BDI_names = {'BDI2_01_T0','BDI2_02_T0','BDI2_03_T0','BDI2_04_T0','BDI2_05_T0',...
+    'BDI2_06_T0','BDI2_07_T0','BDI2_08_T0','BDI2_09_T0','BDI2_10_T0','BDI2_11_T0',...
+    'BDI2_12_T0','BDI2_13_T0','BDI2_14_T0','BDI2_15_T0','BDI2_16_T0','BDI2_17_T0',...
+ 'BDI2_18_T0','BDI2_19_T0','BDI2_20_T0','BDI2_21_T0'}; + +BDI_scores = sum(data_table_selected(:,ismember(data_table_names_NM, BDI_names)),2); + +clinical_collection = [panss_total, panss_pos, panss_neg, panss_gen, BDI_scores]; +clinical_collection_names = {'PANSS_total', 'PANSS_pos', 'PANSS_neg', 'PANSS_gen', 'BDI_scores'}; + +output.socio_clin_data.collection = [sociodem_collection, clinical_collection]; +output.socio_clin_data.collection_names = [sociodem_collection_names, clinical_collection_names]; + +output.socio_clin_data.means_std=[]; + +for i=1:size(unique(input.data_collection.Diag),1) + output.socio_clin_data.means_std(1:2:size(output.socio_clin_data.collection,2)*2,i) = nanmean(output.socio_clin_data.collection(input.data_collection.Diag==i,:),1)'; + output.socio_clin_data.means_std(2:2:size(output.socio_clin_data.collection,2)*2,i) = nanstd(output.socio_clin_data.collection(input.data_collection.Diag==i,:),1)'; +end + +temp_all=[]; +temp_all(1:2:size(output.socio_clin_data.collection,2)*2,1) = nanmean(output.socio_clin_data.collection,1)'; +temp_all(2:2:size(output.socio_clin_data.collection,2)*2,1) = nanstd(output.socio_clin_data.collection,1)'; + +output.socio_clin_data.means_std = [temp_all, output.socio_clin_data.means_std]; +output.socio_clin_data.names = output.socio_clin_data.collection_names'; +output.socio_clin_data.labels = ['all', input.data_collection.Diagfull_names]; + +% get site numbers for diagnoses +for i=1:size(input.sites,2) + [~, ~, ic_sites] = unique(input.data_collection.Diag(input.sites(:,i)>0)); + output.sites_data.counts(i,:) = accumarray(ic_sites, 1)'; +end + +[~, ~, ic_sites] = unique(input.data_collection.Diag); +temp_all = accumarray(ic_sites, 1)'; + +output.sites_data.counts = [temp_all; output.sites_data.counts]; +output.sites_data.names = input.data_collection.Diagfull_names; +output.sites_data.labels = input.sites_names; + +% test for site imbalances +output.sites_data.chi2_p = chi2cdf(output.sites_data.counts(2:end,:),2); + +%% test for significant differences between groups +% first test with ANOVA for overall differences + +for i=1:size(output.socio_clin_data.names,1) + [p,tbl,stats] = kruskalwallis(output.socio_clin_data.collection(:,i), input.data_collection.Labels); + KW_results(i,:) = [p,tbl{2,5}]; + close all + output.socio_clin_data.KW_results(i,:) = [tbl{2,5}, p]; + output.socio_clin_data.KW_results_ext(i,:) = {p,tbl,stats}; + [output.socio_clin_data.Dunn_results{i,1}, ~, ~, output.socio_clin_data.Dunn_results_labels] = multcompare(stats, 'CType', 'dunn-sidak', 'Estimate', 'kruskalwallis'); + close all +end + +%% look for specific correlations + +output.post_hoc_correlations.data_collection=[]; +output.post_hoc_correlations.names=[]; + +if any(strcmp(IN.SD_selection, 'GAF')) + + % get GAF data + GAF_names = {'GAF_S_LifeTime_Screening','GAF_S_PastYearT0_Screening',... + 'GAF_S_PastMonth_Screening','GAF_DI_LifeTime_Screening','GAF_DI_PastYear_Screening',... + 'GAF_DI_PastMonth_Screening', 'GF_S_1_Current_T0','GF_S_2_LowPastYearT0_T0',... + 'GF_S_3_HighPastYearT0_T0','GF_S_4_HighLifetimeT0_T0', 'GF_R_1_Current_T0',... 
+ 'GF_R_2_LowPastYearT0_T0','GF_R_3_HighPastYearT0_T0','GF_R_4_HighLifetimeT0_T0'}; + + % GAF_variables = data_table_names_NM(~cellfun(@isempty,(strfind(data_table_names_NM, 'GAF')))); + GAF_data=[]; + for i=1:size(GAF_names,2) + temp = data_table_selected(:, contains(data_table_names_NM, GAF_names{i})); + GAF_data=[GAF_data,temp]; + end + output.post_hoc_correlations.names = [output.post_hoc_correlations.names, GAF_names]; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection, GAF_data]; +end + +if any(strcmp(IN.SD_selection, 'BDI')) + + % get BDI data + output.post_hoc_correlations.names = [output.post_hoc_correlations.names, 'BDI']; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection, BDI_scores]; + +end + +if any(strcmp(IN.SD_selection, 'NEO')) + + % get NEO FFI data + neo_log = ~cellfun(@isempty,(strfind(data_table_names_NM, 'NEO'))); + neo_names = data_table_names_NM(neo_log); + neo_data_temp = data_table_selected(:, neo_log); + + NEO.neuroticism.negative_affect = [1, 11, 16, 31, 46]; + NEO.neuroticism.self_reproach = [6, 21, 26, 36, 41, 51, 56]; + NEO.extraversion.positive_affect = [7, 12, 37, 42]; + NEO.extraversion.sociability = [2, 17, 27, 57]; + NEO.extraversion.activity = [22, 32, 47, 52]; + NEO.openness.aesthetic_interests = [13, 23, 43]; + NEO.openness.intellectual_interests = [33, 48, 53, 58]; + NEO.openness.unconventionality = [3,8,18, 28, 38]; + NEO.agreeableness.nonantagonistic_orientation = [9,14,19,24,29,44,54,59]; + NEO.agreeableness.prosocial_orientation = [4, 34, 39, 49]; + NEO.conscientiousness.orderliness = [5,10,15,30,55]; + NEO.conscientiousness.goal_striving = [25, 35, 60]; + NEO.conscientiousness.dependability = [20, 40, 45, 50]; + NEO_inverse_questions = [1,16,31,46,12,42,27,57,23,33,48,3,8,18,38,9,14,24,29,44,54,59,39,15,30,55,45]; + NEO_inverse_algorithm = [1:1:5;5:-1:1]; + NEO_poorly_functioning = [6,12,27,42,3,8,28,38,9,19,24,29,34,15]; + + %exclude poorly functioning questions (optional) + fields=fieldnames(NEO); + for i=1:size(fields,1) + fields1=fieldnames(NEO.(fields{i})); + for ii=1:size(fields1,1) + temp = NEO.(fields{i}).(fields1{ii}); + log = ismember(temp, NEO_poorly_functioning); + temp(log)=[]; + NEO.(fields{i}).(fields1{ii})=temp; + end + end + + neo_data_inv=[]; + for i=1:size(neo_data_temp,1) + temp_row = neo_data_temp(i,:); + for ii=1:size(temp_row,2) + if sum(ii==NEO_inverse_questions)>0 + try + temp_row(ii)=NEO_inverse_algorithm(2,temp_row(ii)==NEO_inverse_algorithm(1,:)); + catch + temp_row(ii)=NaN; + end + end + end + neo_data_inv(i,:)=temp_row; + end + + % impute data with at least 80% complete questions + log_impute = sum(isnan(neo_data_inv),2)<=(0.2*size(neo_data_inv,2)); + temp_neo_data_inv = dp_impute(neo_data_inv(log_impute,:), 'euclidean'); + neo_data_inv_imp = neo_data_inv; + neo_data_inv_imp(log_impute,:) = temp_neo_data_inv; + + % compute sum scores + fields = fieldnames(NEO); + for i=1:size(fields,1) + fields1 = fieldnames(NEO.(fields{i})); + temp_collect.(fields{i})=[]; + for ii=1:size(fields1,1) + temp_collect.(fields{i}) = [temp_collect.(fields{i}), NEO.(fields{i}).(fields1{ii})]; + end + end + + fields=fieldnames(temp_collect); + neo_names=[]; + for i=1:size(fields,1) + neo_names=[neo_names, {['NEO_', fields{i}]}]; + end + + neo_data_foranalysis = neo_data_inv_imp; + for i=1:size(neo_data_foranalysis,1) + temp_sum=[]; + for ii=1:size(fields,1) + temp_sum = [temp_sum, sum(neo_data_foranalysis(i,temp_collect.(fields{ii})))]; + end + 
neo_data_sum(i,:)=temp_sum; + end + + % add NEO FFI data to corr data + + output.post_hoc_correlations.names = [output.post_hoc_correlations.names,neo_names]; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection,neo_data_sum]; +end + +% get QUOL data +if any(strcmp(IN.SD_selection, 'QOL')) + WHO_table = [HC;PAT]; + + for i=1:26 + if i<10 + try + WHO_raw(:,i) = WHO_table.(['WHOQOL_0', num2str(i)]); + catch + temp = WHO_table.(['WHOQOL_0', num2str(i)]); + temp(cellfun(@isempty, temp))={'NaN'}; + WHO_raw(:,i)=temp; + end + else + try + WHO_raw(:,i) = WHO_table.(['WHOQOL_', num2str(i)]); + catch + temp = WHO_table.(['WHOQOL_', num2str(i)]); + temp(cellfun(@isempty, temp))={'NaN'}; + WHO_raw(:,i)=temp; + end + end + end + + for i=1:size(WHO_raw,1) + temp=WHO_raw(i,:); + log=cellfun(@isempty, temp); + temp(log)={num2str(NaN)}; + WHO_raw(i,:)=temp; + for ii=1:size(temp,2) + try + temp_new(i,ii)=str2num(temp{ii}); + catch + temp_new(i,ii)=NaN; + end + end + end + + WHO_new=temp_new; + + WHO_ID=[HC.PATIENT_ID; PAT.PATIENT_ID]; + + WHO_selected=[]; + for i=1:size(input.data_collection.PSN_BOG,1) + try + WHO_selected(i,:)=WHO_new(strcmp(WHO_ID, input.data_collection.PSN_BOG{i}),:); + catch + disp([input.data_collection.PSN_BOG{i}, ' is missing.']); + WHO_selected(i,:)=NaN; + end + end + + % impute data with at least 80% complete questions + log_impute = sum(isnan(WHO_selected),2)<=(0.2*size(WHO_selected,2)); + temp_WHO_selected = dp_impute(WHO_selected(log_impute,:), 'euclidean'); + WHO_selected_imp = WHO_selected; + WHO_selected_imp(log_impute,:) = temp_WHO_selected; + + % compute sum scores + WHO.physical = [3, 4, 10, 15, 16, 17, 18]; + WHO.psychosocial = [5, 6, 7, 11, 19, 26]; + WHO.social_relationship = [20, 21, 22]; + WHO.environment = [8, 9, 12, 13, 14, 23, 24, 25]; + + WHO_sum = [sum(WHO_selected_imp(:,WHO.physical),2), sum(WHO_selected_imp(:,WHO.psychosocial),2),... 
+ sum(WHO_selected_imp(:,WHO.social_relationship),2), sum(WHO_selected_imp(:,WHO.environment),2)];
+
+    WHO_names = {'WHO_physical', 'WHO_psychosocial', 'WHO_social_relationship', 'WHO_environment'};
+
+    output.post_hoc_correlations.names = [output.post_hoc_correlations.names,WHO_names];
+    output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection,WHO_sum];
+end
+
+output.post_hoc_correlations.data_table = array2table(output.post_hoc_correlations.data_collection, 'RowNames', input.data_collection.PSN_BOG(:,1));
+output.post_hoc_correlations.data_table.Properties.VariableNames = output.post_hoc_correlations.names;
+
+%% proper correlations
+log_epsilon = strcmp(output.parameters_names, 'epsilon');
+log_omega = strcmp(output.parameters_names, 'omega');
+log_epsilon_all = strcmp(output.parameters_names, 'epsilon_all');
+log_omega_all = strcmp(output.parameters_names, 'omega_all');
+output.post_hoc_correlations.correlations.test.RHO=[];
+output.post_hoc_correlations.correlations.test.p=[];
+output.post_hoc_correlations.correlations.validation.RHO=[];
+output.post_hoc_correlations.correlations.validation.p=[];
+output.post_hoc_mdl.test.R2=[];
+output.post_hoc_mdl.test.p=[];
+% output.post_hoc_mdl.test.mdl=[];
+output.post_hoc_mdl.validation.R2=[];
+output.post_hoc_mdl.validation.p=[];
+% output.post_hoc_mdl.validation.mdl=[];
+% column 1 of log_coll addresses the per-fold 'test' latent scores, column 2
+% the 'all' scores projected on the full sample
+log_coll = {log_epsilon, log_epsilon_all; log_omega, log_omega_all; 'test', 'all'};
+
+for l=1:size(log_coll,2)
+    for i=1:(size(output.final_parameters,1)-1)
+
+        % index log_coll by the pass counter l, not by the LV counter i
+        epsilon = output.final_parameters{i,log_coll{1,l}};
+        omega = output.final_parameters{i,log_coll{2,l}};
+        X = [epsilon, omega];
+
+        if input.selection_train == 1
+            if strcmp(log_coll{3,l}, 'test')
+                y = output.post_hoc_correlations.data_collection(output.CV.cv_outer_indices.TestInd{1,output.final_parameters{i,1}},:);
+            else
+                idx = [];
+                for ii=1:size(output.CV.cv_outer_indices.TestInd,2)
+                    idx = [idx; output.CV.cv_outer_indices.TestInd{1,ii}];
+                end
+                y = output.post_hoc_correlations.data_collection(idx,:);
+            end
+        elseif input.selection_train == 2
+            idx = [];
+            for ii=1:size(output.CV.cv_outer_indices.TestInd,2)
+                idx = [idx; output.CV.cv_outer_indices.TestInd{1,ii}];
+            end
+            y = output.post_hoc_correlations.data_collection(idx,:);
+        end
+
+        % 'Rows','complete' drops every subject with a NaN in any column of X
+        % or y, so all questionnaire columns are correlated on the same subset
+        [RHO,p] = corr(X, y, 'Rows', 'complete', 'Type', 'Spearman');
+        % [p(1,:), ~] = dp_FDR_adj(p(1,:));
+        % [p(2,:), ~] = dp_FDR_adj(p(2,:));
+
+        % note: both passes ('test' and 'all') are appended column-wise into
+        % the .test fields
+        output.post_hoc_correlations.correlations.test.RHO = [output.post_hoc_correlations.correlations.test.RHO, RHO'];
+        output.post_hoc_correlations.correlations.test.p = [output.post_hoc_correlations.correlations.test.p, p'];
+
+        p=[]; R2=[]; mdl_coll={};
+
+        for v=1:size(y,2)
+            mdl = fitlm(X, y(:,v));
+            p(v,1) = mdl.coefTest;
+            R2(v,1) = mdl.Rsquared.Adjusted;
+            % mdl_coll = [mdl_coll; {mdl}];
+        end
+
+        output.post_hoc_mdl.test.R2 = [output.post_hoc_mdl.test.R2, R2];
+        output.post_hoc_mdl.test.p = [output.post_hoc_mdl.test.p, p];
+        % output.post_hoc_mdl.test.mdl = [output.post_hoc_mdl.test.mdl, mdl_coll];
+
+        % [output.post_hoc_mdl.test.p, ~] = dp_FDR_adj(output.post_hoc_mdl.test.p);
+
+        if ~islogical(input.validation_set)
+            epsilon = output.validation_results{i,log_epsilon};
+            omega = output.validation_results{i,log_omega};
+            X = [epsilon, omega];
+            y = output.post_hoc_correlations.data_collection(output.validation_indices.TestInd{1,1},:);
+            [RHO,p] = corr(X, y, 'Rows', 'complete', 'Type', 'Spearman');
+            % [p(1,:), ~] = dp_FDR_adj(p(1,:));
+            % [p(2,:), ~] = dp_FDR_adj(p(2,:));
+
+
output.post_hoc_correlations.correlations.validation.RHO = [output.post_hoc_correlations.correlations.validation.RHO, RHO']; + output.post_hoc_correlations.correlations.validation.p = [output.post_hoc_correlations.correlations.validation.p, p']; + + p=[]; R2=[];mdl_coll={}; + + for v=1:size(y,2) + mdl = fitlm(X, y(:,v)); + p(v,1) = mdl.coefTest; + R2(v,1) = mdl.Rsquared.Adjusted; + % mdl_coll=[mdl_coll; {mdl}]; + end + + output.post_hoc_mdl.validation.R2 = [output.post_hoc_mdl.validation.R2, R2]; + output.post_hoc_mdl.validation.p = [output.post_hoc_mdl.validation.p, p]; + % output.post_hoc_mdl.validation.mdl = [output.post_hoc_mdl.validation.mdl, mdl_coll]; + + % [output.post_hoc_mdl.validation.p, ~] = dp_FDR_adj(output.post_hoc_mdl.validation.p); + + end + + end + + output.post_hoc_correlations.correlations.test.p = dp_FDR_adj(output.post_hoc_correlations.correlations.test.p); + output.post_hoc_correlations.correlations.validation.p = dp_FDR_adj(output.post_hoc_correlations.correlations.validation.p); + output.post_hoc_mdl.test.p = dp_FDR_adj(output.post_hoc_mdl.test.p); + output.post_hoc_mdl.validation.p = dp_FDR_adj(output.post_hoc_mdl.validation.p); + + output.post_hoc_correlations.correlations.test.table_RHO = array2table(output.post_hoc_correlations.correlations.test.RHO, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_correlations.correlations.test.table_p = array2table(output.post_hoc_correlations.correlations.test.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.test.table_p = array2table(output.post_hoc_mdl.test.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.test.table_R2 = array2table(output.post_hoc_mdl.test.R2, 'RowNames', output.post_hoc_correlations.names); + + temp=[];temp_names={};nn=1; + for rr=1:size(output.post_hoc_correlations.correlations.test.RHO,1) + temp(nn,:) = output.post_hoc_correlations.correlations.test.RHO(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_RHO']; + nn=nn+1; + temp(nn,:) = output.post_hoc_correlations.correlations.test.p(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_p']; + nn=nn+1; + end + + output.post_hoc_correlations.correlations.test.table_RHO_p = array2table(temp, 'RowNames', temp_names); + + if ~islogical(input.validation_set) + output.post_hoc_correlations.correlations.validation.table_RHO = array2table(output.post_hoc_correlations.correlations.validation.RHO, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_correlations.correlations.validation.table_p = array2table(output.post_hoc_correlations.correlations.validation.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.validation.table_p = array2table(output.post_hoc_mdl.validation.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.validation.table_R2 = array2table(output.post_hoc_mdl.validation.R2, 'RowNames', output.post_hoc_correlations.names); + end + + temp=[];temp_names={};nn=1; + for rr=1:size(output.post_hoc_correlations.correlations.validation.RHO,1) + temp(nn,:) = output.post_hoc_correlations.correlations.validation.RHO(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_RHO']; + nn=nn+1; + temp(nn,:) = output.post_hoc_correlations.correlations.validation.p(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_p']; + nn=nn+1; + end + + output.post_hoc_correlations.correlations.validation.table_RHO_p = array2table(temp, 'RowNames', temp_names); + + 
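+    % Worked micro-example of the row interleaving used above (throwaway ex_*
+    % variables, not part of the pipeline): correlations go to odd rows and
+    % their p-values to even rows, so each questionnaire variable yields an
+    % 'X_RHO'/'X_p' row pair; a stride-2 assignment is equivalent to the
+    % nn-counter loop:
+    ex_RHO = [0.41 0.12; -0.23 0.30];   % 2 variables x 2 latent-score columns
+    ex_p   = [0.01 0.62; 0.75 0.04];    % matching p-values
+    ex_tab = zeros(2*size(ex_RHO,1), size(ex_RHO,2));
+    ex_tab(1:2:end,:) = ex_RHO;         % odd rows: RHO
+    ex_tab(2:2:end,:) = ex_p;           % even rows: p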
nn=1;temp_vars={}; + for i=1:size(output.post_hoc_correlations.correlations.test.table_RHO,2)/2 + temp_vars{1,nn} = ['epsilon_LV', num2str(i)]; + nn=nn+1; + temp_vars{1,nn} = ['omega_LV', num2str(i)]; + nn=nn+1; + end + + temp_vars_names={}; + for i=1:size(output.post_hoc_correlations.correlations.test.table_RHO,2)/2 + temp_vars_names{1,i} = ['LV', num2str(i), '_latent_scores']; + end + + output.post_hoc_correlations.correlations.test.table_RHO.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.correlations.test.table_p.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.correlations.test.table_RHO_p.Properties.VariableNames = temp_vars; + output.post_hoc_mdl.test.table_p.Properties.VariableNames = temp_vars_names; + output.post_hoc_mdl.test.table_R2.Properties.VariableNames = temp_vars_names; + + if ~islogical(input.validation_set) + + output.post_hoc_correlations.correlations.validation.table_RHO.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.correlations.validation.table_p.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.correlations.validation.table_RHO_p.Properties.VariableNames = temp_vars; + output.post_hoc_mdl.validation.table_p.Properties.VariableNames = temp_vars_names; + output.post_hoc_mdl.validation.table_R2.Properties.VariableNames = temp_vars_names; + + end + + % save(IN.results_path, 'input', 'output', 'setup'); + +end + +end + + diff --git a/Visualization_Module/dp_sociodemographic_2020.m~ b/Visualization_Module/dp_sociodemographic_2020.m~ new file mode 100644 index 0000000..6814abc --- /dev/null +++ b/Visualization_Module/dp_sociodemographic_2020.m~ @@ -0,0 +1,519 @@ +%% DP script for retrieving sociodemographic data +function [input, output, setup]= dp_sociodemographic_2020(IN) + +load(IN.results_path); + +addpath(genpath('/opt/NM/NeuroMiner_Release/')); +addpath('/opt/SPM/spm12_v6685_cat12_r1207/'); +addpath(genpath('/volume/DP_FEF/ScrFun/ScriptsRepository')); + +if contains(IN.results_path, 'CTQ') + load('/volume/HCStress/Data/Stress_SPLS_DP/Stress_SPLS_DP/DATA/17-Jul-2018/Stress_SPLS_DP_data_table_NM_17-Jul-2018.mat') +elseif contains(IN.results_path, 'CISS') + load('/volume/HCStress/Data/BHAM/All_Birmingham_IncludingBrain_data_table_NM_01-Mar-2018.mat') +elseif contains(IN.results_path, 'WSS') + load('/volume/data/PRONIA/DataDump/03-Apr-2020/QueryData/Munich/PRONIAQueryTemplate_v3_1_MU_DP_WSS_PLS_Disc/PRONIAQueryTemplate_v3_1_MU_DP_WSS_PLS_Disc/DATA/19-May-2020/PRONIAQueryTemplate_v3_1_MU_DP_WSS_PLS_Disc_data_table_NM_19-May-2020.mat') +else + disp('Something''s wrong!'); +end + +load('/volume/HCStress/Data/EHI_data_for_req_PSN.mat'); +load('/volume/DP_FEF/WHOQOL_data.mat'); +EHI_01=[]; + +%% solve issues with w iteration for final parameters +if contains(IN.results_path, 'final_vis') + for i=1:size(output.final_parameters,1) + log_find = [output.opt_parameters.(['LV_', num2str(i)]){:,7}] == output.final_parameters{i,7}; + if sum(log_find)>1 + temp = output.opt_parameters.(['LV_', num2str(i)])(log_find,:); + log_find = [temp{:,2}] == output.final_parameters{i,2}; + output.final_parameters{i,1} = temp{log_find,1}; + else + output.final_parameters{i,1} = output.opt_parameters.(['LV_', num2str(i)]){log_find,1}; + end + end +end + +%% load and add additional handedness data +for i=1:size(input.data_collection.PSN_BOG,1) + try + EHI_01(i,1)=cell2mat(data_table.EHI_01(strcmp(data_table.PATIENT_ID, input.data_collection.PSN_BOG{i,1}))); + catch + disp([input.data_collection.PSN_BOG{i}, ' is 
missing.']); + EHI_01(i,1)=NaN; % EHI am Ende hinzufügen + end +end + +temp = load(input.MRI); +field = fieldnames(temp); +MRI_for_analysis = temp.(field{1}); + +%% get data +% for i=1:size(input.data_collection.PSN_BOG,1) +data_table_selected=[]; +for i=1:size(input.data_collection.PSN_BOG,1) + try data_table_selected(i,:)=data_table_NM(contains(ID_name, input.data_collection.PSN_BOG{i,1}),:); + catch + disp([input.data_collection.PSN_BOG{i}, ' is missing.']); + data_table_selected(i,:)=NaN; + end +end + +sociodem_variables = {'DEMOG_T0T1T2_31AA_EducationYears_T0', 'GF_S_1_Current_T0',... + 'GF_R_1_Current_T0', 'GAF_S_PastMonth_Screening', 'GAF_DI_PastMonth_Screening'}; + +sociodem_collection=[]; sociodem_collection_names={}; +for i=1:size(sociodem_variables,2) + temp = data_table_selected(:,contains(data_table_names_NM, sociodem_variables{i})); + sociodem_collection=[sociodem_collection,temp]; + temp_names = data_table_names_NM(contains(data_table_names_NM, sociodem_variables{i})); + sociodem_collection_names=[sociodem_collection_names,temp_names]; +end + +% clinical_collection = clinical_collection(:,contains(clinical_collection_names, 'T0')); +% clinical_collection_names = clinical_collection_names(:,contains(clinical_collection_names, 'T0')); + +% compute PANSS scores +panss_total_names = {'PANSS_P1_T0','PANSS_P2_T0','PANSS_P3_T0','PANSS_P4_T0','PANSS_P5_T0',... + 'PANSS_P6_T0','PANSS_P7_T0','PANSS_N1_T0','PANSS_N2_T0','PANSS_N3_T0',... + 'PANSS_N4_T0','PANSS_N5_T0','PANSS_N6_T0','PANSS_N7_T0','PANSS_G01_T0',... + 'PANSS_G02_T0','PANSS_G03_T0','PANSS_G04_T0','PANSS_G05_T0','PANSS_G06_T0',... + 'PANSS_G07_T0','PANSS_G08_T0','PANSS_G09_T0','PANSS_G10_T0','PANSS_G11_T0',... + 'PANSS_G12_T0','PANSS_G13_T0','PANSS_G14_T0','PANSS_G15_T0','PANSS_G16_T0'}; +panss_pos_names = {'PANSS_P1_T0','PANSS_P2_T0','PANSS_P3_T0','PANSS_P4_T0','PANSS_P5_T0',... + 'PANSS_P6_T0','PANSS_P7_T0'}; +panss_neg_names = {'PANSS_N1_T0','PANSS_N2_T0','PANSS_N3_T0',... + 'PANSS_N4_T0','PANSS_N5_T0','PANSS_N6_T0','PANSS_N7_T0'}; +panss_gen_names = {'PANSS_G01_T0',... + 'PANSS_G02_T0','PANSS_G03_T0','PANSS_G04_T0','PANSS_G05_T0','PANSS_G06_T0',... + 'PANSS_G07_T0','PANSS_G08_T0','PANSS_G09_T0','PANSS_G10_T0','PANSS_G11_T0',... + 'PANSS_G12_T0','PANSS_G13_T0','PANSS_G14_T0','PANSS_G15_T0','PANSS_G16_T0'}; + +% panss positive scores +panss_total = sum(data_table_selected(:,ismember(data_table_names_NM, panss_total_names)),2); +panss_pos = sum(data_table_selected(:,ismember(data_table_names_NM, panss_pos_names)),2); +panss_neg = sum(data_table_selected(:,ismember(data_table_names_NM, panss_neg_names)),2); +panss_gen = sum(data_table_selected(:,ismember(data_table_names_NM, panss_gen_names)),2); + +% compute BDI scores +BDI_names = {'BDI2_01_T0','BDI2_02_T0','BDI2_03_T0','BDI2_04_T0','BDI2_05_T0',... + 'BDI2_06_T0','BDI2_07_T0','BDI2_08_T0','BDI2_09_T0','BDI2_10_T0','BDI2_11_T0',... + 'BDI2_12_T0','BDI2_13_T0','BDI2_14_T0','BDI2_15_T0','BDI2_16_T0','BDI2_17_T0',... 
+ 'BDI2_18_T0','BDI2_19_T0','BDI2_20_T0','BDI2_21_T0'}; + +BDI_scores = sum(data_table_selected(:,ismember(data_table_names_NM, BDI_names)),2); + +clinical_collection = [panss_total, panss_pos, panss_neg, panss_gen, BDI_scores]; +clinical_collection_names = {'PANSS_total', 'PANSS_pos', 'PANSS_neg', 'PANSS_gen', 'BDI_scores'}; + +output.socio_clin_data.collection = [sociodem_collection, clinical_collection]; +output.socio_clin_data.collection_names = [sociodem_collection_names, clinical_collection_names]; + +output.socio_clin_data.means_std=[]; + +for i=1:size(unique(input.data_collection.Diag),1) + output.socio_clin_data.means_std(1:2:size(output.socio_clin_data.collection,2)*2,i) = nanmean(output.socio_clin_data.collection(input.data_collection.Diag==i,:),1)'; + output.socio_clin_data.means_std(2:2:size(output.socio_clin_data.collection,2)*2,i) = nanstd(output.socio_clin_data.collection(input.data_collection.Diag==i,:),1)'; +end + +temp_all=[]; +temp_all(1:2:size(output.socio_clin_data.collection,2)*2,1) = nanmean(output.socio_clin_data.collection,1)'; +temp_all(2:2:size(output.socio_clin_data.collection,2)*2,1) = nanstd(output.socio_clin_data.collection,1)'; + +output.socio_clin_data.means_std = [temp_all, output.socio_clin_data.means_std]; +output.socio_clin_data.names = output.socio_clin_data.collection_names'; +output.socio_clin_data.labels = ['all', input.data_collection.Diagfull_names]; + +% get site numbers for diagnoses +for i=1:size(input.sites,2) + [~, ~, ic_sites] = unique(input.data_collection.Diag(input.sites(:,i)>0)); + output.sites_data.counts(i,:) = accumarray(ic_sites, 1)'; +end + +[~, ~, ic_sites] = unique(input.data_collection.Diag); +temp_all = accumarray(ic_sites, 1)'; + +output.sites_data.counts = [temp_all; output.sites_data.counts]; +output.sites_data.names = input.data_collection.Diagfull_names; +output.sites_data.labels = input.sites_names; + +% test for site imbalances +output.sites_data.chi2_p = chi2cdf(output.sites_data.counts(2:end,:),2); + +%% test for significant differences between groups +% first test with ANOVA for overall differences + +for i=1:size(output.socio_clin_data.names,1) + [p,tbl,stats] = kruskalwallis(output.socio_clin_data.collection(:,i), input.data_collection.Labels); + KW_results(i,:) = [p,tbl{2,5}]; + close all + output.socio_clin_data.KW_results(i,:) = [tbl{2,5}, p]; + output.socio_clin_data.KW_results_ext(i,:) = {p,tbl,stats}; + [output.socio_clin_data.Dunn_results{i,1}, ~, ~, output.socio_clin_data.Dunn_results_labels] = multcompare(stats, 'CType', 'dunn-sidak', 'Estimate', 'kruskalwallis'); + close all +end + +%% look for specific correlations + +output.post_hoc_correlations.data_collection=[]; +output.post_hoc_correlations.names=[]; + +if any(strcmp(IN.SD_selection, 'GAF')) + + % get GAF data + GAF_names = {'GAF_S_LifeTime_Screening','GAF_S_PastYearT0_Screening',... + 'GAF_S_PastMonth_Screening','GAF_DI_LifeTime_Screening','GAF_DI_PastYear_Screening',... + 'GAF_DI_PastMonth_Screening', 'GF_S_1_Current_T0','GF_S_2_LowPastYearT0_T0',... + 'GF_S_3_HighPastYearT0_T0','GF_S_4_HighLifetimeT0_T0', 'GF_R_1_Current_T0',... 
+ 'GF_R_2_LowPastYearT0_T0','GF_R_3_HighPastYearT0_T0','GF_R_4_HighLifetimeT0_T0'}; + + % GAF_variables = data_table_names_NM(~cellfun(@isempty,(strfind(data_table_names_NM, 'GAF')))); + GAF_data=[]; + for i=1:size(GAF_names,2) + temp = data_table_selected(:, contains(data_table_names_NM, GAF_names{i})); + GAF_data=[GAF_data,temp]; + end + output.post_hoc_correlations.names = [output.post_hoc_correlations.names, GAF_names]; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection, GAF_data]; +end + +if any(strcmp(IN.SD_selection, 'BDI')) + + % get BDI data + output.post_hoc_correlations.names = [output.post_hoc_correlations.names, 'BDI']; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection, BDI_scores]; + +end + +if any(strcmp(IN.SD_selection, 'NEO')) + + % get NEO FFI data + neo_log = ~cellfun(@isempty,(strfind(data_table_names_NM, 'NEO'))); + neo_names = data_table_names_NM(neo_log); + neo_data_temp = data_table_selected(:, neo_log); + + NEO.neuroticism.negative_affect = [1, 11, 16, 31, 46]; + NEO.neuroticism.self_reproach = [6, 21, 26, 36, 41, 51, 56]; + NEO.extraversion.positive_affect = [7, 12, 37, 42]; + NEO.extraversion.sociability = [2, 17, 27, 57]; + NEO.extraversion.activity = [22, 32, 47, 52]; + NEO.openness.aesthetic_interests = [13, 23, 43]; + NEO.openness.intellectual_interests = [33, 48, 53, 58]; + NEO.openness.unconventionality = [3,8,18, 28, 38]; + NEO.agreeableness.nonantagonistic_orientation = [9,14,19,24,29,44,54,59]; + NEO.agreeableness.prosocial_orientation = [4, 34, 39, 49]; + NEO.conscientiousness.orderliness = [5,10,15,30,55]; + NEO.conscientiousness.goal_striving = [25, 35, 60]; + NEO.conscientiousness.dependability = [20, 40, 45, 50]; + NEO_inverse_questions = [1,16,31,46,12,42,27,57,23,33,48,3,8,18,38,9,14,24,29,44,54,59,39,15,30,55,45]; + NEO_inverse_algorithm = [1:1:5;5:-1:1]; + NEO_poorly_functioning = [6,12,27,42,3,8,28,38,9,19,24,29,34,15]; + + %exclude poorly functioning questions (optional) + fields=fieldnames(NEO); + for i=1:size(fields,1) + fields1=fieldnames(NEO.(fields{i})); + for ii=1:size(fields1,1) + temp = NEO.(fields{i}).(fields1{ii}); + log = ismember(temp, NEO_poorly_functioning); + temp(log)=[]; + NEO.(fields{i}).(fields1{ii})=temp; + end + end + + neo_data_inv=[]; + for i=1:size(neo_data_temp,1) + temp_row = neo_data_temp(i,:); + for ii=1:size(temp_row,2) + if sum(ii==NEO_inverse_questions)>0 + try + temp_row(ii)=NEO_inverse_algorithm(2,temp_row(ii)==NEO_inverse_algorithm(1,:)); + catch + temp_row(ii)=NaN; + end + end + end + neo_data_inv(i,:)=temp_row; + end + + % impute data with at least 80% complete questions + log_impute = sum(isnan(neo_data_inv),2)<=(0.2*size(neo_data_inv,2)); + temp_neo_data_inv = dp_impute(neo_data_inv(log_impute,:), 'euclidean'); + neo_data_inv_imp = neo_data_inv; + neo_data_inv_imp(log_impute,:) = temp_neo_data_inv; + + % compute sum scores + fields = fieldnames(NEO); + for i=1:size(fields,1) + fields1 = fieldnames(NEO.(fields{i})); + temp_collect.(fields{i})=[]; + for ii=1:size(fields1,1) + temp_collect.(fields{i}) = [temp_collect.(fields{i}), NEO.(fields{i}).(fields1{ii})]; + end + end + + fields=fieldnames(temp_collect); + neo_names=[]; + for i=1:size(fields,1) + neo_names=[neo_names, {['NEO_', fields{i}]}]; + end + + neo_data_foranalysis = neo_data_inv_imp; + for i=1:size(neo_data_foranalysis,1) + temp_sum=[]; + for ii=1:size(fields,1) + temp_sum = [temp_sum, sum(neo_data_foranalysis(i,temp_collect.(fields{ii})))]; + end + 
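+        % Worked micro-example of the inverse-item recoding above (throwaway
+        % ex_* variables; a 1-5 Likert scale is assumed, matching
+        % NEO_inverse_algorithm): raw 1->5, 2->4, 3->3, 4->2, 5->1.
+        ex_lookup = [1:1:5; 5:-1:1];                     % row 1 raw, row 2 reversed
+        ex_raw = 2;
+        ex_rev = ex_lookup(2, ex_raw==ex_lookup(1,:));   % ex_rev == 4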
neo_data_sum(i,:)=temp_sum; + end + + % add NEO FFI data to corr data + + output.post_hoc_correlations.names = [output.post_hoc_correlations.names,neo_names]; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection,neo_data_sum]; +end + +% get QUOL data +if any(strcmp(IN.SD_selection, 'QOL')) + WHO_table = [HC;PAT]; + + for i=1:26 + if i<10 + try + WHO_raw(:,i) = WHO_table.(['WHOQOL_0', num2str(i)]); + catch + temp = WHO_table.(['WHOQOL_0', num2str(i)]); + temp(cellfun(@isempty, temp))={'NaN'}; + WHO_raw(:,i)=temp; + end + else + try + WHO_raw(:,i) = WHO_table.(['WHOQOL_', num2str(i)]); + catch + temp = WHO_table.(['WHOQOL_', num2str(i)]); + temp(cellfun(@isempty, temp))={'NaN'}; + WHO_raw(:,i)=temp; + end + end + end + + for i=1:size(WHO_raw,1) + temp=WHO_raw(i,:); + log=cellfun(@isempty, temp); + temp(log)={num2str(NaN)}; + WHO_raw(i,:)=temp; + for ii=1:size(temp,2) + try + temp_new(i,ii)=str2num(temp{ii}); + catch + temp_new(i,ii)=NaN; + end + end + end + + WHO_new=temp_new; + + WHO_ID=[HC.PATIENT_ID; PAT.PATIENT_ID]; + + WHO_selected=[]; + for i=1:size(input.data_collection.PSN_BOG,1) + try + WHO_selected(i,:)=WHO_new(strcmp(WHO_ID, input.data_collection.PSN_BOG{i}),:); + catch + disp([input.data_collection.PSN_BOG{i}, ' is missing.']); + WHO_selected(i,:)=NaN; + end + end + + % impute data with at least 80% complete questions + log_impute = sum(isnan(WHO_selected),2)<=(0.2*size(WHO_selected,2)); + temp_WHO_selected = dp_impute(WHO_selected(log_impute,:), 'euclidean'); + WHO_selected_imp = WHO_selected; + WHO_selected_imp(log_impute,:) = temp_WHO_selected; + + % compute sum scores + WHO.physical = [3, 4, 10, 15, 16, 17, 18]; + WHO.psychosocial = [5, 6, 7, 11, 19, 26]; + WHO.social_relationship = [20, 21, 22]; + WHO.environment = [8, 9, 12, 13, 14, 23, 24, 25]; + + WHO_sum = [sum(WHO_selected_imp(:,WHO.physical),2), sum(WHO_selected_imp(:,WHO.psychosocial),2),... 
+ sum(WHO_selected_imp(:,WHO.social_relationship),2), sum(WHO_selected_imp(:,WHO.environment),2)]; + + WHO_names = {'WHO_physical', 'WHO_psychosocial', 'WHO_social_relationship', 'WHO_environment'}; + + output.post_hoc_correlations.names = [output.post_hoc_correlations.names,WHO_names]; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection,WHO_sum]; +end + +output.post_hoc_correlations.data_table = array2table(output.post_hoc_correlations.data_collection, 'RowNames', input.data_collection.PSN_BOG(:,1)); +output.post_hoc_correlations.data_table.Properties.VariableNames = output.post_hoc_correlations.names; + +%% proper correlations +log_epsilon = strcmp(output.parameters_names, 'epsilon'); +log_omega = strcmp(output.parameters_names, 'omega'); +log_epsilon_all = strcmp(output.parameters_names, 'epsilon_all'); +log_omega_all = strcmp(output.parameters_names, 'omega_all'); +output.post_hoc_correlations.correlations.test.RHO=[]; +output.post_hoc_correlations.correlations.test.p=[]; +output.post_hoc_correlations.correlations.validation.RHO=[]; +output.post_hoc_correlations.correlations.validation.p=[]; +output.post_hoc_mdl.test.R2=[]; +output.post_hoc_mdl.test.p=[]; +% output.post_hoc_mdl.test.mdl=[]; +output.post_hoc_mdl.validation.R2=[]; +output.post_hoc_mdl.validation.p=[]; +% output.post_hoc_mdl.validation.mdl=[]; +log_coll = {log_epsilon, log_epsilon_all; log_omega, log_omega_all; 'test', 'all'}; + +for l=1:size(log_coll,2) + for i=1:(size(output.final_parameters,1)-1) + + epsilon = output.final_parameters{i,log_coll{1,i}}; + omega = output.final_parameters{i,log_coll{2,i}}; + X = [epsilon, omega]; + + if input.selection_train == 1 + if strcmp(log_coll{3,i}, 'test') + y = output.post_hoc_correlations.data_collection(output.CV.cv_outer_indices.TestInd{1,output.final_parameters{i,1}},:); + else + end + elseif input.selection_train == 2 + idx = []; + for ii=1:size(output.CV.cv_outer_indices.TestInd,2) + idx = [idx; output.CV.cv_outer_indices.TestInd{1,ii}]; + end + y = output.post_hoc_correlations.data_collection(idx,:); + end + + [RHO,p] = corr(X, y, 'Rows', 'complete', 'Type', 'Spearman'); + % [p(1,:), ~] = dp_FDR_adj(p(1,:)); + % [p(2,:), ~] = dp_FDR_adj(p(2,:)); + + output.post_hoc_correlations.correlations.test.RHO = [output.post_hoc_correlations.correlations.test.RHO, RHO']; + output.post_hoc_correlations.correlations.test.p = [output.post_hoc_correlations.correlations.test.p, p']; + + p=[]; R2=[]; mdl_coll={}; + + for v=1:size(y,2) + mdl = fitlm(X, y(:,v)); + p(v,1) = mdl.coefTest; + R2(v,1) = mdl.Rsquared.Adjusted; + % mdl_coll = [mdl_coll; {mdl}]; + end + + output.post_hoc_mdl.test.R2 = [output.post_hoc_mdl.test.R2, R2]; + output.post_hoc_mdl.test.p = [output.post_hoc_mdl.test.p, p]; + % output.post_hoc_mdl.test.mdl = [output.post_hoc_mdl.test.mdl, mdl_coll]; + + % [output.post_hoc_mdl.test.p, ~] = dp_FDR_adj(output.post_hoc_mdl.test.p); + + if ~islogical(input.validation_set) + epsilon = output.validation_results{i,log_epsilon}; + omega = output.validation_results{i,log_omega}; + X = [epsilon, omega]; + y = output.post_hoc_correlations.data_collection(output.validation_indices.TestInd{1,1},:); + [RHO,p] = corr(X, y, 'Rows', 'complete', 'Type', 'Spearman'); + % [p(1,:), ~] = dp_FDR_adj(p(1,:)); + % [p(2,:), ~] = dp_FDR_adj(p(2,:)); + + output.post_hoc_correlations.correlations.validation.RHO = [output.post_hoc_correlations.correlations.validation.RHO, RHO']; + output.post_hoc_correlations.correlations.validation.p = 
[output.post_hoc_correlations.correlations.validation.p, p']; + + p=[]; R2=[];mdl_coll={}; + + for v=1:size(y,2) + mdl = fitlm(X, y(:,v)); + p(v,1) = mdl.coefTest; + R2(v,1) = mdl.Rsquared.Adjusted; + % mdl_coll=[mdl_coll; {mdl}]; + end + + output.post_hoc_mdl.validation.R2 = [output.post_hoc_mdl.validation.R2, R2]; + output.post_hoc_mdl.validation.p = [output.post_hoc_mdl.validation.p, p]; + % output.post_hoc_mdl.validation.mdl = [output.post_hoc_mdl.validation.mdl, mdl_coll]; + + % [output.post_hoc_mdl.validation.p, ~] = dp_FDR_adj(output.post_hoc_mdl.validation.p); + + end + + end + + output.post_hoc_correlations.correlations.test.p = dp_FDR_adj(output.post_hoc_correlations.correlations.test.p); + output.post_hoc_correlations.correlations.validation.p = dp_FDR_adj(output.post_hoc_correlations.correlations.validation.p); + output.post_hoc_mdl.test.p = dp_FDR_adj(output.post_hoc_mdl.test.p); + output.post_hoc_mdl.validation.p = dp_FDR_adj(output.post_hoc_mdl.validation.p); + + output.post_hoc_correlations.correlations.test.table_RHO = array2table(output.post_hoc_correlations.correlations.test.RHO, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_correlations.correlations.test.table_p = array2table(output.post_hoc_correlations.correlations.test.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.test.table_p = array2table(output.post_hoc_mdl.test.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.test.table_R2 = array2table(output.post_hoc_mdl.test.R2, 'RowNames', output.post_hoc_correlations.names); + + temp=[];temp_names={};nn=1; + for rr=1:size(output.post_hoc_correlations.correlations.test.RHO,1) + temp(nn,:) = output.post_hoc_correlations.correlations.test.RHO(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_RHO']; + nn=nn+1; + temp(nn,:) = output.post_hoc_correlations.correlations.test.p(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_p']; + nn=nn+1; + end + + output.post_hoc_correlations.correlations.test.table_RHO_p = array2table(temp, 'RowNames', temp_names); + + if ~islogical(input.validation_set) + output.post_hoc_correlations.correlations.validation.table_RHO = array2table(output.post_hoc_correlations.correlations.validation.RHO, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_correlations.correlations.validation.table_p = array2table(output.post_hoc_correlations.correlations.validation.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.validation.table_p = array2table(output.post_hoc_mdl.validation.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.validation.table_R2 = array2table(output.post_hoc_mdl.validation.R2, 'RowNames', output.post_hoc_correlations.names); + end + + temp=[];temp_names={};nn=1; + for rr=1:size(output.post_hoc_correlations.correlations.validation.RHO,1) + temp(nn,:) = output.post_hoc_correlations.correlations.validation.RHO(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_RHO']; + nn=nn+1; + temp(nn,:) = output.post_hoc_correlations.correlations.validation.p(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_p']; + nn=nn+1; + end + + output.post_hoc_correlations.correlations.validation.table_RHO_p = array2table(temp, 'RowNames', temp_names); + + nn=1;temp_vars={}; + for i=1:size(output.post_hoc_correlations.correlations.test.table_RHO,2)/2 + temp_vars{1,nn} = ['epsilon_LV', num2str(i)]; + nn=nn+1; + temp_vars{1,nn} = ['omega_LV', num2str(i)]; 
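+        % each latent variable LV_i contributes one column pair: correlations
+        % with its X-side latent score (epsilon_LVi) and with its Y-side
+        % latent score (omega_LVi)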
+ nn=nn+1; + end + + temp_vars_names={}; + for i=1:size(output.post_hoc_correlations.correlations.test.table_RHO,2)/2 + temp_vars_names{1,i} = ['LV', num2str(i), '_latent_scores']; + end + + output.post_hoc_correlations.correlations.test.table_RHO.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.correlations.test.table_p.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.correlations.test.table_RHO_p.Properties.VariableNames = temp_vars; + output.post_hoc_mdl.test.table_p.Properties.VariableNames = temp_vars_names; + output.post_hoc_mdl.test.table_R2.Properties.VariableNames = temp_vars_names; + + if ~islogical(input.validation_set) + + output.post_hoc_correlations.correlations.validation.table_RHO.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.correlations.validation.table_p.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.correlations.validation.table_RHO_p.Properties.VariableNames = temp_vars; + output.post_hoc_mdl.validation.table_p.Properties.VariableNames = temp_vars_names; + output.post_hoc_mdl.validation.table_R2.Properties.VariableNames = temp_vars_names; + + end + + % save(IN.results_path, 'input', 'output', 'setup'); + +end + +end + + diff --git a/Visualization_Module/dp_sociodemographic_2020_full.m b/Visualization_Module/dp_sociodemographic_2020_full.m new file mode 100644 index 0000000..4969f56 --- /dev/null +++ b/Visualization_Module/dp_sociodemographic_2020_full.m @@ -0,0 +1,684 @@ +%% DP script for retrieving sociodemographic data +function [input, output, setup]= dp_sociodemographic_2020_full(IN) + +load(IN.results_path); + +addpath(genpath('/opt/NM/NeuroMiner_Release/')); +addpath('/opt/SPM/spm12_v6685_cat12_r1207/'); +addpath(genpath('/volume/DP_FEF/ScrFun/ScriptsRepository')); + +if contains(IN.results_path, 'CTQ') + load('/volume/HCStress/Data/Stress_SPLS_DP/Stress_SPLS_DP/DATA/17-Jul-2018/Stress_SPLS_DP_data_table_NM_17-Jul-2018.mat') +elseif contains(IN.results_path, 'CISS') + load('/volume/data/PRONIA/DataDump/03-Apr-2020/QueryData/Munich/All_Birmingham_IncludingBrain_Disc/All_Birmingham_IncludingBrain_Disc/DATA/03-Aug-2020/All_Birmingham_IncludingBrain_Disc_data_table_NM_03-Aug-2020.mat') +elseif contains(IN.results_path, 'WSS') + load('/volume/data/PRONIA/DataDump/03-Apr-2020/QueryData/Munich/PRONIAQueryTemplate_v3_1_MU_DP_WSS_PLS_Disc/PRONIAQueryTemplate_v3_1_MU_DP_WSS_PLS_Disc/DATA/19-May-2020/PRONIAQueryTemplate_v3_1_MU_DP_WSS_PLS_Disc_data_table_NM_19-May-2020.mat') +elseif contains(IN.results_path, 'immune', 'IgnoreCase', true) + +else + disp('Something''s wrong!'); +end + +load('/volume/HCStress/Data/EHI_data_for_req_PSN.mat'); +load('/volume/DP_FEF/WHOQOL_data.mat'); +EHI_01=[]; + +%% solve issues with w iteration for final parameters +if contains(IN.results_path, 'final_vis') + for i=1:size(output.final_parameters,1) + log_find = [output.opt_parameters.(['LV_', num2str(i)]){:,7}] == output.final_parameters{i,7}; + if sum(log_find)>1 + temp = output.opt_parameters.(['LV_', num2str(i)])(log_find,:); + log_find = [temp{:,2}] == output.final_parameters{i,2}; + output.final_parameters{i,1} = temp{log_find,1}; + else + output.final_parameters{i,1} = output.opt_parameters.(['LV_', num2str(i)]){log_find,1}; + end + end +end + +% compute epsilon and omega all by projecting u and v onto X and Y +try temp = load(input.X); + temp_names = fieldnames(temp); + X = temp.(temp_names{1}); +catch + X = input.X; +end + +Y = input.Y; + +RHO=[]; epsilon_all={}; omega_all={}; + +for 
i=1:size(output.final_parameters,1)
+
+    if ~input.corrected_log(i)
+        Covars = nan(size(input.Y,1),1);
+        % note: this local value is never read; dp_master_correctscale below
+        % uses input.correction_target
+        correction_target = 3;
+    else
+        Covars = input.covariates;
+    end
+
+    [OUT_x, OUT_y] = dp_master_correctscale(X, Y, Covars, input.scaling_method, input.correction_target);
+
+    u = output.final_parameters{i, strcmp(output.parameters_names, 'u')};
+    v = output.final_parameters{i, strcmp(output.parameters_names, 'v')};
+
+    [RHO(i,1), epsilon_all{i,1}, omega_all{i,1}, ~, ~] = dp_projection(OUT_x, OUT_y, u, v, input.correlation_method);
+
+    mdl = fitlm(epsilon_all{i,1}, omega_all{i,1});
+    output.measures.linear_models{i} = mdl;
+    output.measures.Rsquared_Ordinary(i) = mdl.Rsquared.Ordinary;
+
+    [X, Y] = proj_def(X, Y, u, v);
+
+end
+
+output.final_parameters = [output.final_parameters, epsilon_all, omega_all];
+output.parameters_names = [output.parameters_names, 'epsilon_all', 'omega_all'];
+
+%% load and add additional handedness data
+
+for i=1:size(input.data_collection.PSN_BOG,1)
+    try
+        EHI_01(i,1)=cell2mat(data_table.EHI_01(str2num(cell2mat(data_table{:, 'PATIENT_ID'})) == str2num(input.data_collection.PSN_BOG{i,1})));
+    catch
+        disp([input.data_collection.PSN_BOG{i}, ' is missing.']);
+        EHI_01(i,1)=NaN; % append EHI at the end
+    end
+end
+
+EHI_01 = EHI_01==2;
+
+try temp = load(input.MRI);
+catch
+    temp = load(input.X);
+end
+field = fieldnames(temp);
+MRI_for_analysis = temp.(field{1});
+
+%% get data
+% for i=1:size(input.data_collection.PSN_BOG,1)
+data_table_selected=[];
+for i=1:size(input.data_collection.PSN_BOG,1)
+    try data_table_selected(i,:)=data_table_NM(contains(ID_name, input.data_collection.PSN_BOG{i,1}),:);
+    catch
+        disp([input.data_collection.PSN_BOG{i}, ' is missing.']);
+        data_table_selected(i,:)=NaN;
+    end
+end
+
+sociodem_variables = {'DEMOG_T0T1T2_31AA_EducationYears_T0', 'GF_S_1_Current_T0',...
+    'GF_R_1_Current_T0', 'GAF_S_PastMonth_Screening', 'GAF_DI_PastMonth_Screening'};
+
+sociodem_collection=[]; sociodem_collection_names={};
+for i=1:size(sociodem_variables,2)
+    temp = data_table_selected(:,contains(data_table_names_NM, sociodem_variables{i}));
+    sociodem_collection=[sociodem_collection,temp];
+    temp_names = data_table_names_NM(contains(data_table_names_NM, sociodem_variables{i}));
+    sociodem_collection_names=[sociodem_collection_names,temp_names];
+end
+
+try sociodem_collection = [input.data_collection.age, input.data_collection.sex(:,2), input.data_collection.IQR, EHI_01, sociodem_collection];
+catch
+    input.data_collection.age = input.data_complete.foranalysis.basic.age;
+    input.data_collection.sex = input.data_complete.foranalysis.basic{:, {'male_sex', 'female_sex'}};
+    input.data_collection.IQR = input.data_complete.foranalysis.mri.Cat12_IQR_sMRI_T0;
+    input.data_collection.Diagfull_names = input.selected_studygroups;
+    input.data_collection.Labels = input.data_complete.foranalysis.basic{input.final_PSN, 'Labels'};
+    sociodem_collection = [input.data_collection.age, input.data_collection.sex(:,2), input.data_collection.IQR, EHI_01, sociodem_collection];
+end
+sociodem_collection_names = ['age', 'female_sex', 'IQR', 'handedness', sociodem_collection_names];
+
+% clinical_collection = clinical_collection(:,contains(clinical_collection_names, 'T0'));
+% clinical_collection_names = clinical_collection_names(:,contains(clinical_collection_names, 'T0'));
+
+% compute PANSS scores
+panss_total_names = {'PANSS_P1_T0','PANSS_P2_T0','PANSS_P3_T0','PANSS_P4_T0','PANSS_P5_T0',...
+    'PANSS_P6_T0','PANSS_P7_T0','PANSS_N1_T0','PANSS_N2_T0','PANSS_N3_T0',...
+ 'PANSS_N4_T0','PANSS_N5_T0','PANSS_N6_T0','PANSS_N7_T0','PANSS_G01_T0',... + 'PANSS_G02_T0','PANSS_G03_T0','PANSS_G04_T0','PANSS_G05_T0','PANSS_G06_T0',... + 'PANSS_G07_T0','PANSS_G08_T0','PANSS_G09_T0','PANSS_G10_T0','PANSS_G11_T0',... + 'PANSS_G12_T0','PANSS_G13_T0','PANSS_G14_T0','PANSS_G15_T0','PANSS_G16_T0'}; +panss_pos_names = {'PANSS_P1_T0','PANSS_P2_T0','PANSS_P3_T0','PANSS_P4_T0','PANSS_P5_T0',... + 'PANSS_P6_T0','PANSS_P7_T0'}; +panss_neg_names = {'PANSS_N1_T0','PANSS_N2_T0','PANSS_N3_T0',... + 'PANSS_N4_T0','PANSS_N5_T0','PANSS_N6_T0','PANSS_N7_T0'}; +panss_gen_names = {'PANSS_G01_T0',... + 'PANSS_G02_T0','PANSS_G03_T0','PANSS_G04_T0','PANSS_G05_T0','PANSS_G06_T0',... + 'PANSS_G07_T0','PANSS_G08_T0','PANSS_G09_T0','PANSS_G10_T0','PANSS_G11_T0',... + 'PANSS_G12_T0','PANSS_G13_T0','PANSS_G14_T0','PANSS_G15_T0','PANSS_G16_T0'}; + +% panss positive scores +panss_total = sum(data_table_selected(:,ismember(data_table_names_NM, panss_total_names)),2); +panss_pos = sum(data_table_selected(:,ismember(data_table_names_NM, panss_pos_names)),2); +panss_neg = sum(data_table_selected(:,ismember(data_table_names_NM, panss_neg_names)),2); +panss_gen = sum(data_table_selected(:,ismember(data_table_names_NM, panss_gen_names)),2); + +% compute BDI scores +BDI_names = {'BDI2_01_T0','BDI2_02_T0','BDI2_03_T0','BDI2_04_T0','BDI2_05_T0',... + 'BDI2_06_T0','BDI2_07_T0','BDI2_08_T0','BDI2_09_T0','BDI2_10_T0','BDI2_11_T0',... + 'BDI2_12_T0','BDI2_13_T0','BDI2_14_T0','BDI2_15_T0','BDI2_16_T0','BDI2_17_T0',... + 'BDI2_18_T0','BDI2_19_T0','BDI2_20_T0','BDI2_21_T0'}; + +BDI_scores = sum(data_table_selected(:,ismember(data_table_names_NM, BDI_names)),2); + +clinical_collection = [panss_total, panss_pos, panss_neg, panss_gen, BDI_scores]; +clinical_collection_names = {'PANSS_total', 'PANSS_pos', 'PANSS_neg', 'PANSS_gen', 'BDI_scores'}; + +output.socio_clin_data.collection = [sociodem_collection, clinical_collection]; +output.socio_clin_data.collection_names = [sociodem_collection_names, clinical_collection_names]; + +output.socio_clin_data.means_std=[]; + +for i=1:size(unique(input.data_collection.Diag),1) + output.socio_clin_data.means_std(1:2:size(output.socio_clin_data.collection,2)*2,i) = nanmean(output.socio_clin_data.collection(input.data_collection.Diag==i,:),1)'; + output.socio_clin_data.means_std(2:2:size(output.socio_clin_data.collection,2)*2,i) = nanstd(output.socio_clin_data.collection(input.data_collection.Diag==i,:),1)'; +end + +temp_all=[]; +temp_all(1:2:size(output.socio_clin_data.collection,2)*2,1) = nanmean(output.socio_clin_data.collection,1)'; +temp_all(2:2:size(output.socio_clin_data.collection,2)*2,1) = nanstd(output.socio_clin_data.collection,1)'; + +output.socio_clin_data.means_std = [temp_all, output.socio_clin_data.means_std]; +output.socio_clin_data.names = output.socio_clin_data.collection_names'; +output.socio_clin_data.labels = ['all', input.data_collection.Diagfull_names]; + +% get site numbers for diagnoses +output.sites_data.counts=[]; +for i=1:size(input.sites,2) + [~, ~, ic_sites] = unique(input.data_collection.Diag(input.sites(:,i)>0)); + output.sites_data.counts(i,:) = accumarray(ic_sites, 1)'; +end + +[~, ~, ic_sites] = unique(input.data_collection.Diag); +temp_all = accumarray(ic_sites, 1)'; + +output.sites_data.counts = [temp_all; output.sites_data.counts]; +output.sites_data.names = input.data_collection.Diagfull_names; +output.sites_data.labels = input.sites_names; + +% test for site imbalances +bins = 0:(size(output.sites_data.labels,2)-1); +x = 
sum(output.sites_data.counts(2:end,:),2); +obsCounts = x; +expCounts = ones(7,1)*round(mean(obsCounts)); +try [h,p,st] = chi2gof(bins,'Ctrs',bins, 'Frequency',obsCounts, 'Expected',expCounts,'NParams',0); +catch + difference = sum(expCounts)-sum(obsCounts); + obsCounts(end) = obsCounts(end)+difference; + [h,p,st] = chi2gof(bins,'Ctrs',bins, 'Frequency',obsCounts, 'Expected',expCounts,'NParams',0); +end + +output.sites_data.main_stats = [h,p,st.chi2stat]; +labels = output.sites_data.labels; nn=1; +for ii=1:size(labels,2) + for iii=(ii+1):size(labels,2) + if ii~=iii + bins = 0:1; + x_1 = x(contains(labels, labels(ii))); + x_2 = x(contains(labels, labels(iii))); + obsCounts = [sum(x_1), sum(x_2)]; + expCounts = [round(mean(obsCounts)), sum([x_1; x_2])-round(mean(obsCounts))]; + [h,p,st] = chi2gof(bins,'Ctrs',bins, 'Frequency',obsCounts, 'Expected',expCounts,'NParams',0); + temp_sites(nn,:) = [st.chi2stat, p]; + row_names{nn,1} = [labels{ii}, '_', labels{iii}]; + else + temp_sites(nn,:) = [NaN, NaN]; + row_names{nn,1} = ['NaN_', num2str(nn)]; + end + nn=nn+1; + end +end + +output.sites_data.multcomp_stats = array2table(temp_sites, 'RowNames', strrep(row_names, 'INSTITUTE_SHORTNAME_sMRI_T0_', ''), 'VariableNames', {'Chi2Stat', 'p'}); +output.sites_data.multcomp_stats.p = dp_FDR_adj(output.sites_data.multcomp_stats.p); + +%% get medication and IQ +IN.groups = {'ROD', 'CHR', 'ROP'}; +output.meds_and_iq = dp_pronia_meds_and_IQ(IN); + +%% test for significant differences between groups +% first test with ANOVA for overall differences +output.socio_clin_data.sociodem_results=[]; output.socio_clin_data.sociodem_results_ext={}; +output.socio_clin_data.chi2square_mult=[]; output.socio_clin_data.chi2square_mult_ext={}; +for i=1:size(output.socio_clin_data.names,1) + if size(unique(output.socio_clin_data.collection(:,i)),1)<4 + bins = 0:1; + x = output.socio_clin_data.collection(:,i); + obsCounts = [sum(x), size(x,1)-sum(x)]; + expCounts = [round(mean(obsCounts)), size(x,1)-round(mean(obsCounts))]; + [h,p,st] = chi2gof(bins,'Ctrs',bins, 'Frequency',obsCounts, 'Expected',expCounts,'NParams',0); + temp_sociodem_results(i,:) = [st.chi2stat, p]; + output.socio_clin_data.sociodem_results_ext(i,:) = {p,h,st}; + labels = unique(input.data_collection.Labels); + nn=1; temp_mult=[]; row_names={}; + for ii=1:size(labels,1) + for iii=(ii+1):size(labels,1) + if ii~=iii + bins = 0:1; + x_1 = output.socio_clin_data.collection(contains(input.data_collection.Labels, labels(ii)),i); + x_2 = output.socio_clin_data.collection(contains(input.data_collection.Labels, labels(iii)),i); + obsCounts = [sum(x_1), sum(x_2)]; + expCounts = [round(mean(obsCounts)), sum([x_1; x_2])-round(mean(obsCounts))]; + [h,p,st] = chi2gof(bins,'Ctrs',bins, 'Frequency',obsCounts, 'Expected',expCounts,'NParams',0); + temp_mult(nn,:) = [st.chi2stat, p]; + output.socio_clin_data.chi2square_mult_ext.(output.socio_clin_data.names{i})(nn,:)= {p,h,st}; + row_names{nn,1} = [labels{ii}, '_', labels{iii}]; + else + temp_mult(nn,:) = [NaN, NaN]; + output.socio_clin_data.chi2square_mult_ext.(output.socio_clin_data.names{i})(nn,:)= {NaN,NaN,NaN}; + row_names{nn,1} = ['NaN_', num2str(nn)]; + end + nn=nn+1; + end + end + output.socio_clin_data.chi2square_mult.(output.socio_clin_data.names{i}) = array2table(temp_mult, 'RowNames', row_names, 'VariableNames', {'Chi2Stat', 'p'}); + output.socio_clin_data.chi2square_mult.(output.socio_clin_data.names{i}).p = dp_FDR_adj(output.socio_clin_data.chi2square_mult.(output.socio_clin_data.names{i}).p); + else + [p,tbl,stats] 
= kruskalwallis(output.socio_clin_data.collection(:,i), input.data_collection.Labels); + close all + temp_sociodem_results(i,:) = [tbl{2,5}, p]; + output.socio_clin_data.sociodem_results_ext(i,:) = {p,tbl,stats}; + [output.socio_clin_data.Dunn_results{i,1}, ~, ~, output.socio_clin_data.Dunn_results_labels] = multcompare(stats, 'CType', 'dunn-sidak', 'Estimate', 'kruskalwallis'); + close all + end +end + +output.socio_clin_data.sociodem_results = array2table(temp_sociodem_results, 'RowNames', output.socio_clin_data.names, 'VariableNames', {'Chi2Stat', 'p'}); +output.socio_clin_data.sociodem_results.p = dp_FDR_adj(output.socio_clin_data.sociodem_results.p); + +%% look for specific correlations + +output.post_hoc_correlations.data_collection=[]; +output.post_hoc_correlations.names=[]; + +if any(strcmp(IN.SD_selection, 'GAF')) + + % get GAF data + GAF_names = {'GAF_S_LifeTime_Screening','GAF_S_PastYearT0_Screening',... + 'GAF_S_PastMonth_Screening','GAF_DI_LifeTime_Screening','GAF_DI_PastYear_Screening',... + 'GAF_DI_PastMonth_Screening', 'GF_S_1_Current_T0','GF_S_2_LowPastYearT0_T0',... + 'GF_S_3_HighPastYearT0_T0','GF_S_4_HighLifetimeT0_T0', 'GF_R_1_Current_T0',... + 'GF_R_2_LowPastYearT0_T0','GF_R_3_HighPastYearT0_T0','GF_R_4_HighLifetimeT0_T0'}; + + % GAF_variables = data_table_names_NM(~cellfun(@isempty,(strfind(data_table_names_NM, 'GAF')))); + GAF_data=[]; + for i=1:size(GAF_names,2) + temp = data_table_selected(:, contains(data_table_names_NM, GAF_names{i})); + GAF_data=[GAF_data,temp]; + end + output.post_hoc_correlations.names = [output.post_hoc_correlations.names, GAF_names]; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection, GAF_data]; +end + +if any(strcmp(IN.SD_selection, 'BDI')) + + % get BDI data + output.post_hoc_correlations.names = [output.post_hoc_correlations.names, 'BDI']; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection, BDI_scores]; + +end + +if any(strcmp(IN.SD_selection, 'neurocognition')) + + % get extraction data + load('/volume/RU_DP_immune/Data/Immune_megapaper_request_Disc/Immune_megapaper_request_Disc/DATA/18-Dec-2020/Immune_megapaper_request_Disc_Data_all_18-Dec-2020.mat'); + temp_data_table = data_table_all; + + load('/volume/RU_DP_immune/Data/Immune_megapaper_request_Repl/Immune_megapaper_request_Repl/DATA/18-Dec-2020/Immune_megapaper_request_Repl_Data_all_18-Dec-2020.mat'); + temp_names = temp_data_table.Properties.VariableNames(matches(temp_data_table.Properties.VariableNames, data_table_all.Properties.VariableNames)); + data_table_all = [temp_data_table(:, temp_names); data_table_all(:, temp_names)]; + + NC = rs_neurocognition(data_table_all); + output.neurocognition_data=table; + for i=1:size(input.data_collection.PSN_BOG,1) + log_temp = str2num(cell2mat(data_table_all.PSN)) == str2num(input.data_collection.PSN_BOG{i,1}); + if sum(log_temp)~=0 + output.neurocognition_data(i,:) = NC.single_scores(log_temp, :); + else + output.neurocognition_data(i,:) = array2table(nan(1, size(NC.single_scores,2)), 'VariableNames', NC.single_scores.Properties.VariableNames); + end + end + output.neurocognition_data.Properties.RowNames = input.data_collection.PSN_BOG; + + output.post_hoc_correlations.names = [output.post_hoc_correlations.names, output.neurocognition_data.Properties.VariableNames]; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection, output.neurocognition_data.Variables]; + +end + + +if any(strcmp(IN.SD_selection, 'NEO')) + + % 
get NEO FFI data + neo_log = ~cellfun(@isempty,(strfind(data_table_names_NM, 'NEO'))); + neo_names = data_table_names_NM(neo_log); + neo_data_temp = data_table_selected(:, neo_log); + + NEO.neuroticism.negative_affect = [1, 11, 16, 31, 46]; + NEO.neuroticism.self_reproach = [6, 21, 26, 36, 41, 51, 56]; + NEO.extraversion.positive_affect = [7, 12, 37, 42]; + NEO.extraversion.sociability = [2, 17, 27, 57]; + NEO.extraversion.activity = [22, 32, 47, 52]; + NEO.openness.aesthetic_interests = [13, 23, 43]; + NEO.openness.intellectual_interests = [33, 48, 53, 58]; + NEO.openness.unconventionality = [3,8,18, 28, 38]; + NEO.agreeableness.nonantagonistic_orientation = [9,14,19,24,29,44,54,59]; + NEO.agreeableness.prosocial_orientation = [4, 34, 39, 49]; + NEO.conscientiousness.orderliness = [5,10,15,30,55]; + NEO.conscientiousness.goal_striving = [25, 35, 60]; + NEO.conscientiousness.dependability = [20, 40, 45, 50]; + NEO_inverse_questions = [1,16,31,46,12,42,27,57,23,33,48,3,8,18,38,9,14,24,29,44,54,59,39,15,30,55,45]; + NEO_inverse_algorithm = [1:1:5;5:-1:1]; + NEO_poorly_functioning = [6,12,27,42,3,8,28,38,9,19,24,29,34,15]; + + %exclude poorly functioning questions (optional) + fields=fieldnames(NEO); + for i=1:size(fields,1) + fields1=fieldnames(NEO.(fields{i})); + for ii=1:size(fields1,1) + temp = NEO.(fields{i}).(fields1{ii}); + log = ismember(temp, NEO_poorly_functioning); + temp(log)=[]; + NEO.(fields{i}).(fields1{ii})=temp; + end + end + + neo_data_inv=[]; + for i=1:size(neo_data_temp,1) + temp_row = neo_data_temp(i,:); + for ii=1:size(temp_row,2) + if sum(ii==NEO_inverse_questions)>0 + try + temp_row(ii)=NEO_inverse_algorithm(2,temp_row(ii)==NEO_inverse_algorithm(1,:)); + catch + temp_row(ii)=NaN; + end + end + end + neo_data_inv(i,:)=temp_row; + end + + % impute data with at least 80% complete questions + log_impute = sum(isnan(neo_data_inv),2)<=(0.2*size(neo_data_inv,2)); + temp_neo_data_inv = dp_impute(neo_data_inv(log_impute,:), 'euclidean'); + neo_data_inv_imp = neo_data_inv; + neo_data_inv_imp(log_impute,:) = temp_neo_data_inv; + + % compute sum scores + fields = fieldnames(NEO); + for i=1:size(fields,1) + fields1 = fieldnames(NEO.(fields{i})); + temp_collect.(fields{i})=[]; + for ii=1:size(fields1,1) + temp_collect.(fields{i}) = [temp_collect.(fields{i}), NEO.(fields{i}).(fields1{ii})]; + end + end + + fields=fieldnames(temp_collect); + neo_names=[]; + for i=1:size(fields,1) + neo_names=[neo_names, {['NEO_', fields{i}]}]; + end + + neo_data_foranalysis = neo_data_inv_imp; + for i=1:size(neo_data_foranalysis,1) + temp_sum=[]; + for ii=1:size(fields,1) + temp_sum = [temp_sum, sum(neo_data_foranalysis(i,temp_collect.(fields{ii})))]; + end + neo_data_sum(i,:)=temp_sum; + end + + % add NEO FFI data to corr data + + output.post_hoc_correlations.names = [output.post_hoc_correlations.names,neo_names]; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection,neo_data_sum]; +end + +% get QUOL data +if any(strcmp(IN.SD_selection, 'QOL')) + WHO_table = [HC;PAT]; + + for i=1:26 + if i<10 + try + WHO_raw(:,i) = WHO_table.(['WHOQOL_0', num2str(i)]); + catch + temp = WHO_table.(['WHOQOL_0', num2str(i)]); + temp(cellfun(@isempty, temp))={'NaN'}; + WHO_raw(:,i)=temp; + end + else + try + WHO_raw(:,i) = WHO_table.(['WHOQOL_', num2str(i)]); + catch + temp = WHO_table.(['WHOQOL_', num2str(i)]); + temp(cellfun(@isempty, temp))={'NaN'}; + WHO_raw(:,i)=temp; + end + end + end + + for i=1:size(WHO_raw,1) + temp=WHO_raw(i,:); + log=cellfun(@isempty, temp); + 
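+        % empty answer cells are filled with the string 'NaN' so that the
+        % str2num conversion below yields a numeric NaN instead of erroring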
temp(log)={num2str(NaN)}; + WHO_raw(i,:)=temp; + for ii=1:size(temp,2) + try + temp_new(i,ii)=str2num(temp{ii}); + catch + temp_new(i,ii)=NaN; + end + end + end + + WHO_new=temp_new; + + WHO_ID=[HC.PATIENT_ID; PAT.PATIENT_ID]; + + WHO_selected=[]; + for i=1:size(input.data_collection.PSN_BOG,1) + try + WHO_selected(i,:)=WHO_new(strcmp(WHO_ID, input.data_collection.PSN_BOG{i}),:); + catch + disp([input.data_collection.PSN_BOG{i}, ' is missing.']); + WHO_selected(i,:)=NaN; + end + end + + % impute data with at least 80% complete questions + log_impute = sum(isnan(WHO_selected),2)<=(0.2*size(WHO_selected,2)); + temp_WHO_selected = dp_impute(WHO_selected(log_impute,:), 'euclidean'); + WHO_selected_imp = WHO_selected; + WHO_selected_imp(log_impute,:) = temp_WHO_selected; + + % compute sum scores + WHO.physical = [3, 4, 10, 15, 16, 17, 18]; + WHO.psychosocial = [5, 6, 7, 11, 19, 26]; + WHO.social_relationship = [20, 21, 22]; + WHO.environment = [8, 9, 12, 13, 14, 23, 24, 25]; + + WHO_sum = [sum(WHO_selected_imp(:,WHO.physical),2), sum(WHO_selected_imp(:,WHO.psychosocial),2),... + sum(WHO_selected_imp(:,WHO.social_relationship),2), sum(WHO_selected_imp(:,WHO.environment),2)]; + + WHO_names = {'WHO_physical', 'WHO_psychosocial', 'WHO_social_relationship', 'WHO_environment'}; + + output.post_hoc_correlations.names = [output.post_hoc_correlations.names,WHO_names]; + output.post_hoc_correlations.data_collection = [output.post_hoc_correlations.data_collection,WHO_sum]; +end + +output.post_hoc_correlations.data_table = array2table(output.post_hoc_correlations.data_collection, 'RowNames', input.data_collection.PSN_BOG(:,1)); +output.post_hoc_correlations.data_table.Properties.VariableNames = output.post_hoc_correlations.names; + +%% proper correlations +log_epsilon = strcmp(output.parameters_names, 'epsilon'); +log_omega = strcmp(output.parameters_names, 'omega'); +log_epsilon_all = strcmp(output.parameters_names, 'epsilon_all'); +log_omega_all = strcmp(output.parameters_names, 'omega_all'); +log_coll = {log_epsilon, log_epsilon_all; log_omega, log_omega_all; 'test', 'all'}; + +for l=1:size(log_coll,2) + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.RHO=[]; + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.p=[]; + output.post_hoc_correlations.(log_coll{3,l}).correlations.validation.RHO=[]; + output.post_hoc_correlations.(log_coll{3,l}).correlations.validation.p=[]; + output.post_hoc_mdl.(log_coll{3,l}).test.R2=[]; + output.post_hoc_mdl.(log_coll{3,l}).test.p=[]; + output.post_hoc_mdl.(log_coll{3,l}).validation.R2=[]; + output.post_hoc_mdl.(log_coll{3,l}).validation.p=[]; + + for i=1:(size(output.final_parameters,1)-1) + + epsilon = output.final_parameters{i,log_coll{1,l}}; + omega = output.final_parameters{i,log_coll{2,l}}; + X = [epsilon, omega]; + + if input.selection_train == 1 + if strcmp(log_coll{3,l}, 'test') + y = output.post_hoc_correlations.data_collection(output.CV.cv_outer_indices.TestInd{1,output.final_parameters{i,1}},:); + elseif strcmp(log_coll{3,l}, 'all') + idx = []; + for ii=1:size(output.CV.cv_outer_indices.TestInd,2) + idx = [idx; output.CV.cv_outer_indices.TestInd{1,ii}]; + end + X = X(idx,:); + y = output.post_hoc_correlations.data_collection(idx,:); + end + elseif input.selection_train == 2 + idx = []; + for ii=1:size(output.CV.cv_outer_indices.TestInd,2) + idx = [idx; output.CV.cv_outer_indices.TestInd{1,ii}]; + end + y = output.post_hoc_correlations.data_collection(idx,:); + end + + [RHO,p] = corr(X, y, 'Type', 'Spearman', 'rows', 'complete'); + % 
[p(1,:), ~] = dp_FDR_adj(p(1,:)); + % [p(2,:), ~] = dp_FDR_adj(p(2,:)); + + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.RHO = [output.post_hoc_correlations.(log_coll{3,l}).correlations.test.RHO, RHO']; + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.p = [output.post_hoc_correlations.(log_coll{3,l}).correlations.test.p, p']; + + p=[]; R2=[]; mdl_coll={}; + + for v=1:size(y,2) + mdl = fitlm(X, y(:,v)); + p(v,1) = mdl.coefTest; + R2(v,1) = mdl.Rsquared.Adjusted; + % mdl_coll = [mdl_coll; {mdl}]; + end + + output.post_hoc_mdl.(log_coll{3,l}).test.R2 = [output.post_hoc_mdl.(log_coll{3,l}).test.R2, R2]; + output.post_hoc_mdl.(log_coll{3,l}).test.p = [output.post_hoc_mdl.(log_coll{3,l}).test.p, p]; + % output.post_hoc_mdl.(log_coll{3,l}).test.mdl = [output.post_hoc_mdl.(log_coll{3,l}).test.mdl, mdl_coll]; + + % [output.post_hoc_mdl.(log_coll{3,l}).test.p, ~] = dp_FDR_adj(output.post_hoc_mdl.(log_coll{3,l}).test.p); + + if ~islogical(input.validation_set) + epsilon = output.validation_results{i,log_epsilon}; + omega = output.validation_results{i,log_omega}; + X = [epsilon, omega]; + y = output.post_hoc_correlations.data_collection(output.validation_indices.TestInd{1,1},:); + [RHO,p] = corr(X, y, 'Type', 'Spearman', 'rows', 'complete'); + % [p(1,:), ~] = dp_FDR_adj(p(1,:)); + % [p(2,:), ~] = dp_FDR_adj(p(2,:)); + + output.post_hoc_correlations.correlations.validation.RHO = [output.post_hoc_correlations.correlations.validation.RHO, RHO']; + output.post_hoc_correlations.correlations.validation.p = [output.post_hoc_correlations.correlations.validation.p, p']; + + p=[]; R2=[];mdl_coll={}; + + for v=1:size(y,2) + mdl = fitlm(X, y(:,v)); + p(v,1) = mdl.coefTest; + R2(v,1) = mdl.Rsquared.Adjusted; + % mdl_coll=[mdl_coll; {mdl}]; + end + + output.post_hoc_mdl.validation.R2 = [output.post_hoc_mdl.validation.R2, R2]; + output.post_hoc_mdl.validation.p = [output.post_hoc_mdl.validation.p, p]; + % output.post_hoc_mdl.(log_coll{3,l}).validation.mdl = [output.post_hoc_mdl.(log_coll{3,l}).validation.mdl, mdl_coll]; + + % [output.post_hoc_mdl.(log_coll{3,l}).validation.p, ~] = dp_FDR_adj(output.post_hoc_mdl.(log_coll{3,l}).validation.p); + output.post_hoc_mdl.validation.p = dp_FDR_adj(output.post_hoc_mdl.validation.p); + end + + end + + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.p = dp_FDR_adj(output.post_hoc_correlations.(log_coll{3,l}).correlations.test.p); + output.post_hoc_mdl.(log_coll{3,l}).test.p = dp_FDR_adj(output.post_hoc_mdl.(log_coll{3,l}).test.p); + + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.table_RHO = array2table(output.post_hoc_correlations.(log_coll{3,l}).correlations.test.RHO, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.table_p = array2table(output.post_hoc_correlations.(log_coll{3,l}).correlations.test.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.(log_coll{3,l}).test.table_p = array2table(output.post_hoc_mdl.(log_coll{3,l}).test.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.(log_coll{3,l}).test.table_R2 = array2table(output.post_hoc_mdl.(log_coll{3,l}).test.R2, 'RowNames', output.post_hoc_correlations.names); + + temp=[];temp_names={};nn=1; + for rr=1:size(output.post_hoc_correlations.(log_coll{3,l}).correlations.test.RHO,1) + temp(nn,:) = output.post_hoc_correlations.(log_coll{3,l}).correlations.test.RHO(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_RHO']; + nn=nn+1; + 
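+        % The p-values interleaved here were already pooled and FDR-adjusted
+        % via dp_FDR_adj above. For intuition only, a Benjamini-Hochberg-style
+        % step-up adjustment looks like the sketch below (throwaway ex_*
+        % variables; the module's dp_FDR_adj implementation may differ):
+        ex_p = [0.001 0.020 0.030 0.400];
+        [ex_ps, ex_ix] = sort(ex_p);
+        ex_m = numel(ex_ps);
+        ex_adj = min(flip(cummin(flip(ex_ps .* ex_m ./ (1:ex_m)))), 1);
+        ex_p_adj = zeros(size(ex_p));
+        ex_p_adj(ex_ix) = ex_adj;   % adjusted p-values in the original order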
temp(nn,:) = output.post_hoc_correlations.(log_coll{3,l}).correlations.test.p(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_p']; + nn=nn+1; + end + + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.table_RHO_p = array2table(temp, 'RowNames', temp_names); + + if ~islogical(input.validation_set) + output.post_hoc_correlations.(log_coll{3,l}).correlations.validation.p = dp_FDR_adj(output.post_hoc_correlations.(log_coll{3,l}).correlations.validation.p); + output.post_hoc_correlations.(log_coll{3,l}).correlations.validation.table_RHO = array2table(output.post_hoc_correlations.correlations.validation.RHO, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_correlations.(log_coll{3,l}).correlations.validation.table_p = array2table(output.post_hoc_correlations.correlations.validation.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.validation.table_p = array2table(output.post_hoc_mdl.validation.p, 'RowNames', output.post_hoc_correlations.names); + output.post_hoc_mdl.validation.table_R2 = array2table(output.post_hoc_mdl.validation.R2, 'RowNames', output.post_hoc_correlations.names); + + temp=[];temp_names={};nn=1; + for rr=1:size(output.post_hoc_correlations.correlations.validation.RHO,1) + temp(nn,:) = output.post_hoc_correlations.correlations.validation.RHO(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_RHO']; + nn=nn+1; + temp(nn,:) = output.post_hoc_correlations.correlations.validation.p(rr,:); + temp_names{nn,1} = [output.post_hoc_correlations.names{rr}, '_p']; + nn=nn+1; + end + + output.post_hoc_correlations.correlations.validation.table_RHO_p = array2table(temp, 'RowNames', temp_names); + end + + nn=1;temp_vars={}; + for i=1:size(output.post_hoc_correlations.(log_coll{3,l}).correlations.test.table_RHO,2)/2 + temp_vars{1,nn} = ['epsilon_LV', num2str(i)]; + nn=nn+1; + temp_vars{1,nn} = ['omega_LV', num2str(i)]; + nn=nn+1; + end + + temp_vars_names={}; + for i=1:size(output.post_hoc_correlations.(log_coll{3,l}).correlations.test.table_RHO,2)/2 + temp_vars_names{1,i} = ['LV', num2str(i), '_latent_scores']; + end + + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.table_RHO.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.table_p.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.(log_coll{3,l}).correlations.test.table_RHO_p.Properties.VariableNames = temp_vars; + output.post_hoc_mdl.(log_coll{3,l}).test.table_p.Properties.VariableNames = temp_vars_names; + output.post_hoc_mdl.(log_coll{3,l}).test.table_R2.Properties.VariableNames = temp_vars_names; + + if ~islogical(input.validation_set) + + output.post_hoc_correlations.correlations.validation.table_RHO.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.correlations.validation.table_p.Properties.VariableNames = temp_vars; + output.post_hoc_correlations.correlations.validation.table_RHO_p.Properties.VariableNames = temp_vars; + output.post_hoc_mdl.validation.table_p.Properties.VariableNames = temp_vars_names; + output.post_hoc_mdl.validation.table_R2.Properties.VariableNames = temp_vars_names; + + end + + % save(IN.results_path, 'input', 'output', 'setup'); + +end + +end + + diff --git a/Visualization_Module/dp_spls.m b/Visualization_Module/dp_spls.m new file mode 100644 index 0000000..5bf2e93 --- /dev/null +++ b/Visualization_Module/dp_spls.m @@ -0,0 +1,281 @@ +%% adapted DP SPLS function for full functionality + +function [u, v, U, S, V, 
success] = dp_spls(X, Y, cu, cv, V_original)
+%
+%   Sparse PLS algorithm, please check Monteiro et al. 2016 for details:
+%   doi:10.1016/j.jneumeth.2016.06.011
+%
+%   Inputs: X, Y          - data matrices in the form: samples x features. These
+%                           should have each feature with mean = 0 and std = 1;
+%
+%           cu, cv        - sparsity regularization hyperparameters, must be
+%                           between 1 and sqrt(number_features). The lower they are,
+%                           the sparser the solution. If a value is outside this range,
+%                           no sparsity will be applied in the corresponding view.
+%
+%           e             - convergence threshold (see the code for info on how it
+%                           works). Default: 1E-5
+%
+%           itr_lim       - maximum number of iterations (it gives a warning
+%                           if it does not converge). Default: 1000
+%
+%
+%   Outputs: u, v         - weight vectors for X and Y, respectively
+%
+%            success      - will return "false" if something went wrong during
+%                           the weight vector computation
+%
+%   Version: 2016-08-20
+%__________________________________________________________________________
+
+% Written by Joao Matos Monteiro
+% Email: joao.monteiro@ucl.ac.uk
+
+
+%--- Initial checks
+%--------------------------------------------------------------------------
+
+% Check if lu and lv obey the limits
+if cu < 1 || cu > sqrt(size(X,2))
+    warning('lu is out of interval: 1 <= lu <= sqrt(size(X,2)). Not using sparsity on u.')
+    no_sparse_X = true;
+    failed_sparsity_u = false;
+else
+    no_sparse_X = false;
+end
+if cv < 1 || cv > sqrt(size(Y,2))
+    warning('lv is out of interval: 1 <= lv <= sqrt(size(Y,2)). Not using sparsity on v.')
+    no_sparse_Y = true;
+    failed_sparsity_v = false;
+else
+    no_sparse_Y = false;
+end
+
+% Convergence threshold
+if ~exist('e', 'var')
+    e = 1E-5;
+end
+
+% Iteration limit for calculating a vector pair
+if ~exist('itr_lim', 'var')
+    itr_lim = 1000;
+end
+
+
+
+%--- SPLS
+%--------------------------------------------------------------------------
+
+%--- Compute the covariance matrix
+C = X'*Y;
+
+%--- Initialise weight vectors
+u_temp = nan(size(X, 2), 2);
+v_temp = nan(size(Y, 2), 2);
+
+if exist('V_original', 'var')
+    %--- compute SVD
+    [U_resampled,S,V_resampled] = svd(C,0);
+
+    %--- Perform a Procrustes rotation whenever a reference V_original is provided
+    C_temp = V_original'*V_resampled;
+    [N,~,P] = svd(C_temp,0);
+    Q = N*P';
+    V = V_resampled * S * Q;
+    U = U_resampled * S * Q;
+
+    %--- continue with the rotated U and V matrices in the SPLS algorithm
+    u_temp(:,1) = U(:,1);
+    u_temp(:,1) = u_temp(:,1)./norm(u_temp(:,1)); % normalise
+    v_temp(:,1) = V(:,1);
+    v_temp(:,1) = v_temp(:,1)./norm(v_temp(:,1)); % normalise
+else
+    %--- Perform SVD
+    [U,S,V] = svd(C,0);
+    u_temp(:,1) = U(:,1);
+    u_temp(:,1) = u_temp(:,1)./norm(u_temp(:,1)); % normalise
+    v_temp(:,1) = V(:,1);
+    v_temp(:,1) = v_temp(:,1)./norm(v_temp(:,1)); % normalise
+end
+
+%--- Main Loop
+diff = 10*e; % start the diff with a high value
+i = 0;
+success = true;
+
+while diff > e && success
+
+    %--- Compute u
+    if no_sparse_X
+        u_temp(:,2) = C*v_temp(:,1);
+        u_temp(:,2) = u_temp(:,2)./norm(u_temp(:,2), 2);
+    else
+        [u_temp(:,2), tmp_success] = update(C*v_temp(:,1), cu);
+        failed_sparsity_u = ~tmp_success;
+        if failed_sparsity_u % If it was not successful, return the non-sparse version
+            u_temp(:,2) = C*v_temp(:,1);
+            u_temp(:,2) = u_temp(:,2)./norm(u_temp(:,2), 2);
+        end
+    end
+    dim_u = sum(u_temp(:,2)~=0);
+    if ~dim_u
+        error(['No weights were included in the model, this should never '...
+            'happen. 
Try increasing lu.']); + end + + + %--- Compute v + if no_sparse_Y + v_temp(:,2) = C'*u_temp(:,2); + v_temp(:,2) = v_temp(:,2)./norm(v_temp(:,2), 2); + else + [v_temp(:,2), tmp_success] = update(C'*u_temp(:,2), cv); + failed_sparsity_v = ~tmp_success; + if failed_sparsity_v % If it was not successful, return non sparse version + v_temp(:,2) = C'*u_temp(:,2); + v_temp(:,2) = v_temp(:,2)./norm(v_temp(:,2), 2); + end + end + dim_v = sum(v_temp(:,2)~=0); + if ~dim_v + error(['No weights were included in the model, this should never '... + 'happen. Try increasing lv.']); + end + + + %--- Check convergence + diff_u = norm(u_temp(:,2) - u_temp(:,1)); + diff_v = norm(v_temp(:,2) - v_temp(:,1)); + if diff_u >= diff_v, diff = diff_u; else diff = diff_v; end + % update u and v for the next iteration + u_temp(:,1) = u_temp(:,2); + v_temp(:,1) = v_temp(:,2); + + if i >= itr_lim +% warning('Maximum number of iterations reached.'); + success = false; + end + + i = i+1; +end + +if failed_sparsity_u +% warning(['There was a problem with the delta estimation in u.' ... +% ' The solution was forced to be non-sparse. Take results with a grain of salt.']); + success = false; +end + +if failed_sparsity_v +% warning(['There was a problem with the delta estimation in v.' ... +% ' The solution was forced to be non-sparse. Take results with a grain of salt.']); + success = false; +end + +% fprintf('SPLS: itr: %d diff: %.2e dim_u: %d dim_v: %d\n', i, diff, dim_u, dim_v); + +%--- Add converged weight vectors to output +u = u_temp(:, end); +v = v_temp(:, end); + + +end + + +%--- Private functions +%-------------------------------------------------------------------------- +function [up, success] = update(w, c) + +success = true; + +%--- update values +delta = 0; +up = soft_thresh(w, delta); +up = up./norm(up,2); + +%--- check if it obeys the condition. If not, find delta that does. 
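+% The condition is ||up||_1 <= c after L2-normalisation: soft_thresh
+% shrinks every entry of w towards zero by delta and zeroes entries with
+% |w(i)| <= delta, so larger deltas yield sparser solutions with a smaller
+% L1 norm of the normalised vector. Worked example (illustrative values):
+% for w = [3; -1; 0.5] and delta = 1, soft_thresh returns [2; 0; 0], which
+% L2-normalises to [1; 0; 0] with L1 norm 1. The loop below brackets the
+% target delta between delta1 and delta2, which bisec then refines.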
+if norm(up, 1) > c
+
+    delta1 = delta;
+    delta2 = delta1+1.1; % delta2 must be > 1
+
+    % get first estimate of delta2
+    flag = false;
+    i = 0;
+    max_delta = 0;
+    while ~flag
+        up = soft_thresh(w, delta2);
+        up = up./norm(up,2);
+
+        if sum(abs(up)) == 0 || isnan(sum(abs(up))) % if everything is zero, then up/|up| will be 0/0 = nan
+            delta2 = delta2/1.618; % They have to be different, otherwise it might not converge
+        elseif norm(up, 1) > c
+            delta1 = delta2;
+            delta2 = delta2*2; % They have to be different, otherwise it might not converge
+        elseif norm(up, 1) <= c
+            flag = true;
+        end
+
+        if delta2>max_delta, max_delta = delta2;end
+
+        if delta2 == 0
+%             warning('Delta shrank to zero; delta estimation failed.');
+            success = false;
+            break
+        end
+        i = i+1;
+        if i>1E4
+%             warning('First delta estimation update did not converge.');
+            delta1 = 0;
+            delta2 = max_delta;
+            break
+        end
+    end
+
+
+    up = bisec(w, c, delta1, delta2);
+    if isempty(up) || sum(isnan(up))>0
+%         warning('Delta estimation unsuccessful.')
+        success = false;
+    end
+
+
+end
+
+
+
+end
+
+function out = soft_thresh(a,delta)
+% Performs soft thresholding (it does not normalize the output)
+diff = abs(a)-delta;
+diff(diff<0) = 0;
+out = sign(a).*diff;
+
+end
+
+
+function out = bisec(K, c, x1,x2)
+converge = false;
+success = true;
+tolerance = 1E-6;
+while ~converge && success
+    x = (x2 + x1) / 2;
+    out = soft_thresh(K, x);
+    out = out./norm(out,2);
+    if sum(abs(out)) == 0
+        x2 = x;
+    elseif norm(out, 1) > c
+        x1 = x;
+    elseif norm(out, 1) < c
+        x2 = x;
+    end
+
+    diff = abs(norm(out, 1) - c);
+    if diff <= tolerance
+        converge = true;
+    elseif isnan(sum(diff))
+        success = false;
+        out = nan(size(K));
+    end
+end
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_spls_full.m b/Visualization_Module/dp_spls_full.m
new file mode 100644
index 0000000..c224fb1
--- /dev/null
+++ b/Visualization_Module/dp_spls_full.m
@@ -0,0 +1,17 @@
+%% DP function for one k split
+
+function [RHO, u, v, V, epsilon, omega] = dp_spls_full(training_data_x,training_data_y,test_data_x, test_data_y, cu, cv, correlation_method, V_original)
+
+% perform SPLS on the training data using the current cu/cv combination
+if exist('V_original', 'var')
+    [u, v, ~, ~, V, ~] = dp_spls(training_data_x, training_data_y, cu, cv, V_original);
+else
+    [u, v, ~, ~, V, ~] = dp_spls(training_data_x, training_data_y, cu, cv);
+end
+
+% compute the correlation between the projections of the test matrices
+% onto the SPLS latent space spanned by the weight vectors
+
+[RHO, epsilon, omega, u, v] = dp_projection(test_data_x, test_data_y, u, v, correlation_method);
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_spls_resample.m b/Visualization_Module/dp_spls_resample.m
new file mode 100644
index 0000000..fc3595a
--- /dev/null
+++ b/Visualization_Module/dp_spls_resample.m
@@ -0,0 +1,274 @@
+%% DP resample SPLS function including Procrustes transformation
+
+function [u, v, success] = dp_spls_resample(X, Y, cu, cv, V_original)
+%
+%   Sparse PLS algorithm, please check Monteiro et al. 2016 for details:
+%   doi:10.1016/j.jneumeth.2016.06.011
+%
+%   Inputs: X, Y          - data matrices in the form: samples x features. These
+%                           should have each feature with mean = 0 and std = 1;
+%
+%           cu, cv        - sparsity regularization hyperparameters, must be
+%                           between 1 and sqrt(number_features). The lower they are,
+%                           the sparser the solution. If a value is outside this range,
+%                           no sparsity will be applied in the corresponding view.
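+%
+%           V_original    - right singular vectors from the original
+%                           (non-resampled) SPLS solution. Below, the SVD
+%                           basis of the resampled covariance matrix is
+%                           aligned to this reference via an orthogonal
+%                           Procrustes rotation (Q = N*P' from the SVD of
+%                           V_original'*V_resampled), keeping signs and
+%                           component ordering comparable across resamples.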
+%
+%           e             - convergence threshold (see the code for info on how it
+%                           works). Default: 1E-5
+%
+%           itr_lim       - maximum number of iterations (it gives a warning
+%                           if it does not converge). Default: 1000
+%
+%
+%   Outputs: u, v         - weight vectors for X and Y, respectively
+%
+%            success      - will return "false" if something went wrong during
+%                           the weight vector computation
+%
+%   Version: 2016-08-20
+%__________________________________________________________________________
+
+% Written by Joao Matos Monteiro
+% Email: joao.monteiro@ucl.ac.uk
+
+
+%--- Initial checks
+%--------------------------------------------------------------------------
+
+% Check if lu and lv obey the limits
+if cu < 1 || cu > sqrt(size(X,2))
+    warning('lu is out of interval: 1 <= lu <= sqrt(size(X,2)). Not using sparsity on u.')
+    no_sparse_X = true;
+    failed_sparsity_u = false;
+else
+    no_sparse_X = false;
+end
+if cv < 1 || cv > sqrt(size(Y,2))
+    warning('lv is out of interval: 1 <= lv <= sqrt(size(Y,2)). Not using sparsity on v.')
+    no_sparse_Y = true;
+    failed_sparsity_v = false;
+else
+    no_sparse_Y = false;
+end
+
+% Convergence threshold
+% if ~exist('e', 'var')
+e = 1E-5;
+% end
+
+% Iteration limit for calculating a vector pair
+% if ~exist('itr_lim', 'var')
+itr_lim = 1000;
+% end
+
+
+
+%--- SPLS
+%--------------------------------------------------------------------------
+
+%--- Compute the covariance matrix
+C = X'*Y;
+
+%--- Initialise weight vectors
+u_temp = nan(size(X, 2), 2);
+v_temp = nan(size(Y, 2), 2);
+
+%--- compute SVD
+[U_resampled,S_resampled,V_resampled] = svd(C,0);
+
+%--- Align the resampled SVD basis to the reference V_original via an
+%--- orthogonal Procrustes rotation
+C_temp = V_original'*V_resampled;
+[N,~,P] = svd(C_temp,0);
+Q = N*P';
+V = V_resampled * S_resampled * Q;
+U = U_resampled * S_resampled * Q;
+
+%--- continue with the rotated U and V matrices in the SPLS algorithm
+u_temp(:,1) = U(:,1);
+u_temp(:,1) = u_temp(:,1)./norm(u_temp(:,1)); % normalise
+v_temp(:,1) = V(:,1);
+v_temp(:,1) = v_temp(:,1)./norm(v_temp(:,1)); % normalise
+
+clear U V
+
+%--- Main Loop
+diff = 10*e; % start the diff with a high value
+i = 0;
+success = true;
+
+while diff > e && success
+
+    %--- Compute u
+    if no_sparse_X
+        u_temp(:,2) = C*v_temp(:,1);
+        u_temp(:,2) = u_temp(:,2)./norm(u_temp(:,2), 2);
+    else
+        [u_temp(:,2), tmp_success] = update(C*v_temp(:,1), cu);
+        failed_sparsity_u = ~tmp_success;
+        if failed_sparsity_u % If it was not successful, return the non-sparse version
+            u_temp(:,2) = C*v_temp(:,1);
+            u_temp(:,2) = u_temp(:,2)./norm(u_temp(:,2), 2);
+        end
+    end
+    dim_u = sum(u_temp(:,2)~=0);
+    if ~dim_u
+        error(['No weights were included in the model, this should never '...
+            'happen. Try increasing lu.']);
+    end
+
+
+    %--- Compute v
+    if no_sparse_Y
+        v_temp(:,2) = C'*u_temp(:,2);
+        v_temp(:,2) = v_temp(:,2)./norm(v_temp(:,2), 2);
+    else
+        [v_temp(:,2), tmp_success] = update(C'*u_temp(:,2), cv);
+        failed_sparsity_v = ~tmp_success;
+        if failed_sparsity_v % If it was not successful, return the non-sparse version
+            v_temp(:,2) = C'*u_temp(:,2);
+            v_temp(:,2) = v_temp(:,2)./norm(v_temp(:,2), 2);
+        end
+    end
+    dim_v = sum(v_temp(:,2)~=0);
+    if ~dim_v
+        error(['No weights were included in the model, this should never '...
+            'happen. 
Try increasing lv.']);
+    end
+
+
+    %--- Check convergence
+    diff_u = norm(u_temp(:,2) - u_temp(:,1));
+    diff_v = norm(v_temp(:,2) - v_temp(:,1));
+    if diff_u >= diff_v, diff = diff_u; else diff = diff_v; end
+    % update u and v for the next iteration
+    u_temp(:,1) = u_temp(:,2);
+    v_temp(:,1) = v_temp(:,2);
+
+    if i >= itr_lim
+        warning('Maximum number of iterations reached.');
+        success = false;
+    end
+
+    i = i+1;
+end
+
+if failed_sparsity_u
+    warning(['There was a problem with the delta estimation in u.' ...
+        ' The solution was forced to be non-sparse. Take results with a grain of salt.']);
+    success = false;
+end
+
+if failed_sparsity_v
+    warning(['There was a problem with the delta estimation in v.' ...
+        ' The solution was forced to be non-sparse. Take results with a grain of salt.']);
+    success = false;
+end
+
+% fprintf('SPLS: itr: %d diff: %.2e dim_u: %d dim_v: %d\n', i, diff, dim_u, dim_v);
+
+%--- Add converged weight vectors to output
+u = u_temp(:, end);
+v = v_temp(:, end);
+
+
+end
+
+
+%--- Private functions
+%--------------------------------------------------------------------------
+function [up, success] = update(w, c)
+
+success = true;
+
+%--- update values
+delta = 0;
+up = soft_thresh(w, delta);
+up = up./norm(up,2);
+
+%--- check if it obeys the condition. If not, find delta that does.
+if norm(up, 1) > c
+
+    delta1 = delta;
+    delta2 = delta1+1.1; % delta2 must be > 1
+
+    % get first estimate of delta2
+    flag = false;
+    i = 0;
+    max_delta = 0;
+    while ~flag
+        up = soft_thresh(w, delta2);
+        up = up./norm(up,2);
+
+        if sum(abs(up)) == 0 || isnan(sum(abs(up))) % if everything is zero, then up/|up| will be 0/0 = nan
+            delta2 = delta2/1.618; % They have to be different, otherwise it might not converge
+        elseif norm(up, 1) > c
+            delta1 = delta2;
+            delta2 = delta2*2; % They have to be different, otherwise it might not converge
+        elseif norm(up, 1) <= c
+            flag = true;
+        end
+
+        if delta2>max_delta, max_delta = delta2;end
+
+        if delta2 == 0
+            warning('Delta shrank to zero; delta estimation failed.');
+            success = false;
+            break
+        end
+        i = i+1;
+        if i>1E4
+            warning('First delta estimation update did not converge.');
+            delta1 = 0;
+            delta2 = max_delta;
+            break
+        end
+    end
+
+
+    up = bisec(w, c, delta1, delta2);
+    if isempty(up) || sum(isnan(up))>0
+        warning('Delta estimation unsuccessful.')
+        success = false;
+    end
+
+
+end
+
+
+
+end
+
+function out = soft_thresh(a,delta)
+% Performs soft thresholding (it does not normalize the output)
+diff = abs(a)-delta;
+diff(diff<0) = 0;
+out = sign(a).*diff;
+
+end
+
+
+function out = bisec(K, c, x1,x2)
+converge = false;
+success = true;
+tolerance = 1E-6;
+while ~converge && success
+    x = (x2 + x1) / 2;
+    out = soft_thresh(K, x);
+    out = out./norm(out,2);
+    if sum(abs(out)) == 0
+        x2 = x;
+    elseif norm(out, 1) > c
+        x1 = x;
+    elseif norm(out, 1) < c
+        x2 = x;
+    end
+
+    diff = abs(norm(out, 1) - c);
+    if diff <= tolerance
+        converge = true;
+    elseif isnan(sum(diff))
+        success = false;
+        out = nan(size(K));
+    end
+end
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_spls_slim.m b/Visualization_Module/dp_spls_slim.m
new file mode 100644
index 0000000..1a8772c
--- /dev/null
+++ b/Visualization_Module/dp_spls_slim.m
@@ -0,0 +1,17 @@
+%% DP function for one k split
+
+function RHO = dp_spls_slim(training_data_x,training_data_y,test_data_x, test_data_y, cu, cv, correlation_method, V_original)
+
+% perform SPLS on the training data using the current cu/cv combination
+if exist('V_original', 'var')
+    [u, v, ~, ~, ~, ~] = dp_spls(training_data_x, 
training_data_y, cu, cv, V_original);
+else
+    [u, v, ~, ~, ~, ~] = dp_spls(training_data_x, training_data_y, cu, cv);
+end
+
+% compute the correlation between the projections of the test matrices
+% onto the SPLS latent space spanned by the weight vectors
+
+RHO = dp_projection(test_data_x, test_data_y, u, v, correlation_method);
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_standardize.m b/Visualization_Module/dp_standardize.m
new file mode 100644
index 0000000..447c1ae
--- /dev/null
+++ b/Visualization_Module/dp_standardize.m
@@ -0,0 +1,48 @@
+%% DP function for standardizing
+
+function [Z, IN] = dp_standardize(X, IN)
+
+Z = nan(size(X,1),size(X,2));
+
+if isfield(IN, 'method')
+    method = IN.method;
+else
+    method = 'mean-centering';
+end
+
+switch method
+    case 'min_max'
+        if isfield(IN, 'min')
+            for i=1:size(X,2)
+                Z(:,i) = (X(:,i)-IN.min(i))./(IN.max(i)-IN.min(i));
+            end
+        else
+            for i=1:size(X,2)
+                IN.max(i) = max(X(:,i));
+                IN.min(i) = min(X(:,i));
+            end
+            for i=1:size(X,2)
+                Z(:,i) = (X(:,i)-IN.min(i))./(IN.max(i)-IN.min(i));
+            end
+        end
+    case 'mean-centering'
+        if isfield(IN, 'means')
+            for i=1:size(X,2)
+                Z(:,i) = (X(:,i)-IN.means(i))/IN.stds(i);
+                if any(isnan(Z(:,i)))
+                    Z(:,i) = X(:,i);
+                end
+            end
+        else
+            IN.means = mean(X,1);
+            IN.stds = std(X,1);
+            for i=1:size(X,2)
+                Z(:,i) = (X(:,i)-IN.means(i))/IN.stds(i);
+                if any(isnan(Z(:,i)))
+                    Z(:,i) = X(:,i);
+                end
+            end
+        end
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_standardize_comb.m b/Visualization_Module/dp_standardize_comb.m
new file mode 100644
index 0000000..fccc021
--- /dev/null
+++ b/Visualization_Module/dp_standardize_comb.m
@@ -0,0 +1,10 @@
+%% DP function for combined standardizing
+% standardize TRAIN according to the method specified in IN, then apply
+% the scaling parameters estimated on TRAIN (e.g. means/stds) to TEST
+function [TRAIN_s, TEST_s] = dp_standardize_comb(TRAIN, TEST, IN)
+
+[TRAIN_s,IN] = dp_standardize(TRAIN, IN);
+
+[TEST_s,~] = dp_standardize(TEST, IN);
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_trainmerge.m b/Visualization_Module/dp_trainmerge.m
new file mode 100644
index 0000000..3042aae
--- /dev/null
+++ b/Visualization_Module/dp_trainmerge.m
@@ -0,0 +1,37 @@
+%% DP function for training/retraining and merging training results
+
+function [RHO_opt, u_opt, v_opt, V_opt] = dp_trainmerge(RHO_collection, u_collection, weights_u_collection, v_collection, weights_v_collection, V_collection, weights_V_collection, type_merge)
+
+switch type_merge
+    case 'mean'
+        RHO_opt = mean(RHO_collection);
+        u_opt = mean(u_collection,2);
+        v_opt = mean(v_collection,2);
+        V_opt = mean(V_collection,3);
+        if V_opt==0
+            V_opt = false;
+        end
+    case 'median'
+        RHO_opt = median(RHO_collection);
+        u_opt = median(u_collection,2);
+        v_opt = median(v_collection,2);
+        V_opt = median(V_collection,3);
+    case 'weighted_mean'
+        u_opt = wmean(u_collection, weights_u_collection, 2)';
+        v_opt = wmean(v_collection, weights_v_collection, 2)';
+        try V_opt = wmean(V_collection, weights_V_collection, 3);
+        catch
+            V_opt = false;
+        end
+        RHO_opt = wmean(RHO_collection, RHO_collection);
+    case 'best'
+        [RHO_opt,I_max] = max(RHO_collection);
+        u_opt = u_collection(:,I_max);
+        v_opt = v_collection(:,I_max);
+        try V_opt = V_collection(:,:,I_max);
+        catch
+            V_opt = false;
+        end
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_trainmerge_single.m b/Visualization_Module/dp_trainmerge_single.m
new file mode 100644
index 0000000..d42edff
--- /dev/null
+++ 
b/Visualization_Module/dp_trainmerge_single.m
@@ -0,0 +1,16 @@
+%% DP function for training/retraining and merging training results
+
+function values_merged = dp_trainmerge_single(values, type_merge, dim, weights)
+
+switch type_merge
+    case 'mean'
+        values_merged = mean(values, dim);
+    case 'median'
+        values_merged = median(values, dim);
+    case 'weighted_mean'
+        values_merged = wmean(values, weights, dim);
+    case 'best'
+        values_merged = max(values, [], dim); % maximum along dim; max(values, dim) would compare elementwise against the scalar dim
+end
+
+end
\ No newline at end of file
diff --git a/Visualization_Module/dp_visualize_data.m b/Visualization_Module/dp_visualize_data.m
new file mode 100644
index 0000000..5a4d0db
--- /dev/null
+++ b/Visualization_Module/dp_visualize_data.m
@@ -0,0 +1,584 @@
+%% function to visualize SPLS output
+
+function dp_visualize_data(IN)
+
+switch IN.overall_analysis
+    case 'Stress'
+        overall_folder = '/volume/HCStress/Analysis/Stress';
+    case 'Resilience'
+        overall_folder = '/volume/HCStress/Analysis/Resilience';
+end
+
+if ~isfield(IN, 'specific')
+    all_jobs = true;
+    IN.specific = 'empty';
+else
+    all_jobs = false;
+end
+
+%% visualize results
+load(IN.results_path);
+collection_folder = [overall_folder, '/', input.name];
+mkdir(collection_folder);
+
+input.behavior_names = strrep(input.behavior_names, '_T0', '');
+load(input.NM_structure);
+load('/volume/HCStress/Doc/Stress_Resilience_questionnaires.mat');
+
+% define column names for matrices so that you can access them later by
+% indexing
+% output.parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop
+opt_parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'success', 'RHO', 'p'};
+opt_parameters_names_long = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop
+
+% indices for later use
+opt_u = strcmp(output.parameters_names,'u');
+opt_v = strcmp(output.parameters_names,'v');
+opt_p = strcmp(output.parameters_names, 'p');
+opt_RHO = strcmp(output.parameters_names, 'RHO');
+index_epsilon = strcmp(output.parameters_names, 'epsilon');
+index_omega = strcmp(output.parameters_names, 'omega');
+
+% results_folder = IN.results_path(1:(strfind(IN.results_path, '/result.mat')-1));
+
+% cd(results_folder);
+%
+% % write brain vector to nifti file
+% for i=1:size(output.final_parameters,1)
+%     nk_WriteVol(output.final_parameters{i,4}, ['brain_LV' num2str(i)], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt');
+% end
+
+if any(strcmp(IN.specific, 'images') | all_jobs)
+    cd(collection_folder);
+    % write brain vector to nifti file
+    for i=1:size(output.final_parameters,1)
+        nk_WriteVol(output.final_parameters{i,4}, ['brain_LV' num2str(i)], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt');
+        % dp_resample_image([collection_folder, '/brain_LV' num2str(i)], [1 1 1]);
+    end
+end
+
+
+if any(strcmp(IN.specific, 'atlas') | all_jobs)
+
+    % get clusters for brain regions using hammers and aal atlas
+    % filepath hammers nifti: /opt/SPM/spm12_v6685_cat12_r1207/atlas/hammers.nii
+    % filepath hammers description: /opt/SPM/spm12_v6685_cat12_r1207/atlas/labels_dartel_hammers.xml
+
+    switch size(input.behavior,1)
+        case 626
+            hammers_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__626_NM_X.mat';
+        case 627
+            hammers_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__627_NM_X.mat';
+        case 631
+            hammers_path_full = 
'/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__631_NM_X.mat'; + case 634 + hammers_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__634_NM_X.mat'; + end + + load(hammers_path_full); + [C_hammers, ~, ic_hammers] = unique(atlas_hammers_full_for_analysis); + counts_hammers = accumarray(ic_hammers, 1); + output.regions_hammers_count = [C_hammers', counts_hammers]; + load('/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30-ancillary-data.tar/Hammers_mith-n30-ancillary-data/Hammers_mith-n30-ancillary-data/labels_hammerssmith_n30_ancillary_data.mat'); + output.regions.count_names = {'region_number', 'n_voxels', 'voxel_percentage', 'region_name', 'median_weights', 'mean_weights'}; + output.regions.count = struct('raw', [], 'voxels', [], 'weights', []); + output.regions.count_names_sorting = {'positive', 'negative', 'overall'}; + voxel_cutoff = 10; + fields = fieldnames(output.regions.count); + for i=1:size(fields,1) + for fff = 1:size(output.regions.count_names_sorting,2) + FID = fopen([collection_folder, '/brain_regions_hammers_', output.regions.count_names_sorting{fff}, '_', fields{i}, '.txt'], 'w'); + fprintf(FID, [strrep(input.name,'_',' '), '\n', fields{i} ' sorted']); + fclose(FID); + end + end + + for i=1:size(output.final_parameters,1) + output.regions.log{i,1} = output.final_parameters{i,4}>0; + output.regions.log{i,2} = output.final_parameters{i,4}<0; + output.regions.log{i,3} = output.final_parameters{i,4}~=0; + for ii=1:size(output.regions.log,2) + output.regions.sum{i,ii} = atlas_hammers_full_for_analysis((atlas_hammers_full_for_analysis~=0)' & output.regions.log{i,ii}); + [C, ~, ic] = unique(output.regions.sum{i,ii}); + a_counts = accumarray(ic, 1); + voxel_percentage = a_counts./(counts_hammers(ismember(C_hammers, C))); + output.regions.count.raw{i,ii} = [num2cell(C'), num2cell(a_counts), num2cell(voxel_percentage), labels_regions_hammers((C'),2)]; + fix_end = size(output.regions.count.raw{i,ii},2); + log_cutoff = cell2mat(output.regions.count.raw{i,ii}(:, find(strcmp(output.regions.count_names, 'n_voxels')))) 0 + ypos(xi) = -ygap;% Set y position, including gap + else + ypos(xi)=0; + end + else + if y_value(xi)~=0 + if y_value(xi)>0 && y_value(xi-1)>0 %abs(y_value(i)-y_value(i-1))<=ygap + ypos(xi) = ypos(xi-1) - ygap; + elseif y_value(xi)<0 && y_value(xi-1)<0 + ypos(xi) = ypos(xi-1) + ygap; + elseif y_value(xi) > 0 + ypos(xi) = -ygap; + elseif y_value(xi) < 0 + ypos(xi) = ygap; + end + else + ypos(xi)=0; + end + end + if y_value(xi)~=0 + htext = text(x_pos(xi),ypos(xi),strrep(input.behavior_names{xi},'_',' ')); % Add text label + set(htext,'VerticalAlignment','bottom','HorizontalAlignment','center', 'FontSize', 8); % Adjust properties + end + end + + for qq=1:size(questions_collection.items,1) + try questions_collection.final{qq} = [questions_collection.items{qq}, ': ', questions_collection.questions{1,qq}{1}]; + catch + questions_collection.final{qq} = [questions_collection.items{qq}]; + end + end + + try annotation(f, 'textbox', [0.79, 0.2, 0.16, 0.4], 'string', strrep(questions_collection.final, '_', ' '), 'FontSize', 8, 'FitHeightToText', 'on'); + catch + end + + set(gcf,'Position', get(0,'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [collection_folder, '/behavior_LV_' num2str(i)], '-dpng', '-r0'); + saveas(f, [collection_folder, '/behavior_LV' num2str(i), '.fig']); + + close(f); + end + + %% plot latent scores epsilon and omega, color code diagnostic groups (HC, + % ROD, ROP, CHR) + + for 
i=1:size(input.selected_studygroups,2) + input.selected_studygroups{2,i} = strcmp(input.data_collection.Labels, input.selected_studygroups{1,i}); + end + + for i=1:size(output.cv_outer.TestInd,2) +% temp_1 = zeros(size(input.selected_studygroups{2,1},1),size(input.selected_studygroups{2,1},2))>0; +% temp_1(output.cv_outer.TestInd{1,i}) = true; + output.cv_outer.TestInd{2,i} = input.data_collection.Labels(output.cv_outer.TestInd{1,i}); + end + + % plot all latent scores according to LVs + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + for i=1:size(output.final_parameters,1) + f=figure(); + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = output.final_parameters{i,index_epsilon}(strcmp(output.cv_outer.TestInd{2,w}, input.selected_studygroups{1,ii})); + y = output.final_parameters{i,index_omega}(strcmp(output.cv_outer.TestInd{2,w}, input.selected_studygroups{1,ii})); + plot(x,y,'.', 'MarkerSize', 20, 'color',colorpattern_LS(ii,:)); + hold on + end + first_line = strrep([input.name, ' grid_density=' num2str(input.grid_density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + third_line = 'significant'; + else + third_line = 'not significant'; + end + title({first_line; second_line; third_line}); % add third line + xlabel('epsilon(brain score)'); + ylabel('omega(behavior score)'); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', 18); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', 20); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [collection_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [collection_folder, '/latent_scores_LV' num2str(i), '.fig']); + + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f) + end + + % plot latent scores all combined across significant LVs, colorcoded by LVs + colorpattern_LS = hsv(size(output.final_parameters,1)); + f=figure(); temp_legend = []; + for i=1:size(output.final_parameters,1) + x = output.final_parameters{i,index_epsilon}; + y = output.final_parameters{i,index_omega}; + plot(x,y,'.', 'MarkerSize', 20, 'color',colorpattern_LS(i,:)); + first_line = strrep([input.name, ' grid_density=' num2str(input.grid_density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + sig_line = 'significant'; + else + sig_line = 'not significant'; + end + title({first_line;second_line}); % add third line + hold on + temp_legend{i} = ['LV ', num2str(i), ': ', sig_line]; + end +% temp_legend{nn} = selected_variables{1,ii}; + xlabel('epsilon(brain score)'); + ylabel('omega(behavior score)'); + [~, lgd_data] = legend(temp_legend, 'Location', 'bestoutside', 'FontSize', 18); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', 20); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + 
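+    % each dot is one held-out subject's (epsilon, omega) pair: epsilon and
+    % omega are the latent scores, i.e. the projections of the test-fold
+    % brain and behavior data onto the weight vectors u and v
+    % (cf. dp_projection), here pooled across all LVs.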
print(f, [collection_folder, '/latent_scores_combined_LV_color'], '-dpng', '-r0'); + saveas(f, [collection_folder, '/latent_scores_combined_LV_color.fig']); + + close(f) + +% % standardize latent scores and plot all of them in one graph, colorcoded +% % by diagnoses +% % first transpose latent scores so that they fit function, then standardize +% % feature-wise (per LV) +% output.epsilon_stand = (dp_standardize(output.epsilon'))'; +% output.omega_stand = (dp_standardize(output.omega'))'; + + % plot all latent scores according to LVs, colorcoded by diagnoses + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + f=figure(); + for i=1:size(output.final_parameters,1) + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = output.final_parameters{i,index_epsilon}(strcmp(output.cv_outer.TestInd{2,w}, input.selected_studygroups{1,ii})); + y = output.final_parameters{i,index_omega}(strcmp(output.cv_outer.TestInd{2,w}, input.selected_studygroups{1,ii})); + plot(x,y,'.', 'MarkerSize', 20, 'color',colorpattern_LS(ii,:)); + hold on + end + end + end + first_line = strrep([input.name, ', grid_density=' num2str(input.grid_density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + % if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + % third_line = ['significant']; + % else + % third_line = ['not significant']; + % end + title({first_line; second_line}); % add third line + xlabel('epsilon(brain score)'); + ylabel('omega(behavior score)'); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', 18); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', 20); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [collection_folder, '/latent_scores_combined_diagnosis_color'], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [collection_folder, '/latent_scores_combined_diagnosis_color', '.fig']); + + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f); + +end + + +if any(strcmp(IN.specific, 'detailed') | all_jobs) + + detailed_results_folder = [IN.results_path(1:(strfind(IN.results_path, 'final')-1)), 'detailed_results']; + cd(detailed_results_folder); + + for i=1:size(output.final_parameters,1) + load([detailed_results_folder, '/opt_parameters_' num2str(i), '.mat']); + if exist('opt_parameters') + if size(opt_parameters,2) == 8 + opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s wrong!'); + end + temp_opt_param = opt_parameters; + elseif exist('opt_parameters_temp') + if size(opt_parameters_temp,2) == 8 || size(opt_parameters_temp,2) == 10 + opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = 
strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters_temp,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s wrong!'); + end + temp_opt_param = opt_parameters_temp; + else + disp('Something''s wrong!'); + end + % opt_dir = [detailed_results_folder, '/opt_parameters_' num2str(i)]; + % mkdir(opt_dir); + % cd(opt_dir); + + collection_folder_opt = [collection_folder, '/opt_parameters_' num2str(i)]; + mkdir(collection_folder_opt); + cd(collection_folder_opt); + + % for ii=1:size(temp_opt_param,1) + % %% visualize results + % % write brain vector to nifti file + % nk_WriteVol(temp_opt_param{ii,opt_param_u}, ['brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); + % % dp_resample_image([collection_folder_opt, '/brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], [1 1 1]); + % end + + %% visualize behavior vector in barplot + + for ii=1:(size(temp_opt_param,1)) + x = temp_opt_param{ii,opt_param_v}; + f=subplot(round(size(temp_opt_param,1)/2),2,ii); + nn=0; + hold on + temp_legend=[]; temp_all = 0; + for iii=1:size(selected_variables,2) + switch class(selected_variables{2,iii}) + case 'struct' + fields = fieldnames(input.selected_features{2,(strcmp(input.selected_features(1,:),input.selected_features{1,iii}))}); + for iiii=1:size(fields,1) + nn=nn+1; + temp_current=size(selected_variables{2,iii}.(fields{iiii}),2); + bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + temp_all = temp_all+temp_current; + temp_legend{nn} = [selected_variables{1,iii}, ' ', strrep(fields{iiii}, '_', ' ')]; + hold on + end + case 'double' + nn=nn+1; + temp_current=size(selected_variables{2,iii},2); + bar((temp_all+1):(temp_all+size(temp_current,2)),x((temp_all+1):(temp_all+size(temp_current,2))), 'FaceColor', colorpattern_LV(nn,:)); + temp_all = temp_all+size(temp_current,2); + temp_legend{nn} = selected_variables{1,iii}; + hold on + end + end + axis([0 (size(temp_opt_param{ii,opt_param_v},1)+1) -1 1]); + xlabel({'\color{black}weight vector v'}, 'FontSize', 6); + ylabel('score', 'FontSize', 6); + if temp_opt_param{ii,opt_param_p}<=output.pvalue_FDR(i) + significance_opt = 'significant'; + else + significance_opt = 'not significant'; + end + subplot_title = {['Iteration ', num2str(ii), ', p-value (FDR-corrected) = ' num2str(temp_opt_param{ii,opt_param_p}), ', Spearman''s RHO = ', num2str(temp_opt_param{ii,opt_param_RHO}), significance_opt]}; + title(subplot_title, 'FontSize', 6, 'FontWeight', 'normal'); + + end + % set(gcf, 'Position', get(0, 'Screensize')); + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + first_line = strrep([input.name, ', grid_density=' num2str(input.grid_density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + third_line = ['significant']; + else + third_line = ['not significant']; + end + suptitle({first_line; [second_line, ', ' third_line]}); % add third line + + % print([detailed_results_folder, 
'/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0'); + % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png'); + + print([collection_folder_opt, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0'); + saveas(f, [collection_folder_opt, '/behavior_opt_parameters_',num2str(i)], 'png'); + + % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'fig'); + % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png'); + close all; + end + +end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_visualize_data.m~ b/Visualization_Module/dp_visualize_data.m~ new file mode 100644 index 0000000..173fd7d --- /dev/null +++ b/Visualization_Module/dp_visualize_data.m~ @@ -0,0 +1,587 @@ +%% function to visualize SPLS output + +function dp_visualize_data(IN) + +switch IN.overall_analysis + case 'Stress' + overall_folder = '/volume/HCStress/Analysis/Stress'; + case 'Resilience' + overall_folder = '/volume/HCStress/Analysis/Resilience'; +end + +if ~isfield(IN, 'specific') + all_jobs = true; + IN.specific = 'empty'; +else + all_jobs = false; +end + +%% visualize results +load(IN.results_path); +collection_folder = [overall_folder, '/', input.name]; +mkdir(collection_folder); + +input.behavior_names = strrep(input.behavior_names, '_T0', ''); +load(input.NM_structure); +load('/volume/HCStress/Doc/Stress_Resilience_questionnaires.mat'); + +% define column names for matrices so that you can access them later by +% indexing +% output.parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop +opt_parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'success', 'RHO', 'p'}; +opt_parameters_names_long = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop + +% indices for later use +opt_u = strcmp(output.parameters_names,'u'); +opt_v = strcmp(output.parameters_names,'v'); +opt_p = strcmp(output.parameters_names, 'p'); +opt_RHO = strcmp(output.parameters_names, 'RHO'); +index_epsilon = strcmp(output.parameters_names, 'epsilon'); +index_omega = strcmp(output.parameters_names, 'omega'); + +% results_folder = IN.results_path(1:(strfind(IN.results_path, '/result.mat')-1)); + +% cd(results_folder); +% +% % write brain vector to nifti file +% for i=1:size(output.final_parameters,1) +% nk_WriteVol(output.final_parameters{i,4}, ['brain_LV' num2str(i)], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); +% end + +if any(strcmp(IN.specific, 'images') | all_jobs) + cd(collection_folder); + % write brain vector to nifti file + for i=1:size(output.final_parameters,1) + nk_WriteVol(output.final_parameters{i,4}, ['brain_LV' num2str(i)], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); + % dp_resample_image([collection_folder, '/brain_LV' num2str(i)], [1 1 1]); + end +end + + +if any(strcmp(IN.specific, 'atlas') | all_jobs) + + % get clusters for brain regions using hammers and aal atlas + % filepath hammers nifti: /opt/SPM/spm12_v6685_cat12_r1207/atlas/hammers.nii + % filepath hammers description: /opt/SPM/spm12_v6685_cat12_r1207/atlas/labels_dartel_hammers.xml + + for aa=1:size(atlases,2) + + switch size(input.behavior,1) + case 626 + hammers_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__626_NM_X.mat'; + case 627 + hammers_path_full = 
'/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__627_NM_X.mat'; + case 631 + hammers_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__631_NM_X.mat'; + case 634 + hammers_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__634_NM_X.mat'; + end + + load(hammers_path_full); + [C_hammers, ~, ic_hammers] = unique(atlas_hammers_full_for_analysis); + counts_hammers = accumarray(ic_hammers, 1); + output.regions_hammers_count = [C_hammers', counts_hammers]; + load('/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30-ancillary-data.tar/Hammers_mith-n30-ancillary-data/Hammers_mith-n30-ancillary-data/labels_hammerssmith_n30_ancillary_data.mat'); + output.regions.count_names = {'region_number', 'n_voxels', 'voxel_percentage', 'region_name', 'median_weights', 'mean_weights'}; + output.regions.count = struct('raw', [], 'voxels', [], 'weights', []); + output.regions.count_names_sorting = {'positive', 'negative', 'overall'}; + voxel_cutoff = 10; + fields = fieldnames(output.regions.count); + for i=1:size(fields,1) + for fff = 1:size(output.regions.count_names_sorting,2) + FID = fopen([collection_folder, '/brain_regions_hammers_', output.regions.count_names_sorting{fff}, '_', fields{i}, '.txt'], 'w'); + fprintf(FID, [strrep(input.name,'_',' '), '\n', fields{i} ' sorted']); + fclose(FID); + end + end + + for i=1:size(output.final_parameters,1) + output.regions.log{i,1} = output.final_parameters{i,4}>0; + output.regions.log{i,2} = output.final_parameters{i,4}<0; + output.regions.log{i,3} = output.final_parameters{i,4}~=0; + for ii=1:size(output.regions.log,2) + output.regions.sum{i,ii} = atlas_hammers_full_for_analysis((atlas_hammers_full_for_analysis~=0)' & output.regions.log{i,ii}); + [C, ~, ic] = unique(output.regions.sum{i,ii}); + a_counts = accumarray(ic, 1); + voxel_percentage = a_counts./(counts_hammers(ismember(C_hammers, C))); + output.regions.count.raw{i,ii} = [num2cell(C'), num2cell(a_counts), num2cell(voxel_percentage), labels_regions_hammers((C'),2)]; + fix_end = size(output.regions.count.raw{i,ii},2); + log_cutoff = cell2mat(output.regions.count.raw{i,ii}(:, find(strcmp(output.regions.count_names, 'n_voxels')))) 0 + ypos(xi) = -ygap;% Set y position, including gap + else + ypos(xi)=0; + end + else + if y_value(xi)~=0 + if y_value(xi)>0 && y_value(xi-1)>0 %abs(y_value(i)-y_value(i-1))<=ygap + ypos(xi) = ypos(xi-1) - ygap; + elseif y_value(xi)<0 && y_value(xi-1)<0 + ypos(xi) = ypos(xi-1) + ygap; + elseif y_value(xi) > 0 + ypos(xi) = -ygap; + elseif y_value(xi) < 0 + ypos(xi) = ygap; + end + else + ypos(xi)=0; + end + end + if y_value(xi)~=0 + htext = text(x_pos(xi),ypos(xi),strrep(input.behavior_names{xi},'_',' ')); % Add text label + set(htext,'VerticalAlignment','bottom','HorizontalAlignment','center', 'FontSize', 8); % Adjust properties + end + end + + for qq=1:size(questions_collection.items,1) + try questions_collection.final{qq} = [questions_collection.items{qq}, ': ', questions_collection.questions{1,qq}{1}]; + catch + questions_collection.final{qq} = [questions_collection.items{qq}]; + end + end + + try annotation(f, 'textbox', [0.79, 0.2, 0.16, 0.4], 'string', strrep(questions_collection.final, '_', ' '), 'FontSize', 8, 'FitHeightToText', 'on'); + catch + end + + set(gcf,'Position', get(0,'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [collection_folder, '/behavior_LV_' num2str(i)], '-dpng', '-r0'); + saveas(f, [collection_folder, '/behavior_LV' num2str(i), '.fig']); + + 
close(f); + end + + %% plot latent scores epsilon and omega, color code diagnostic groups (HC, + % ROD, ROP, CHR) + + for i=1:size(input.selected_studygroups,2) + input.selected_studygroups{2,i} = strcmp(input.data_collection.Labels, input.selected_studygroups{1,i}); + end + + for i=1:size(output.cv_outer.TestInd,2) + % temp_1 = zeros(size(input.selected_studygroups{2,1},1),size(input.selected_studygroups{2,1},2))>0; + % temp_1(output.cv_outer.TestInd{1,i}) = true; + output.cv_outer.TestInd{2,i} = input.data_collection.Labels(output.cv_outer.TestInd{1,i}); + end + + % plot all latent scores according to LVs + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + for i=1:size(output.final_parameters,1) + f=figure(); + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = output.final_parameters{i,index_epsilon}(strcmp(output.cv_outer.TestInd{2,w}, input.selected_studygroups{1,ii})); + y = output.final_parameters{i,index_omega}(strcmp(output.cv_outer.TestInd{2,w}, input.selected_studygroups{1,ii})); + plot(x,y,'.', 'MarkerSize', 20, 'color',colorpattern_LS(ii,:)); + hold on + end + first_line = strrep([input.name, ' grid_density=' num2str(input.grid_density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + third_line = 'significant'; + else + third_line = 'not significant'; + end + title({first_line; second_line; third_line}); % add third line + xlabel('epsilon(brain score)'); + ylabel('omega(behavior score)'); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', 18); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', 20); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [collection_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [collection_folder, '/latent_scores_LV' num2str(i), '.fig']); + + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f) + end + + % plot latent scores all combined across significant LVs, colorcoded by LVs + colorpattern_LS = hsv(size(output.final_parameters,1)); + f=figure(); temp_legend = []; + for i=1:size(output.final_parameters,1) + x = output.final_parameters{i,index_epsilon}; + y = output.final_parameters{i,index_omega}; + plot(x,y,'.', 'MarkerSize', 20, 'color',colorpattern_LS(i,:)); + first_line = strrep([input.name, ' grid_density=' num2str(input.grid_density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + sig_line = 'significant'; + else + sig_line = 'not significant'; + end + title({first_line;second_line}); % add third line + hold on + temp_legend{i} = ['LV ', num2str(i), ': ', sig_line]; + end + % temp_legend{nn} = selected_variables{1,ii}; + xlabel('epsilon(brain score)'); + ylabel('omega(behavior score)'); + [~, lgd_data] = legend(temp_legend, 'Location', 'bestoutside', 'FontSize', 18); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', 20); 
%// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [collection_folder, '/latent_scores_combined_LV_color'], '-dpng', '-r0'); + saveas(f, [collection_folder, '/latent_scores_combined_LV_color.fig']); + + close(f) + + % % standardize latent scores and plot all of them in one graph, colorcoded + % % by diagnoses + % % first transpose latent scores so that they fit function, then standardize + % % feature-wise (per LV) + % output.epsilon_stand = (dp_standardize(output.epsilon'))'; + % output.omega_stand = (dp_standardize(output.omega'))'; + + % plot all latent scores according to LVs, colorcoded by diagnoses + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + f=figure(); + for i=1:size(output.final_parameters,1) + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = output.final_parameters{i,index_epsilon}(strcmp(output.cv_outer.TestInd{2,w}, input.selected_studygroups{1,ii})); + y = output.final_parameters{i,index_omega}(strcmp(output.cv_outer.TestInd{2,w}, input.selected_studygroups{1,ii})); + plot(x,y,'.', 'MarkerSize', 20, 'color',colorpattern_LS(ii,:)); + hold on + end + end + end + first_line = strrep([input.name, ', grid_density=' num2str(input.grid_density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + % if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + % third_line = ['significant']; + % else + % third_line = ['not significant']; + % end + title({first_line; second_line}); % add third line + xlabel('epsilon(brain score)'); + ylabel('omega(behavior score)'); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', 18); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', 20); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [collection_folder, '/latent_scores_combined_diagnosis_color'], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [collection_folder, '/latent_scores_combined_diagnosis_color', '.fig']); + + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f); + +end + + +if any(strcmp(IN.specific, 'detailed') | all_jobs) + + detailed_results_folder = [IN.results_path(1:(strfind(IN.results_path, 'final')-1)), 'detailed_results']; + cd(detailed_results_folder); + + for i=1:size(output.final_parameters,1) + load([detailed_results_folder, '/opt_parameters_' num2str(i), '.mat']); + if exist('opt_parameters') + if size(opt_parameters,2) == 8 + opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s wrong!'); + end + temp_opt_param = opt_parameters; + elseif exist('opt_parameters_temp') + if size(opt_parameters_temp,2) == 8 || size(opt_parameters_temp,2) == 10 
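+            % layout dispatch: 8 (or 10) columns match the short
+            % opt_parameters_names set, 11 columns match
+            % opt_parameters_names_long (which additionally carries U_opt,
+            % S_opt and V_opt); the p/v/u/RHO column indices are looked up
+            % from the matching name set.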
+ opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters_temp,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s wrong!'); + end + temp_opt_param = opt_parameters_temp; + else + disp('Something''s wrong!'); + end + % opt_dir = [detailed_results_folder, '/opt_parameters_' num2str(i)]; + % mkdir(opt_dir); + % cd(opt_dir); + + collection_folder_opt = [collection_folder, '/opt_parameters_' num2str(i)]; + mkdir(collection_folder_opt); + cd(collection_folder_opt); + + % for ii=1:size(temp_opt_param,1) + % %% visualize results + % % write brain vector to nifti file + % nk_WriteVol(temp_opt_param{ii,opt_param_u}, ['brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); + % % dp_resample_image([collection_folder_opt, '/brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], [1 1 1]); + % end + + %% visualize behavior vector in barplot + + for ii=1:(size(temp_opt_param,1)) + x = temp_opt_param{ii,opt_param_v}; + f=subplot(round(size(temp_opt_param,1)/2),2,ii); + nn=0; + hold on + temp_legend=[]; temp_all = 0; + for iii=1:size(selected_variables,2) + switch class(selected_variables{2,iii}) + case 'struct' + fields = fieldnames(input.selected_features{2,(strcmp(input.selected_features(1,:),input.selected_features{1,iii}))}); + for iiii=1:size(fields,1) + nn=nn+1; + temp_current=size(selected_variables{2,iii}.(fields{iiii}),2); + bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + temp_all = temp_all+temp_current; + temp_legend{nn} = [selected_variables{1,iii}, ' ', strrep(fields{iiii}, '_', ' ')]; + hold on + end + case 'double' + nn=nn+1; + temp_current=size(selected_variables{2,iii},2); + bar((temp_all+1):(temp_all+size(temp_current,2)),x((temp_all+1):(temp_all+size(temp_current,2))), 'FaceColor', colorpattern_LV(nn,:)); + temp_all = temp_all+size(temp_current,2); + temp_legend{nn} = selected_variables{1,iii}; + hold on + end + end + axis([0 (size(temp_opt_param{ii,opt_param_v},1)+1) -1 1]); + xlabel({'\color{black}weight vector v'}, 'FontSize', 6); + ylabel('score', 'FontSize', 6); + if temp_opt_param{ii,opt_param_p}<=output.pvalue_FDR(i) + significance_opt = 'significant'; + else + significance_opt = 'not significant'; + end + subplot_title = {['Iteration ', num2str(ii), ', p-value (FDR-corrected) = ' num2str(temp_opt_param{ii,opt_param_p}), ', Spearman''s RHO = ', num2str(temp_opt_param{ii,opt_param_RHO}), significance_opt]}; + title(subplot_title, 'FontSize', 6, 'FontWeight', 'normal'); + + end + % set(gcf, 'Position', get(0, 'Screensize')); + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + first_line = strrep([input.name, ', grid_density=' num2str(input.grid_density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + third_line = ['significant']; + else + third_line = ['not significant']; + end + suptitle({first_line; 
[second_line, ', ' third_line]}); % add third line + + % print([detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0'); + % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png'); + + print([collection_folder_opt, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0'); + saveas(f, [collection_folder_opt, '/behavior_opt_parameters_',num2str(i)], 'png'); + + % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'fig'); + % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png'); + close all; + end + +end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_visualize_data_Dev.m b/Visualization_Module/dp_visualize_data_Dev.m new file mode 100644 index 0000000..a055a1c --- /dev/null +++ b/Visualization_Module/dp_visualize_data_Dev.m @@ -0,0 +1,955 @@ +%% function to visualize SPLS output + +function dp_visualize_data_Dev(IN) + +load(IN.results_path); + +switch IN.overall_analysis + case 'Stress' + overall_folder = '/volume/HCStress/Analysis/Stress'; + case 'Resilience' + overall_folder = '/volume/HCStress/Analysis/Resilience'; + case 'Schizotypy' + overall_folder = '/volume/MU_Pronia_PLS_Schizotypy/Analysis/SPLS/Schizotypy'; +end + + + +folder_name = [setup.date, '_', input.name]; +if contains(IN.results_path, 'final_vis') + collection_folder = [overall_folder, '/', folder_name, '/final_vis']; + val_log = false; +elseif contains(IN.results_path, 'validation_vis') + collection_folder = [overall_folder, '/', folder_name, '/validation_vis']; + val_log = true; +end + +mkdir(collection_folder); + +if ~isfield(IN, 'specific') + all_jobs = true; + IN.specific = 'empty'; +else + all_jobs = false; +end + +marker_size = 14; +font_size = 14; +LV_x = 'clinical features'; +LV_y = 'weights'; +LS_epsilon = 'brain score'; +LS_omega = 'behavior score'; + +%% compute sociodemographic and clinical outcome correlations +if any(strcmp(IN.specific, 'sociodemographic') | all_jobs) + [input, output, setup] = dp_sociodemographic_2020_full(IN); +end + +if val_log + output.final_parameters = output.validation_results; +end + +input.Y_names = strrep(input.Y_names, '_T0', ''); +load(input.NM_structure); + +%% prepare questionnaire data +load('/volume/HCStress/Doc/all_questionnaires.mat', 'BS_questionnaire', 'CISS_questionnaire', 'CTQ_questionnaire', 'RSA_questionnaire', 'WSS_questionnaire'); +all_questionnaires = [CISS_questionnaire', RSA_questionnaire', CTQ_questionnaire', BS_questionnaire', WSS_questionnaire']; + +% define column names for matrices so that you can access them later by +% indexing +% output.parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop +opt_parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'success', 'RHO', 'p'}; +opt_parameters_names_long = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop + +% indices for later use +opt_u = strcmp(output.parameters_names,'u'); +opt_v = strcmp(output.parameters_names,'v'); +opt_p = strcmp(output.parameters_names, 'p'); +opt_RHO = strcmp(output.parameters_names, 'RHO'); +index_epsilon = strcmp(output.parameters_names, 'epsilon'); +index_omega = strcmp(output.parameters_names, 'omega'); +index_epsilon_all = strcmp(output.parameters_names, 'epsilon_all'); +index_omega_all = strcmp(output.parameters_names, 'omega_all'); + +if 
any(strcmp(IN.specific, 'images') | all_jobs) + i_folder = [collection_folder, '/images']; + mkdir(i_folder); + cd(i_folder); + % write brain vector to nifti file + for i=1:size(output.final_parameters,1) + nk_WriteVol(output.final_parameters{i,4}, ['brain_LV_final_' num2str(i)], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); + end +end + +if any(strcmp(IN.specific, 'atlas') | all_jobs) + a_folder = [collection_folder, '/atlases']; + mkdir(a_folder); + atlases = {'brainnetome', 'cerebellum', 'glasser', 'yeo', 'buckner'}; + % get clusters for brain regions using hammers and aal atlas + % filepath hammers nifti: /opt/SPM/spm12_v6685_cat12_r1207/atlas/hammers.nii + % filepath hammers description: /opt/SPM/spm12_v6685_cat12_r1207/atlas/labels_dartel_hammers.xml + + for aa=1:size(atlases,2) + + switch atlases{aa} + case 'glasser' + switch size(input.behavior,1) + case 636 + atlas_path_full = '/volume/HCStress/Data/MRI/Glasser_Atlas/glasser_atlas_CISS_636_NM_X.mat'; + case 652 + atlas_path_full = '/volume/HCStress/Data/MRI/Glasser_Atlas/glasser_atlas_WSS_652_NM_X.mat'; + end + temp = load('/volume/HCStress/Data/MRI/Glasser_Atlas/glasser_indices.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + case 'yeo' + switch size(input.behavior,1) + case 636 + atlas_path_full = '/volume/HCStress/Data/MRI/Yeo_Atlas/Yeo_17strict_CISS_636_NM_X.mat'; + case 652 + atlas_path_full = '/volume/HCStress/Data/MRI/Yeo_Atlas/Yeo_17strict_WSS_652_NM_X.mat'; + end + temp = load('/volume/HCStress/Data/MRI/Yeo_Atlas/yeo_buckner_indices.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + case 'buckner' + switch size(input.behavior,1) + case 636 + atlas_path_full = '/volume/HCStress/Data/MRI/Buckner_Atlas/Buckner_17tight_CISS_636_NM_X.mat'; + case 652 + atlas_path_full = '/volume/HCStress/Data/MRI/Buckner_Atlas/Buckner_17tight_WSS_652_NM_X.mat'; + end + temp = load('/volume/HCStress/Data/MRI/Yeo_Atlas/yeo_buckner_indices.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + case 'brainnetome' + switch size(input.behavior,1) + case 621 + atlas_path_full = '/volume/HCStress/Data/MRI/BNA_CISSRSAPAS_3mm_621_X.mat'; + case 627 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_627_CTQ_BS_NM_X.mat'; + case 630 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_630_CISS_RSA_NM_X.mat'; + a_number = 1; + case 634 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_634_CISS_RSA_NM_X.mat'; + case 636 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_636_CISS_NM_X.mat'; + case 649 + atlas_path_full = '/volume/HCStress/Data/MRI/BNA_hammers_juelich_3mm_649_X.mat'; + a_number = 1; + case 652 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_652_WSS_NM_X.mat'; + end + temp = load('/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_indices.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + % for i=1:size(labels_regions,1) + % strf = strfind(labels_regions{i,2}, '_'); + % labels_regions{i,3} = labels_regions{i,2}(1:(strf-1)); + % end + case 'cerebellum' + switch size(input.behavior,1) + case 621 + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_621_CISSRSAPAS_X.mat'; + case 636 + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_636_CISS_X.mat'; + case 649 + atlas_path_full = 
'/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_649_CTQ_X.mat'; + case 634 + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_634_CISSRSA_X.mat'; + case 652 + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_652_WSS_X.mat'; + end + temp = load('/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/CerebellumMNIflirt_indices.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + + end + + temp=load(atlas_path_full); + fields = fieldnames(temp); + if exist('a_number', 'var') + atlas_for_analysis = round(temp.(fields{1})(a_number,:)); + else + atlas_for_analysis = round(temp.(fields{1})); + end + + [C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); + counts_atlas = accumarray(ic_atlas, 1); + + if any(contains('buckner', atlases{aa})) + log_no_regions = ismember(atlas_for_analysis, [1, 5, 14]); + atlas_for_analysis(log_no_regions) = 0; + [C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); + counts_atlas = accumarray(ic_atlas, 1); + temp=[]; + for bb=1:17 + if any(bb==C_atlas) + temp(bb,:) = [C_atlas(bb==C_atlas), counts_atlas(bb==C_atlas)]; + else + temp(bb,:) = [bb, 0]; + end + end + temp = [C_atlas(1), counts_atlas(1); temp]; + C_atlas = temp(:,1)'; + counts_atlas = temp(:,2); + end + + output.regions_max.(atlases{aa}) = [C_atlas(2:end)', counts_atlas(2:end)]; + + output.regions.(atlases{aa}).regions_count = [C_atlas', counts_atlas]; + output.regions.(atlases{aa}).count_names = {'region_number', 'n_voxels', 'voxel_percentage', 'region_name', 'median_weights', 'mean_weights'}; + output.regions.(atlases{aa}).count = struct('raw', [], 'voxels', [], 'weights', []); + output.regions.(atlases{aa}).count_names_sorting = {'positive', 'negative', 'overall'}; + voxel_cutoff = 0; + fields = fieldnames(output.regions.(atlases{aa}).count); + for i=1:size(fields,1) + for fff = 1:size(output.regions.(atlases{aa}).count_names_sorting,2) + FID = fopen([a_folder, '/brain_regions_', atlases{aa}, '_', output.regions.(atlases{aa}).count_names_sorting{fff}, '_', fields{i}, '.txt'], 'w'); + fprintf(FID, [strrep(input.name,'_',' '), '\n', fields{i} ' sorted']); + fclose(FID); + end + end + + for i=1:size(output.final_parameters,1) + output.regions.(atlases{aa}).log{i,1} = output.final_parameters{i,4}>0; + output.regions.(atlases{aa}).log{i,2} = output.final_parameters{i,4}<0; + output.regions.(atlases{aa}).log{i,3} = output.final_parameters{i,4}~=0; + for ii=1:size(output.regions.(atlases{aa}).log,2) + output.regions.(atlases{aa}).sum{i,ii} = atlas_for_analysis((atlas_for_analysis~=0)' & output.regions.(atlases{aa}).log{i,ii}); + [C, ~, ic] = unique(output.regions.(atlases{aa}).sum{i,ii}); + a_counts = accumarray(ic, 1); + voxel_percentage = a_counts./(counts_atlas(ismember(C_atlas, C))); + output.regions.(atlases{aa}).count.raw{i,ii} = [num2cell(C'), num2cell(a_counts), num2cell(voxel_percentage), labels_regions((C'),2)]; + fix_end = size(output.regions.(atlases{aa}).count.raw{i,ii},2); + log_cutoff = cell2mat(output.regions.(atlases{aa}).count.raw{i,ii}(:, find(strcmp(output.regions.(atlases{aa}).count_names, 'n_voxels')))) 0 + strf = strfind(mat{1,4}, ' '); + temp_prefix = mat{1,4}(1:(strf-1)); + strf_log = ~cellfun(@isempty, strfind(mat(:,4), temp_prefix)); + temp_mat = [temp_mat; mat(strf_log,:)]; + output.regions.(atlases{aa}).count.alphabet{b,bb} = temp_mat; + mat(strf_log,:)=[]; + end + end + end + + else + temp = output.regions.(atlases{aa}).count.voxels; + for 
b=1:size(temp,1) + for bb=1:size(temp,2) + mat = temp{b,bb}; + if ~isempty(mat) + [~,idu] = sort([mat(:,4)]'); + output.regions.(atlases{aa}).count.alphabet{b,bb} = mat(idu,:); + else + output.regions.(atlases{aa}).count.alphabet{b,bb} = mat; + end + end + end + + end + + for fi=1:size(output.regions.(atlases{aa}).count.alphabet,1) + temp_add = []; + temp_print = output.regions.(atlases{aa}).count_names([1,2,4,5,3]); + for f=1:size(temp_print,2) + % try output.regions.(atlases{aa}).count_names{f} = num2str(temp_print{f}); + % catch ME + % end + temp_add = [temp_add, sprintf('\t'), temp_print{f}]; + end + % fields = fieldnames(output.regions.(atlases{aa}).count); + % for ff=1:size(fields,1) + for fff = 1:size(output.regions.(atlases{aa}).count.alphabet,2) + FID = fopen([a_folder, '/brain_regions_', atlases{aa}, '_', output.regions.(atlases{aa}).count_names_sorting{fff}, '_alphabet.txt'], 'a'); + fprintf(FID, ['\n \n \n', sprintf('\t'), 'Latent Variable ' num2str(fi), '\n \n', temp_add, '\n']); + fclose(FID); + end + % end + + % fields = fieldnames(output.regions.(atlases{aa}).count); + % for ff=1:size(fields,1) + for fff = 1:size(output.regions.(atlases{aa}).count.alphabet,2) + if ~isempty(output.regions.(atlases{aa}).count.alphabet{fi,fff}) + for r=1:size(output.regions.(atlases{aa}).count.alphabet{fi,fff},1) + temp_print = output.regions.(atlases{aa}).count.alphabet{fi,fff}(r,[1,2,4,5,3]); + temp_add = []; + for ii=1:size(temp_print,2) + try temp_print{ii} = num2str(temp_print{ii}); + catch ME + end + temp_add = [temp_add, sprintf('\t'), temp_print{ii}]; + end + + FID = fopen([a_folder, '/brain_regions_', atlases{aa}, '_', output.regions.(atlases{aa}).count_names_sorting{fff}, '_alphabet.txt'], 'a'); + fprintf(FID, ['\n' temp_add]); + fclose(FID); + end + end + end + % end + end + + end + + end + % just deactivated for paper preparation, needs to be reactivated + % later on + try temp = load(input.X); + catch temp = load(input.MRI); + end + field = fieldnames(temp); + MRI_volumes = temp.(field{1}); + for i=1:size(output.final_parameters,1) + u=output.final_parameters{i,4}; + % log_u{1,1} = u>0; + % log_u{1,2} = u<0; + for ii=1:(size(output.regions.(atlases{aa}).log,2)-1) + log_weights = output.regions.(atlases{aa}).log{i,ii}'; + for iii=1:size(output.regions.(atlases{aa}).count.alphabet{i,ii},1) + log_region = atlas_for_analysis == output.regions.(atlases{aa}).count.alphabet{i,ii}{iii,1}; + output.volumes.(atlases{aa}).names{i,ii} = [output.regions.(atlases{aa}).count.alphabet{i,ii}(:,1)';output.regions.(atlases{aa}).count.alphabet{i,ii}(:,4)']; + % for s=1:size(MRI_volumes,1) + output.volumes.(atlases{aa}).raw{i,ii}(:,iii) = sum(MRI_volumes(:,log_region&log_weights),2); + % output.volumes.(['LV_', num2str(i)]).weighted.(atlases{aa}){s,iii} = sum(output.final_parameters{i,4}(log_region&log_weights)' .* MRI_volumes(s,log_region&log_weights)); + % end + end + end + % MRI_volumes = MRI_volumes - (MRI_volumes*u)*u'; + end + + end + + temp=[]; names_atlases = {'yeo', 'buckner'}; + for an=1:size(names_atlases,2) + for rr=1:size(output.regions.(names_atlases{an}).count.raw,1) + for cc=1:size(output.regions.(names_atlases{an}).count.raw,2) + temp = output.regions.(names_atlases{an}).count.raw{rr,cc}; + temp_new={}; + for nr=1:size(labels_regions,1) + try log_find = ismember([temp{:,1}], labels_regions{nr,1}); + if any(log_find) + temp_new(nr,:) = [temp(log_find,:), temp{log_find,2}/output.regions_max.(names_atlases{an})(nr, 2)]; + else + temp_new(nr,:) = [nr, 0, 0, labels_regions((nr),2), 0, 0, 
0];
+
+ end
+ catch
+ temp_new(nr,:) = [nr, 0, 0, labels_regions((nr),2), 0, 0, 0];
+ end
+ end
+ output.regions.yeo_buckner_collection.(names_atlases{an}){rr,cc} = temp_new;
+ end
+ end
+ end
+
+ save(IN.results_path, 'input', 'output', 'setup');
+
+ % get max voxels for cerebrum and cerebellum to compose yeo and buckner
+ % into one matrix
+
+ temp = output.regions.yeo_buckner_collection.yeo; new_collection={};
+ for cc=1:size(temp,2)
+ type_voxels = output.regions.(atlases{aa}).count_names_sorting{cc};
+ name_txt = [a_folder, '/yeo_buckner_combined_', type_voxels, '.txt'];
+ for rr=1:size(temp,1)
+ writecell({['LV_', num2str(rr)]}, name_txt, 'WriteMode', 'append');
+ temp_sum = [temp{rr,cc}{:,2}] + [output.regions.yeo_buckner_collection.buckner{rr,cc}{:,2}];
+ temp_ratio = temp_sum'./(output.regions_max.yeo(:,2)+output.regions_max.buckner(:,2));
+ new_temp_sum = sum(temp_sum(9:10));
+ new_temp_ratio = new_temp_sum/(sum(output.regions_max.yeo(9:10,2)) + sum(output.regions_max.buckner(9:10,2))); % fixed: the original sum(a, sum(b)) nesting passed the buckner total as a dimension argument, dropping it from the denominator
+ temp_sum(9) = new_temp_sum;
+ temp_ratio(9) = new_temp_ratio;
+ temp_sum(10) = [];
+ temp_ratio(10) = [];
+ temp_array = [[temp{rr,cc}{[1:9, 11:17],1}]', temp_sum', temp_ratio];
+ output.regions.yeo_buckner_collection.combined{rr,cc} = array2table(temp_array, 'VariableNames', {'region_number', 'voxels_sum', 'voxels_ratio'}, 'RowNames', labels_regions([1:9, 11:17],2));
+ writetable(output.regions.yeo_buckner_collection.combined{rr,cc}, name_txt, 'WriteMode', 'append', 'WriteRowNames', true);
+ end
+ end
+
+ save(IN.results_path, 'input', 'output', 'setup');
+ % end
+
+end
+
+
+if any(strcmp(IN.specific, 'behavior') | all_jobs)
+
+ b_folder = [collection_folder, '/behavior'];
+ mkdir(b_folder);
+
+ %% visualize behavior vector in barplot
+ % CTQ.emotional_abuse)), (CTQ.physical_abuse)), (CTQ.sexual_abuse)), (CTQ.emotional_neglect)), (CTQ.physical_neglect)), (CTQ.denial))];
+ % load([setup.analysis_folder, '/' setup.date, '_', name, '_data_collection.mat']);
+ % if sum(ismember(input.Y_names, 'Age')) + sum(ismember(input.Y_names, 'Sex'))==2
+ % selected_variables = [input.selected_features(1,:), 'Age', 'Sex'; input.selected_features(2,:), 1, 1];
+ % elseif sum(ismember(input.Y_names, 'Age'))
+ % selected_variables = [input.selected_features(1,:), 'Age'; input.selected_features(2,:), 1];
+ % elseif sum(ismember(input.Y_names, 'Sex'))
+ % selected_variables = [input.selected_features(1,:), 'Sex'; input.selected_features(2,:), 1];
+ % else
+ % selected_variables = input.selected_features;
+ % end
+
+ log_f=[];
+ for i=1:size(input.selected_features,2)
+ log_f(i,:) = contains(input.Y_names(1,:), input.selected_features{1,i});
+ end
+
+ if size(log_f,1)>1
+ log_c = sum(sum(log_f==0)==size(input.selected_features,2));
+ else
+ log_c = sum((log_f==0)==size(input.selected_features,2));
+ end
+
+ selected_variables = [[input.selected_features(1,:), input.Y_names((end-(log_c-1)):end)]; [input.selected_features(2,:), num2cell(ones(1,log_c))]];
+ % compute measures for effects
+
+ log_coll = {index_epsilon, index_epsilon_all; index_omega, index_omega_all; 'test', 'all'};
+ for l=1:size(log_coll,2)
+ for i=1:size(output.final_parameters,1)
+ x1 = output.final_parameters{i, log_coll{1, l}};
+ x2 = output.final_parameters{i, log_coll{2, l}};
+ output.effect_sizes.(log_coll{3,l}).Cohen(i) = computeCohen_d(x1, x2, 'independent');
+ output.effect_sizes.(log_coll{3,l}).Spearman(i) = corr(x1, x2, 'Type', 'Spearman');
+ output.effect_sizes.(log_coll{3,l}).Kendall(i) = corr(x1, x2, 'Type', 'Kendall');
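+ % Editor's note (hedged sketch): computeCohen_d, kernelmi and mutualinfo are
+ % external helpers not included in this patch. For the 'independent' flag
+ % used here, the standard pooled-SD definition computeCohen_d is assumed to
+ % implement is:
+ %   n1 = numel(x1); n2 = numel(x2);
+ %   s_pooled = sqrt(((n1-1)*var(x1) + (n2-1)*var(x2)) / (n1 + n2 - 2));
+ %   d = (mean(x1) - mean(x2)) / s_pooled;
+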
output.effect_sizes.(log_coll{3,l}).MI_kernel(i) = kernelmi(x1',x2'); + output.effect_sizes.(log_coll{3,l}).MI_peng(i) = mutualinfo(x1,x2); + mdl = fitlm(array2table([x1, x2], 'VariableNames', {'epsilon', 'omega'})); + output.effect_sizes.(log_coll{3,l}).R2(i) = mdl.Rsquared.Adjusted; + end + end + + count=0; + for i=1:size(input.selected_features(2,:),2) + temp=size(fieldnames(input.selected_features{2,i}),1); + count=count+temp; + end + colorpattern_LV = colorcube((count + log_c)); + + % collect items from vectors + for i=1:(size(output.final_parameters,1)) + x = output.final_parameters{i,opt_v}; + output.questions_collection.(['LV_', num2str(i)]).items = input.Y_names(x~=0)'; + output.questions_collection.(['LV_', num2str(i)]).subscales = input.subscales(x~=0)'; + for q=1:size(output.questions_collection.(['LV_', num2str(i)]).items,1) + output.questions_collection.(['LV_', num2str(i)]).questions{q} = all_questionnaires(1,strcmp(all_questionnaires(2,:), output.questions_collection.(['LV_', num2str(i)]).items{q})); + end + + % errors = output.CI_v{i}; + f=figure(); + nn=0; + hold on + temp_legend=[]; temp_all = 0; %sel_temp_names=[]; + for ii=1:(size(selected_variables,2)) + switch class(selected_variables{2,ii}) + case 'struct' + fields = fieldnames(input.selected_features{2,(strcmp(input.selected_features(1,:),input.selected_features{1,ii}))}); + for iii=1:size(fields,1) + nn=nn+1; + temp_current=size(selected_variables{2,ii}.(fields{iii}),2); + try temp_handle(nn) = dp_barwitherr(errors([(temp_all+1):(temp_all+temp_current)],:),(temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + catch + temp_handle(nn) = bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + end + % temp_names = input.Y_names((temp_all+1):(temp_all+temp_current)); + % sel_temp_names = [sel_temp_names; temp_names(x((temp_all+1):(temp_all+temp_current))~=0)]; + temp_all = temp_all+temp_current; + temp_legend{nn} = [selected_variables{1,ii}, ' ', strrep(fields{iii}, '_', ' ')]; + hold on + end + case 'double' + nn=nn+1; + temp_current=size(selected_variables{2,ii},2); + try temp_handle(nn) = dp_barwitherr(errors([(temp_all+1):(temp_all+temp_current)],:),(temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + catch + temp_handle(nn) = bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + end + temp_all = temp_all+temp_current; + temp_legend{nn} = strrep(selected_variables{1,ii}, '_', ' '); + hold on + end + end + + if any(i==input.grid_dynamic.onset) + grid_x = input.grid_dynamic.(['LV_', num2str(i)]).x; + grid_y = input.grid_dynamic.(['LV_', num2str(i)]).y; + end + + first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + + if ~val_log + p_sig_threshold = output.pvalue_FDR(i); + fourth_line = 'main sample'; + else + p_sig_threshold = 0.05; + fourth_line = 'validation sample'; + end + + if output.final_parameters{i,opt_p}<=p_sig_threshold + third_line = 'significant'; + else + third_line = 'not significant'; + end + title({first_line; second_line; third_line; fourth_line}); % add third line + xlabel(LV_x, 
'FontSize', font_size); + ylabel(LV_y, 'FontSize', font_size); + + legend(temp_handle, temp_legend, 'Location', 'bestoutside', 'FontSize', font_size); + hold all + + set(gcf,'Position', get(0,'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [b_folder, '/behavior_LV_' num2str(i)], '-dpng', '-r0'); + saveas(f, [b_folder, '/behavior_LV' num2str(i), '.fig']); + % saveas(f, [b_folder, '/behavior_LV' num2str(i), '.eps']); + saveas(f,[b_folder, '/behavior_LV' num2str(i)],'epsc'); + + close(f); + output.questions_collection.(['LV_', num2str(i)]).final={}; + for qq=1:size(output.questions_collection.(['LV_', num2str(i)]).items,1) + try output.questions_collection.(['LV_', num2str(i)]).final{qq} = [output.questions_collection.(['LV_', num2str(i)]).items{qq}, ': ', output.questions_collection.(['LV_', num2str(i)]).questions{1,qq}{1}]; + catch + output.questions_collection.(['LV_', num2str(i)]).final{qq} = [output.questions_collection.(['LV_', num2str(i)]).items{qq}]; + end + end + + output.questions_collection.(['LV_', num2str(i)]).combined = [output.questions_collection.(['LV_', num2str(i)]).final; output.questions_collection.(['LV_', num2str(i)]).subscales']; + + save(IN.results_path, 'input', 'output', 'setup'); + + end + + % print out questions + fields = fieldnames(output.questions_collection); + name_txt = [b_folder, '/collected_questions.txt']; + writecell(cell(1,1), name_txt); + + for i=1:size(fields,1) + writecell(['LV_', num2str(i); output.questions_collection.(['LV_', num2str(i)]).combined(1,:)'; cell(2,1)],[b_folder, '/collected_questions.txt'],'WriteMode','append') + end + + %% plot latent scores epsilon and omega, color code diagnostic groups (HC, + % ROD, ROP, CHR) + + for i=1:size(input.selected_studygroups,2) + input.selected_studygroups{2,i} = strcmp(input.data_collection.Labels, input.selected_studygroups{1,i}); + end + + for i=1:size(output.CV.cv_outer_indices.TestInd,2) + % temp_1 = zeros(size(input.selected_studygroups{2,1},1),size(input.selected_studygroups{2,1},2))>0; + % temp_1(output.CV.cv_outer_indices.TestInd{1,i}) = true; + output.CV.cv_outer_indices.TestInd{2,i} = input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,i}); + end + + % plot all latent scores according to LVs + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + for i=1:size(output.final_parameters,1) + f=figure(); + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = rescale(output.final_parameters{i,index_epsilon}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})), -1, 1); + y = rescale(output.final_parameters{i,index_omega}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})), -1, 1); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(ii,:)); + hold on + end + first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + + if ~val_log + p_sig_threshold = output.pvalue_FDR(i); + fourth_line = 'main sample'; + else + p_sig_threshold = 0.05; + fourth_line = 'validation sample'; + end + + if output.final_parameters{i,opt_p}<=p_sig_threshold + third_line = 'significant'; + else + third_line = 'not significant'; + end + + title({first_line; second_line; third_line; fourth_line}); % add third line + 
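+ % Editor's note (hedged sketch): rescale(z, -1, 1) (MATLAB R2017b+) linearly
+ % maps a vector onto [-1, 1]; for the scores plotted above it is equivalent to:
+ %   z_scaled = -1 + 2 .* (z - min(z)) ./ (max(z) - min(z));
+ % Because this transformation is monotone, the rank-based Spearman RHO
+ % reported in the figure titles is unaffected by the rescaling.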
xlabel(LS_epsilon); + ylabel(LS_omega); + lsline + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [b_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [b_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [b_folder, '/latent_scores_LV' num2str(i), '.eps']); + saveas(f,[b_folder, '/latent_scores_LV' num2str(i)],'epsc'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f) + end + + % plot latent scores all combined across significant LVs, colorcoded by LVs + colorpattern_LS = hsv(size(output.final_parameters,1)); + f=figure(); temp_legend = []; + for i=1:size(output.final_parameters,1) + x = rescale(output.final_parameters{i,index_epsilon}, -1, 1); + y = rescale(output.final_parameters{i,index_omega}, -1, 1); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(i,:)); + + first_line = strrep([input.name, ', grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + sig_line = 'significant'; + else + sig_line = 'not significant'; + end + title({first_line;second_line}); % add third line + hold on + temp_legend{i} = ['LV ', num2str(i), ': ', sig_line]; + end + % temp_legend{nn} = selected_variables{1,ii}; + xlabel(LS_epsilon); + ylabel(LS_omega); + lsline + [~, lgd_data] = legend(temp_legend, 'Location', 'bestoutside', 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [b_folder, '/latent_scores_combined_LV_color'], '-dpng', '-r0'); + saveas(f, [b_folder, '/latent_scores_combined_LV_color.fig']); + % saveas(f, [b_folder, '/latent_scores_combined_LV_color.eps']); + saveas(f,[b_folder, '/latent_scores_combined_LV_color'],'epsc'); + close(f) + + % % standardize latent scores and plot all of them in one graph, colorcoded + % % by diagnoses + % % first transpose latent scores so that they fit function, then standardize + % % feature-wise (per LV) + % output.epsilon_stand = (dp_standardize(output.epsilon'))'; + % output.omega_stand = (dp_standardize(output.omega'))'; + + % plot all latent scores according to LVs, colorcoded by diagnoses + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + f=figure(); + for i=1:size(output.final_parameters,1) + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = rescale(output.final_parameters{i,index_epsilon}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})), -1, 1); + y = rescale(output.final_parameters{i,index_omega}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})), -1 , 1); + plot(x,y,'.', 'MarkerSize', marker_size, 
'color',colorpattern_LS(ii,:)); + hold on + end + end + end + first_line = strrep([input.name, ', grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + % if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + % third_line = ['significant']; + % else + % third_line = ['not significant']; + % end + title({first_line; second_line}); % add third line + xlabel(LS_epsilon); + ylabel(LS_omega); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [b_folder, '/latent_scores_combined_diagnosis_color'], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [b_folder, '/latent_scores_combined_diagnosis_color', '.fig']); + % saveas(f, [b_folder, '/latent_scores_combined_diagnosis_color', '.eps']); + saveas(f,[b_folder, '/latent_scores_combined_diagnosis_color'],'epsc'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f); + + +end + +if any(strcmp(IN.specific, 'detailed') | all_jobs) + d_folder = [collection_folder, '/detailed']; + mkdir(d_folder); + + % detailed_results_folder = [IN.results_path(1:(strfind(IN.results_path, 'final')-1)), 'detailed_results']; + cd(d_folder); + + for i=1:size(output.final_parameters,1) + load([detailed_results_folder, '/opt_parameters_' num2str(i), '.mat']); + if exist('opt_parameters') + if size(opt_parameters,2) == 8 + opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s wrong!'); + end + temp_opt_param = opt_parameters; + elseif exist('opt_parameters_temp') + if size(opt_parameters_temp,2) == 8 || size(opt_parameters_temp,2) == 10 + opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters_temp,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s wrong!'); + end + temp_opt_param = opt_parameters_temp; + else + disp('Something''s wrong!'); + end + % opt_dir = [detailed_results_folder, '/opt_parameters_' num2str(i)]; + % mkdir(opt_dir); + % cd(opt_dir); + + d_folder_opt = [d_folder, '/opt_parameters_' num2str(i)]; + mkdir(d_folder_opt); + cd(d_folder_opt); + + % for ii=1:size(temp_opt_param,1) + % %% visualize results + % % write brain vector to nifti file + % nk_WriteVol(temp_opt_param{ii,opt_param_u}, ['brain_opt_parameters_' 
num2str(i), '_' num2str(temp_opt_param{ii,1})], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt');
+ % % dp_resample_image([d_folder_opt, '/brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], [1 1 1]);
+ % end
+
+ %% visualize behavior vector in barplot
+
+ for ii=1:(size(temp_opt_param,1))
+ x = temp_opt_param{ii,opt_param_v};
+ f=subplot(round(size(temp_opt_param,1)/2),2,ii);
+ nn=0;
+ hold on
+ temp_legend=[]; temp_all = 0;
+ for iii=1:size(selected_variables,2)
+ switch class(selected_variables{2,iii})
+ case 'struct'
+ fields = fieldnames(input.selected_features{2,(strcmp(input.selected_features(1,:),input.selected_features{1,iii}))});
+ for iiii=1:size(fields,1)
+ nn=nn+1;
+ temp_current=size(selected_variables{2,iii}.(fields{iiii}),2);
+ bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:));
+ temp_all = temp_all+temp_current;
+ temp_legend{nn} = [selected_variables{1,iii}, ' ', strrep(fields{iiii}, '_', ' ')];
+ hold on
+ end
+ case 'double'
+ nn=nn+1;
+ temp_current=size(selected_variables{2,iii},2);
+ bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); % fixed: temp_current is already the element count; size(temp_current,2) is always 1 and collapsed each block to a single bar
+ temp_all = temp_all+temp_current;
+ temp_legend{nn} = selected_variables{1,iii};
+ hold on
+ end
+ end
+ axis([0 (size(temp_opt_param{ii,opt_param_v},1)+1) -1 1]);
+ xlabel({'\color{black}clinical features'}, 'FontSize', font_size);
+ ylabel('weights', 'FontSize', font_size);
+ if temp_opt_param{ii,opt_param_p}<=output.pvalue_FDR(i)
+ significance_opt = 'significant';
+ else
+ significance_opt = 'not significant';
+ end
+ subplot_title = {['Iteration ', num2str(ii), ', p-value (FDR-corrected) = ' num2str(temp_opt_param{ii,opt_param_p}), ', Spearman''s RHO = ', num2str(temp_opt_param{ii,opt_param_RHO}), ', ', significance_opt]};
+ title(subplot_title, 'FontSize', font_size, 'FontWeight', 'normal');
+
+ end
+ % set(gcf, 'Position', get(0, 'Screensize'));
+ set(gcf, 'Position', get(0, 'Screensize'));
+ set(gcf,'PaperPositionMode','auto')
+ first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' ');
+ second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})];
+ if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i)
+ third_line = ['significant'];
+ else
+ third_line = ['not significant'];
+ end
+ suptitle({first_line; [second_line, ', ' third_line]}); % add third line
+
+ % print([detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0');
+ % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png');
+
+ print([d_folder_opt, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0');
+ saveas(f, [d_folder_opt, '/behavior_opt_parameters_',num2str(i)], 'png');
+ % saveas(f, [d_folder_opt, '/behavior_opt_parameters_',num2str(i)], 'eps');
+ saveas(f,[d_folder_opt, '/behavior_opt_parameters_',num2str(i)],'epsc');
+ % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'fig');
+ % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png');
+ close all;
+ end
+
+end
+
+if any(strcmp(IN.specific, 'correlation') | all_jobs)
+
+ for i=1:size(output.CV.cv_outer_indices.TestInd,2)
+ output.CV.cv_outer_indices.TestInd{2,i} =
input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,i}); + end + + % plot all latent scores according to LVs + s_folder = [collection_folder, '/correlation']; + mkdir(s_folder); + latent_selection = {index_epsilon, index_omega; LS_epsilon, LS_omega; 'epsilon', 'omega'}; + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + fields = fieldnames(output.post_hoc_correlations.correlations); + fields(contains(fields, 'RHO')|contains(fields, 'p'))=[]; + for ff=1:size(fields,1) + p_sig_collection = output.post_hoc_correlations.correlations.(fields{ff}).table_p.Variables < 0.05; + for l=1:size(latent_selection,2) + for i=1:size(output.post_hoc_correlations.correlations.(fields{ff}).table_p.Variables,1) + for ii=1:size(output.post_hoc_correlations.correlations.(fields{ff}).table_p.Variables,2) + if p_sig_collection(i,ii) + f=figure(); + for iii=1:size(input.selected_studygroups,2) + w = output.final_parameters{round(ii/2),1}; + x = output.final_parameters{round(ii/2),latent_selection{1,l}}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,iii})); + temp=output.post_hoc_correlations.data_collection(:,i); + y = temp(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,iii})); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(iii,:)); + hold on + end + + xlabel(latent_selection{2,l}, 'FontSize', font_size); + y_temp = output.post_hoc_correlations.data_table.Properties.VariableNames(ii); + y_temp = strrep(y_temp, '_', ' '); + y_temp = strrep(y_temp, 'T0',''); + y_temp = strrep(y_temp, 'Screening',''); + ylabel(y_temp, 'FontSize', font_size); + title(['p-value = ' num2str(output.post_hoc_correlations.correlations.(fields{ff}).table_p{i,ii}), ', Spearman''s RHO = ', num2str(output.post_hoc_correlations.correlations.(fields{ff}).table_RHO{i,ii})], 'FontSize', font_size); % add third line + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + hold on + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}], '-dpng', '-r0'); + saveas(f, [s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}, '.fig']); + % saveas(f, [s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}, '.eps']); + saveas(f,[s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}],'epsc'); + close all; + + end + end + end + end + end + save(IN.results_path, 'input', 'output', 'setup'); + +end + + +end diff --git a/Visualization_Module/dp_visualize_data_Devmod.m b/Visualization_Module/dp_visualize_data_Devmod.m new file mode 100644 index 0000000..3f9e2ab --- /dev/null +++ b/Visualization_Module/dp_visualize_data_Devmod.m @@ -0,0 +1,810 @@ +%% function to visualize SPLS output + +function dp_visualize_data_Devmod(IN) + +load(IN.results_path); + +switch IN.overall_analysis + case 
'Stress' + overall_folder = '/volume/HCStress/Analysis/Stress'; + case 'Resilience' + overall_folder = '/volume/HCStress/Analysis/Resilience'; + case 'Schizotypy' + overall_folder = '/volume/MU_Pronia_PLS_Schizotypy/Analysis/SPLS/Schizotypy'; + case 'Mimics' + overall_folder = '/volume/DP_Mimics/Analysis/Mimics'; + case 'immune' + overall_folder = '/volume/RU_DP_immune/Analysis/immune'; + case 'MDD_Trauma' + overall_folder = '/volume/projects/ST_Trauma_MDD/Analysis/MDD_Trauma'; + case 'FF' + overall_folder = '/volume/projects/DP_JL_Forensics/Analysis/FF'; + case 'COPE' + overall_folder = '/volume/projects/ES_become/ES_COPE/Analysis/SPLS/COPE'; +end + +folder_name = [setup.date, '_', input.name]; +if contains(IN.results_path, 'final_vis') + collection_folder = [overall_folder, '/', folder_name, '/final_vis']; + val_log = false; +elseif contains(IN.results_path, 'validation_vis') + collection_folder = [overall_folder, '/', folder_name, '/validation_vis']; + val_log = true; +end + +mkdir(collection_folder); + +if ~isfield(IN, 'specific') + all_jobs = true; + IN.specific = 'empty'; +else + all_jobs = false; +end + +marker_size = 14; +font_size = 14; +LV_x = 'clinical features'; +LV_y = 'weights'; +LS_epsilon = 'brain score'; +LS_omega = 'behavior score'; + +%% get min and max voxels +output.min_max_table = dp_min_max_voxels(output.final_parameters); + +%% compute sociodemographic and clinical outcome correlations +if any(strcmp(IN.specific, 'sociodemographic') | all_jobs) + [input, output, setup] = dp_sociodemographic_2020_full(IN); +end + +if any(strcmp(IN.specific, 'post_hoc') | all_jobs) + p_folder = [collection_folder, '/post_hoc_correlation']; + mkdir(p_folder); +% load('/volume/RU_DP_immune/Analysis/24-Apr-2021/Immune_only_678_IQRadd_NCV55_noval_min10_2020_5000AUC_Dev/final_results/result_final_vis.mat'); + group_select = 'all'; % all OR fold + correct = 1; % 1=yes, 2=no + output.RHO_all=[]; + epsilon=[]; omega=[]; latent_scores=[];latent_scores_names=[]; + switch group_select + case 'all' + temp = load(input.MRI_path); + name_temp = fieldnames(temp); + X = temp.(name_temp{1}); + Y = input.Y; + + for i=1:(size(output.final_parameters,1)-1) + + IN_x.train = X; + IN_x.test = X; + IN_y.train = Y; + IN_y.test = Y; + + switch correct + + case 1 % with site correction + if i==1 + COV.train = input.sites; + COV.test = input.sites; + else + COV.train = nan(size(input.sites,1),1); + COV.test = nan(size(input.sites,1),1); + input.correction_target = 3; + end + case 2 % without site correction + COV.train = nan(size(input.sites,1),1); + COV.test = nan(size(input.sites,1),1); + input.correction_target = 3; + end + + [OUT_x, OUT_y] = dp_master_correctscale(IN_x, IN_y, COV, input.scaling_method, input.correction_target); + + u = output.final_parameters{i,4}; + v = output.final_parameters{i,5}; + + [output.RHO_all(i), epsilon, omega, u, v] = dp_projection(OUT_x.train, OUT_y.train, u, v, input.correlation_method); + + [X,Y] = proj_def(X, Y, u, v); + latent_scores = [latent_scores, omega, epsilon]; + latent_scores_names = [latent_scores_names, {['omega_LV', num2str(i)], ['epsilon_LV', num2str(i)]}]; + end + + output.latent_scores_table = array2table(latent_scores, 'RowNames', input.final_ID, 'VariableNames', latent_scores_names); + + case 'fold' + + brain_scores_LV3 = epsilon{3}; + brain_scores_LV4 = epsilon{4}; + output.latent_scores_table = array2table([brain_scores_LV3, brain_scores_LV4], 'RowNames', input.final_ID, 'VariableNames', {'epsilon_LV3', 'epsilon_LV4'}); + end + + correlation_names=[]; + + 
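+ % Editor's note (hedged sketch; the internals of dp_projection and proj_def
+ % are not shown in this patch): under standard SPLS conventions, the
+ % projection and deflation steps above amount to:
+ %   epsilon = X * u;                      % brain latent score
+ %   omega   = Y * v;                      % behavior latent score
+ %   RHO     = corr(epsilon, omega, 'Type', 'Spearman');  % or whatever input.correlation_method selects
+ %   X = X - (X*u)*u';                     % deflate current LV
+ %   Y = Y - (Y*v)*v';
+ % so each successive LV is estimated on data orthogonalized with respect to
+ % the previous weight vectors, mirroring the commented deflation line
+ % (MRI_volumes - (MRI_volumes*u)*u') in the atlas section of this module.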
if any(contains(IN.SD_selection, 'GAF')) + try temp_names = input.data_complete.foranalysis.additional.Properties.VariableNames(contains(input.data_complete.foranalysis.additional.Properties.VariableNames, 'TGF', 'IgnoreCase', false)); + input.data_complete.foranalysis.additional(:, temp_names)=[]; + end + correlation_names = [correlation_names, input.data_complete.foranalysis.additional.Properties.VariableNames(contains(input.data_complete.foranalysis.additional.Properties.VariableNames, {'GAF', 'GF'}, 'IgnoreCase', false))]; + end + + if any(contains(IN.SD_selection, 'BDI')) + correlation_names = [correlation_names, input.data_complete.foranalysis.additional.Properties.VariableNames(contains(input.data_complete.foranalysis.additional.Properties.VariableNames, 'BDI', 'IgnoreCase', false))]; + end + + if any(contains(IN.SD_selection, 'NEO')) + correlation_names = [correlation_names, input.data_complete.foranalysis.additional.Properties.VariableNames(contains(input.data_complete.foranalysis.additional.Properties.VariableNames, 'NEO', 'IgnoreCase', false))]; + end + + if any(contains(IN.SD_selection, 'WHO')) + correlation_names = [correlation_names, input.data_complete.foranalysis.additional.Properties.VariableNames(contains(input.data_complete.foranalysis.additional.Properties.VariableNames, 'WHO', 'IgnoreCase', false))]; + end + + if any(contains(IN.SD_selection, 'PANSS')) + correlation_names = [correlation_names, input.data_complete.foranalysis.additional.Properties.VariableNames(contains(input.data_complete.foranalysis.additional.Properties.VariableNames, 'PANSS', 'IgnoreCase', false))]; + end + + if any(contains(IN.SD_selection, 'COGDIS')) + load('/volume/RU_DP_immune/Data/Immune_megapaper_request_Disc/Immune_megapaper_request_Disc/DATA/18-Dec-2020/Immune_megapaper_request_Disc_Data_all_18-Dec-2020.mat'); + temp_data_table = data_table_all; + + load('/volume/RU_DP_immune/Data/Immune_megapaper_request_Repl/Immune_megapaper_request_Repl/DATA/18-Dec-2020/Immune_megapaper_request_Repl_Data_all_18-Dec-2020.mat'); + temp_names = temp_data_table.Properties.VariableNames(matches(temp_data_table.Properties.VariableNames, data_table_all.Properties.VariableNames)); + data_table_all = [temp_data_table(:, temp_names); data_table_all(:, temp_names)]; + cogdis_items_names = {'SPI_A_COGDIS_B1_1', 'SPI_A_COGDIS_C2_1', 'SPI_A_COGDIS_C3_1', 'SPI_A_COGDIS_C4_1', 'SPI_A_COGDIS_C5_1',... 
+ 'SPI_A_COGDIS_D3_1', 'SPI_A_COGDIS_D4_1', 'SPI_A_COGDIS_O3_1', 'SPI_A_COGDIS_O7_1'}; + cogdis_names = data_table_all.Properties.VariableNames(contains(data_table_all.Properties.VariableNames, cogdis_items_names, 'IgnoreCase', true) & contains(data_table_all.Properties.VariableNames, 'red', 'IgnoreCase', true)); + + input.data_complete.foranalysis.cogdis_data = data_table_all(:, cogdis_names); + input.data_complete.foranalysis.cogdis_data.cogdis_fulfilled = sum(input.data_complete.foranalysis.cogdis_data.Variables>=3,2)>=2; + input.data_complete.foranalysis.cogdis_data.Properties.RowNames = input.data_complete.foranalysis.basic.Properties.RowNames; + correlation_names = [correlation_names, 'cogdis_fulfilled']; + end + + if any(contains(IN.SD_selection, 'neurocognition')) + % get extraction data + load('/volume/RU_DP_immune/Data/Immune_megapaper_request_Disc/Immune_megapaper_request_Disc/DATA/18-Dec-2020/Immune_megapaper_request_Disc_Data_all_18-Dec-2020.mat'); + temp_data_table = data_table_all; + + load('/volume/RU_DP_immune/Data/Immune_megapaper_request_Repl/Immune_megapaper_request_Repl/DATA/18-Dec-2020/Immune_megapaper_request_Repl_Data_all_18-Dec-2020.mat'); + temp_names = temp_data_table.Properties.VariableNames(matches(temp_data_table.Properties.VariableNames, data_table_all.Properties.VariableNames)); + data_table_all = [temp_data_table(:, temp_names); data_table_all(:, temp_names)]; + + NC = rs_neurocognition(data_table_all); + input.data_complete.foranalysis.neurocognition=table; + for i=1:size(input.data_complete.foranalysis.basic.Properties.RowNames,1) + log_temp = str2num(cell2mat(data_table_all.PSN)) == str2num(input.data_complete.foranalysis.basic.Properties.RowNames{i,1}); + if sum(log_temp)~=0 + input.data_complete.foranalysis.neurocognition(i,:) = NC.single_scores(log_temp, :); + else + input.data_complete.foranalysis.neurocognition(i,:) = array2table(nan(1, size(NC.single_scores,2)), 'VariableNames', NC.single_scores.Properties.VariableNames); + end + end + input.data_complete.foranalysis.neurocognition.Properties.RowNames = input.data_complete.foranalysis.basic.Properties.RowNames; + correlation_names = [correlation_names, input.data_complete.foranalysis.neurocognition.Properties.VariableNames]; + end + + if any(contains(IN.SD_selection, 'CTQ')) + ctq_names = sort(input.data_complete.foranalysis.CTQ.Properties.VariableNames); + ctq_data_old = input.data_complete.foranalysis.CTQ.Variables; + CTQ.emotional_abuse = [3,8,14,18,25]; + CTQ.physical_abuse = [9,11,12,15,17]; + CTQ.sexual_abuse = [20,21,23,24,27]; + CTQ.emotional_neglect = [5,7,13,19,28]; + CTQ.physical_neglect = [1,2,4,6,26]; + CTQ.denial = [10,16,22]; + ctq_data = [sum(ctq_data_old(:, CTQ.emotional_abuse),2), sum(ctq_data_old(:, CTQ.physical_abuse),2), sum(ctq_data_old(:, CTQ.sexual_abuse),2), sum(ctq_data_old(:, CTQ.emotional_neglect),2), sum(ctq_data_old(:, CTQ.physical_neglect),2), sum(ctq_data_old(:, CTQ.denial),2)]; + input.data_complete.foranalysis.CTQ{:, fieldnames(CTQ)'} = ctq_data; + correlation_names = [correlation_names, fieldnames(CTQ)']; + end + + if any(contains(IN.SD_selection, 'LOS')) + correlation_names = [correlation_names, 'Length_of_Storage_Days']; + end + + collect_table = dp_find_tables(input.data_complete.foranalysis, correlation_names, input.final_ID); + + [RHO, P] = corr(collect_table.Variables, output.latent_scores_table{input.final_ID,:}, 'rows', 'pairwise', 'type', 'Spearman'); + output.post_hoc_correlations.RHO_table = array2table(RHO, 'RowNames', 
collect_table.Properties.VariableNames, 'VariableNames', output.latent_scores_table.Properties.VariableNames); + output.post_hoc_correlations.P_table = array2table(P, 'RowNames', collect_table.Properties.VariableNames, 'VariableNames', output.latent_scores_table.Properties.VariableNames); + p_adj = dp_FDR_adj(output.post_hoc_correlations.P_table.Variables); + output.post_hoc_correlations.P_table_adj = output.post_hoc_correlations.P_table; + output.post_hoc_correlations.P_table_adj.Variables = p_adj; + + writetable(output.post_hoc_correlations.RHO_table, [p_folder, '/post_hoc_correlations.xlsx'], 'WriteRowNames', true, 'Sheet', 'RHO'); + writetable(output.post_hoc_correlations.P_table_adj, [p_folder, '/post_hoc_correlations.xlsx'], 'WriteRowNames', true, 'Sheet', 'P'); +end + + +if val_log + output.final_parameters = output.validation_results; +end + +input.Y_names = strrep(input.Y_names, '_T0', ''); +load(input.NM_structure); + +%% prepare questionnaire data + +% define column names for matrices so that you can access them later by +% indexing +% output.parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop +opt_parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'success', 'RHO', 'p'}; +opt_parameters_names_long = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop + +% indices for later use +opt_u = strcmp(output.parameters_names,'u'); +opt_v = strcmp(output.parameters_names,'v'); +opt_p = strcmp(output.parameters_names, 'p'); +opt_RHO = strcmp(output.parameters_names, 'RHO'); +index_epsilon = strcmp(output.parameters_names, 'epsilon'); +index_omega = strcmp(output.parameters_names, 'omega'); +index_epsilon_all = strcmp(output.parameters_names, 'epsilon_all'); +index_omega_all = strcmp(output.parameters_names, 'omega_all'); + +if any(strcmp(IN.specific, 'images') | all_jobs) + i_folder = [collection_folder, '/images']; + mkdir(i_folder); + cd(i_folder); + % write brain vector to nifti file + for i=1:size(output.final_parameters,1) + nk_WriteVol(output.final_parameters{i,4}, ['brain_LV_final_' num2str(i)], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); + end +end + +if any(strcmp(IN.specific, 'atlas') | all_jobs) + + a_folder = [collection_folder, '/atlases']; + mkdir(a_folder); + + IN.sample_size = size(input.data_collection.Diag,1); + if contains(IN.overall_analysis, 'immune') + IN.var_names = {'immune'}; + elseif contains(IN.overall_analysis, 'MDD') + IN.var_names = {'MDD_Trauma'}; + else + IN.var_names = input.AllVarNames; + end + IN.atlases = dp_atlas_find(IN); + + IN.voxel_cutoff = 0; + + IN.analysis_name = input.name; + + for i=1:size(output.final_parameters,1) + IN.vectors(i,:) = output.final_parameters{i,4}; + end + + IN.a_folder = a_folder; + [output.atlas_readouts, output.atlas_readouts_filled] = dp_atlas_readout_new(IN); + + IN.atlas_readouts = output.atlas_readouts; + output.atlas_table_readouts = dp_atlas_table_readout(IN); + + save(IN.results_path, 'input', 'output', 'setup'); + + spider_folder = [collection_folder, '/spider_plots']; + mkdir(spider_folder); + dp_atlases_vis_spiderplot(IN.results_path, spider_folder); + +end + +if contains(IN.overall_analysis, 'immune') + try input = rmfield(input, 'selected_features'); + end +end + +if any(strcmp(IN.specific, 'behavior') | all_jobs) + + b_folder = [collection_folder, '/behavior']; + mkdir(b_folder); + + switch 
IN.analysis_origin + case 1 % classical PRONIA analysis + + load('/volume/HCStress/Doc/all_questionnaires.mat', 'BS_questionnaire', 'CISS_questionnaire', 'CTQ_questionnaire', 'RSA_questionnaire', 'WSS_questionnaire'); + all_questionnaires = [CISS_questionnaire', RSA_questionnaire', CTQ_questionnaire', BS_questionnaire', WSS_questionnaire']; + + %% visualize behavior vector in barplot + % CTQ.emotional_abuse)), (CTQ.physical_abuse)), (CTQ.sexual_abuse)), (CTQ.emotional_neglect)), (CTQ.physical_neglect)), (CTQ.denial))]; + % load([setup.analysis_folder, '/' setup.date, '_', name, '_data_collection.mat']); + % if sum(ismember(input.Y_names, 'Age')) + sum(ismember(input.Y_names, 'Sex'))==2 + % selected_variables = [input.selected_features(1,:), 'Age', 'Sex'; input.selected_features(2,:), 1, 1]; + % elseif sum(ismember(input.Y_names, 'Age')) + % selected_variables = [input.selected_features(1,:), 'Age'; input.selected_features(2,:), 1]; + % elseif sum(ismember(input.Y_names, 'Sex')) + % selected_variables = [input.selected_features(1,:), 'Sex'; input.selected_features(2,:), 1]; + % else + % selected_variables = input.selected_features; + % end + + log_f=[]; + if isfield(input, 'selected_features') + for i=1:size(input.selected_features,2) + log_f(i,:) = contains(input.Y_names(1,:), input.selected_features{1,i}); + end + + if size(log_f,1)>1 + log_c = sum(sum(log_f==0)==size(input.selected_features,2)); + else + log_c = sum((log_f==0)==size(input.selected_features,2)); + end + + selected_variables = [[input.selected_features(1,:), input.Y_names((end-(log_c-1)):end)]; [input.selected_features(2,:), num2cell(ones(1,log_c))]]; + + count=0; + for i=1:size(input.selected_features(2,:),2) + temp=size(fieldnames(input.selected_features{2,i}),1); + count=count+temp; + end + colorpattern_LV = colorcube((count + log_c)); + + % collect items from vectors + for i=1:(size(output.final_parameters,1)) + x = output.final_parameters{i,opt_v}; + output.questions_collection.(['LV_', num2str(i)]).items = input.Y_names(x~=0)'; + output.questions_collection.(['LV_', num2str(i)]).subscales = input.subscales(x~=0)'; + for q=1:size(output.questions_collection.(['LV_', num2str(i)]).items,1) + output.questions_collection.(['LV_', num2str(i)]).questions{q} = all_questionnaires(1,strcmp(all_questionnaires(2,:), output.questions_collection.(['LV_', num2str(i)]).items{q})); + end + + % errors = output.CI_v{i}; + f=figure(); + nn=0; cc=0; + hold on + temp_legend=[]; temp_all_x = 0; temp_all_bars = 0; temp_handle=[]; %sel_temp_names=[]; + for ii=1:(size(selected_variables,2)) + switch class(selected_variables{2,ii}) + case 'struct' + fields = fieldnames(input.selected_features{2,(strcmp(input.selected_features(1,:),input.selected_features{1,ii}))}); + for iii=1:size(fields,1) + temp_current=size(selected_variables{2,ii}.(fields{iii}),2); + if IN.plot_all + nn=nn+1; cc=cc+1; + temp_handle(nn) = bar((temp_all_bars+1):(temp_all_bars+temp_current),x((temp_all_x+1):(temp_all_x+temp_current)), 'FaceColor', colorpattern_LV(cc,:)); + temp_all_bars = temp_all_bars+temp_current; + temp_legend{nn} = [selected_variables{1,ii}, ' ', strrep(fields{iii}, '_', ' ')]; + hold on + else + log_nonzero = x((temp_all_x+1):(temp_all_x+temp_current))~=0; + if any(log_nonzero) + nn=nn+1; cc=cc+1; + x_temp = x((temp_all_x+1):(temp_all_x+temp_current)); + temp_handle(nn) = bar((temp_all_bars+1):(temp_all_bars+sum(log_nonzero)), x_temp(log_nonzero), 'FaceColor', colorpattern_LV(cc,:)); + temp_all_bars = temp_all_bars+sum(log_nonzero); + temp_legend{nn} = 
[selected_variables{1,ii}, ' ', strrep(fields{iii}, '_', ' ')]; + hold on + else + cc=cc+1; + end + end + temp_all_x = temp_all_x+temp_current; + end + case 'double' + temp_current=size(selected_variables{2,ii},2); + if IN.plot_all + nn=nn+1; cc=cc+1; + temp_handle(nn) = bar((temp_all_bars+1):(temp_all_bars+temp_current),x((temp_all_x+1):(temp_all_x+temp_current)), 'FaceColor', colorpattern_LV(cc,:)); + temp_all_bars = temp_all_bars+temp_current; + temp_legend{nn} = strrep(selected_variables{1,ii}, '_', ' '); + hold on + else + log_nonzero = x((temp_all_x+1):(temp_all_x+temp_current))~=0; + if any(log_nonzero) + nn=nn+1; cc=cc+1; + x_temp = x((temp_all_x+1):(temp_all_x+temp_current)); + temp_handle(nn) = bar((temp_all_bars+1):(temp_all_bars+sum(log_nonzero)),x_temp(log_nonzero), 'FaceColor', colorpattern_LV(cc,:)); + temp_all_bars = temp_all_bars+sum(log_nonzero); + temp_legend{nn} = strrep(selected_variables{1,ii}, '_', ' '); + hold on + else + cc=cc+1; + end + end + temp_all_x = temp_all_x+temp_current; + end + end + + if any(i==input.grid_dynamic.onset) + grid_x = input.grid_dynamic.(['LV_', num2str(i)]).x; + grid_y = input.grid_dynamic.(['LV_', num2str(i)]).y; + end + + first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + + if ~val_log + p_sig_threshold = output.pvalue_FDR(i); + fourth_line = 'main sample'; + else + p_sig_threshold = 0.05; + fourth_line = 'validation sample'; + end + + if output.final_parameters{i,opt_p}<=p_sig_threshold + third_line = 'significant'; + else + third_line = 'not significant'; + end + title({first_line; second_line; third_line; fourth_line}); % add third line + xlabel(LV_x, 'FontSize', font_size); + ylabel(LV_y, 'FontSize', font_size); + + legend(temp_handle, temp_legend, 'Location', 'bestoutside', 'FontSize', font_size); + hold all + + set(gcf,'Position', get(0,'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [b_folder, '/behavior_LV_' num2str(i)], '-dpng', '-r0'); + saveas(f, [b_folder, '/behavior_LV' num2str(i), '.fig']); + % saveas(f, [b_folder, '/behavior_LV' num2str(i), '.eps']); + saveas(f,[b_folder, '/behavior_LV' num2str(i)],'epsc'); + + close(f); + output.questions_collection.(['LV_', num2str(i)]).final={}; + for qq=1:size(output.questions_collection.(['LV_', num2str(i)]).items,1) + try output.questions_collection.(['LV_', num2str(i)]).final{qq} = [output.questions_collection.(['LV_', num2str(i)]).items{qq}, ': ', output.questions_collection.(['LV_', num2str(i)]).questions{1,qq}{1}]; + catch + output.questions_collection.(['LV_', num2str(i)]).final{qq} = [output.questions_collection.(['LV_', num2str(i)]).items{qq}]; + end + end + + output.questions_collection.(['LV_', num2str(i)]).combined = [output.questions_collection.(['LV_', num2str(i)]).final; output.questions_collection.(['LV_', num2str(i)]).subscales']; + + save(IN.results_path, 'input', 'output', 'setup'); + + end + + % print out questions + fields = fieldnames(output.questions_collection); + name_txt = [b_folder, '/collected_questions.txt']; + writecell(cell(1,1), name_txt); + + for i=1:size(fields,1) + writecell(['LV_', num2str(i); output.questions_collection.(['LV_', num2str(i)]).combined(1,:)'; cell(2,1)],[b_folder, '/collected_questions.txt'],'WriteMode','append') + end + + % print out post hoc correlations + 
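+ % Editor's note (hedged sketch): the adjusted p-values exported below come
+ % from dp_FDR_adj (called in the post_hoc section above); its exact contents
+ % are not part of this hunk, but a standard Benjamini-Hochberg adjustment of
+ % a p-value matrix P, which such a routine is assumed to approximate, is:
+ %   ps = P(:); m = numel(ps);
+ %   [p_sorted, idx] = sort(ps);
+ %   q = p_sorted .* m ./ (1:m)';
+ %   q = min(flipud(cummin(flipud(q))), 1);   % enforce monotonicity, cap at 1
+ %   p_adj = ps; p_adj(idx) = q;
+ %   p_adj = reshape(p_adj, size(P));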
fields = fieldnames(output.post_hoc_correlations.all.correlations.test); + fields = fields(contains(fields, 'table')); + for i=1:size(fields,1) + writetable(output.post_hoc_correlations.all.correlations.test.(fields{i}), [b_folder, '/post_hoc_corr_test.xlsx'], 'WriteRowNames', true, 'Sheet', fields{i}); + end + + fields = fieldnames(output.post_hoc_correlations.all.correlations.validation); + fields = fields(contains(fields, 'table')); + for i=1:size(fields,1) + writetable(output.post_hoc_correlations.all.correlations.validation.(fields{i}), [b_folder, '/post_hoc_corr_val.xlsx'], 'WriteRowNames', true, 'Sheet', fields{i}); + end + + %% plot latent scores epsilon and omega, color code diagnostic groups (HC, + % ROD, ROP, CHR) + + for i=1:size(input.selected_studygroups,2) + input.selected_studygroups{2,i} = strcmp(input.data_collection.Labels, input.selected_studygroups{1,i}); + end + + for i=1:size(output.CV.cv_outer_indices.TestInd,2) + % temp_1 = zeros(size(input.selected_studygroups{2,1},1),size(input.selected_studygroups{2,1},2))>0; + % temp_1(output.CV.cv_outer_indices.TestInd{1,i}) = true; + output.CV.cv_outer_indices.TestInd{2,i} = input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,i}); + end + + % plot all latent scores according to LVs + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + for i=1:size(output.final_parameters,1) + f=figure(); + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = rescale(output.final_parameters{i,index_epsilon}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})), -1, 1); + y = rescale(output.final_parameters{i,index_omega}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})), -1, 1); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(ii,:)); + hold on + end + first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + + if ~val_log + p_sig_threshold = output.pvalue_FDR(i); + fourth_line = 'main sample'; + else + p_sig_threshold = 0.05; + fourth_line = 'validation sample'; + end + + if output.final_parameters{i,opt_p}<=p_sig_threshold + third_line = 'significant'; + else + third_line = 'not significant'; + end + + title({first_line; second_line; third_line; fourth_line}); % add third line + xlabel(LS_epsilon); + ylabel(LS_omega); + lsline + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [b_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [b_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [b_folder, '/latent_scores_LV' num2str(i), '.eps']); + saveas(f,[b_folder, '/latent_scores_LV' num2str(i)],'epsc'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f) + end + + % plot latent scores all 
combined across significant LVs, colorcoded by LVs + colorpattern_LS = hsv(size(output.final_parameters,1)); + f=figure(); temp_legend = []; + for i=1:size(output.final_parameters,1) + x = rescale(output.final_parameters{i,index_epsilon}, -1, 1); + y = rescale(output.final_parameters{i,index_omega}, -1, 1); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(i,:)); + + first_line = strrep([input.name, ', grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + sig_line = 'significant'; + else + sig_line = 'not significant'; + end + title({first_line;second_line}); % add third line + hold on + temp_legend{i} = ['LV ', num2str(i), ': ', sig_line]; + end + % temp_legend{nn} = selected_variables{1,ii}; + xlabel(LS_epsilon); + ylabel(LS_omega); + lsline + [~, lgd_data] = legend(temp_legend, 'Location', 'bestoutside', 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [b_folder, '/latent_scores_combined_LV_color'], '-dpng', '-r0'); + saveas(f, [b_folder, '/latent_scores_combined_LV_color.fig']); + % saveas(f, [b_folder, '/latent_scores_combined_LV_color.eps']); + saveas(f,[b_folder, '/latent_scores_combined_LV_color'],'epsc'); + close(f) + + % % standardize latent scores and plot all of them in one graph, colorcoded + % % by diagnoses + % % first transpose latent scores so that they fit function, then standardize + % % feature-wise (per LV) + % output.epsilon_stand = (dp_standardize(output.epsilon'))'; + % output.omega_stand = (dp_standardize(output.omega'))'; + + % plot all latent scores according to LVs, colorcoded by diagnoses + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + f=figure(); + for i=1:size(output.final_parameters,1) + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = rescale(output.final_parameters{i,index_epsilon}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})), -1, 1); + y = rescale(output.final_parameters{i,index_omega}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})), -1 , 1); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(ii,:)); + hold on + end + end + end + first_line = strrep([input.name, ', grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + % if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + % third_line = ['significant']; + % else + % third_line = ['not significant']; + % end + title({first_line; second_line}); % add third line + xlabel(LS_epsilon); + ylabel(LS_omega); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [b_folder, '/latent_scores_combined_diagnosis_color'], '-dpng', '-r0'); + % saveas(f, 
[results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [b_folder, '/latent_scores_combined_diagnosis_color', '.fig']); + % saveas(f, [b_folder, '/latent_scores_combined_diagnosis_color', '.eps']); + saveas(f,[b_folder, '/latent_scores_combined_diagnosis_color'],'epsc'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f); + else + dp_vis_general(b_folder, IN.results_path, {'v'}, IN.plot_all, false, {'abstinence_aktuell', 'abstinence_always'}, input.validation_set) + end + case 2 + dp_vis_general(b_folder, IN.results_path, {'v'}, IN.plot_all, false, {'abstinence_aktuell', 'abstinence_always'}, input.validation_set) + end +end + +if any(strcmp(IN.specific, 'detailed') | all_jobs) + d_folder = [collection_folder, '/detailed']; + mkdir(d_folder); + + % detailed_results_folder = [IN.results_path(1:(strfind(IN.results_path, 'final')-1)), 'detailed_results']; + cd(d_folder); + + for i=1:size(output.final_parameters,1) + load([detailed_results_folder, '/opt_parameters_' num2str(i), '.mat']); + if exist('opt_parameters') + if size(opt_parameters,2) == 8 + opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s wrong!'); + end + temp_opt_param = opt_parameters; + elseif exist('opt_parameters_temp') + if size(opt_parameters_temp,2) == 8 || size(opt_parameters_temp,2) == 10 + opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters_temp,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s wrong!'); + end + temp_opt_param = opt_parameters_temp; + else + disp('Something''s wrong!'); + end + % opt_dir = [detailed_results_folder, '/opt_parameters_' num2str(i)]; + % mkdir(opt_dir); + % cd(opt_dir); + + d_folder_opt = [d_folder, '/opt_parameters_' num2str(i)]; + mkdir(d_folder_opt); + cd(d_folder_opt); + + % for ii=1:size(temp_opt_param,1) + % %% visualize results + % % write brain vector to nifti file + % nk_WriteVol(temp_opt_param{ii,opt_param_u}, ['brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); + % % dp_resample_image([d_folder_opt, '/brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], [1 1 1]); + % end + + %% visualize behavior vector in barplot + + for ii=1:(size(temp_opt_param,1)) + x = temp_opt_param{ii,opt_param_v}; + f=subplot(round(size(temp_opt_param,1)/2),2,ii); + nn=0; + hold on + temp_legend=[]; temp_all = 0; + for iii=1:size(selected_variables,2) + switch class(selected_variables{2,iii}) + case 'struct' + fields = fieldnames(input.selected_features{2,(strcmp(input.selected_features(1,:),input.selected_features{1,iii}))}); + for iiii=1:size(fields,1) + 
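% each field of this struct is one questionnaire subscale; its weights are
+ % drawn below as one bar group in a distinct color, so the legend maps
+ % colors to subscales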
+ nn=nn+1;
+ temp_current=size(selected_variables{2,iii}.(fields{iiii}),2);
+ bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:));
+ temp_all = temp_all+temp_current;
+ temp_legend{nn} = [selected_variables{1,iii}, ' ', strrep(fields{iiii}, '_', ' ')];
+ hold on
+ end
+ case 'double'
+ nn=nn+1;
+ temp_current=size(selected_variables{2,iii},2);
+ % temp_current is already a scalar count, so it is used directly here;
+ % the earlier size(temp_current,2) always returned 1 and drew a single bar
+ bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:));
+ temp_all = temp_all+temp_current;
+ temp_legend{nn} = selected_variables{1,iii};
+ hold on
+ end
+ end
+ axis([0 (size(temp_opt_param{ii,opt_param_v},1)+1) -1 1]);
+ xlabel({'\color{black}clinical features'}, 'FontSize', font_size);
+ ylabel('weights', 'FontSize', font_size);
+ if temp_opt_param{ii,opt_param_p}<=output.pvalue_FDR(i)
+ significance_opt = 'significant';
+ else
+ significance_opt = 'not significant';
+ end
+ subplot_title = {['Iteration ', num2str(ii), ', p-value (FDR-corrected) = ' num2str(temp_opt_param{ii,opt_param_p}), ', Spearman''s RHO = ', num2str(temp_opt_param{ii,opt_param_RHO}), ', ', significance_opt]};
+ title(subplot_title, 'FontSize', font_size, 'FontWeight', 'normal');
+
+ end
+ set(gcf, 'Position', get(0, 'Screensize'));
+ set(gcf,'PaperPositionMode','auto')
+ first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' ');
+ second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})];
+ if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i)
+ third_line = 'significant';
+ else
+ third_line = 'not significant';
+ end
+ suptitle({first_line; [second_line, ', ' third_line]});
+
+ % print([detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0');
+ % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png');
+
+ print([d_folder_opt, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0');
+ saveas(f, [d_folder_opt, '/behavior_opt_parameters_',num2str(i)], 'png');
+ % saveas(f, [d_folder_opt, '/behavior_opt_parameters_',num2str(i)], 'eps');
+ saveas(f,[d_folder_opt, '/behavior_opt_parameters_',num2str(i)],'epsc');
+ % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'fig');
+ % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png');
+ close all;
+ end
+
+end
+
+if any(strcmp(IN.specific, 'correlation') | all_jobs)
+
+ for i=1:size(output.CV.cv_outer_indices.TestInd,2)
+ output.CV.cv_outer_indices.TestInd{2,i} = input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,i});
+ end
+
+ % plot all latent scores according to LVs
+ s_folder = [collection_folder, '/correlation'];
+ mkdir(s_folder);
+ latent_selection = {index_epsilon, index_omega; LS_epsilon, LS_omega; 'epsilon', 'omega'};
+ colorpattern_LS = hsv(size(input.selected_studygroups,2));
+ fields = fieldnames(output.post_hoc_correlations.correlations);
+ fields(contains(fields, 'RHO')|contains(fields, 'p'))=[];
+ for ff=1:size(fields,1)
+ p_sig_collection = output.post_hoc_correlations.correlations.(fields{ff}).table_p.Variables < 0.05;
+ for l=1:size(latent_selection,2)
+ for i=1:size(output.post_hoc_correlations.correlations.(fields{ff}).table_p.Variables,1)
+ for
ii=1:size(output.post_hoc_correlations.correlations.(fields{ff}).table_p.Variables,2) + if p_sig_collection(i,ii) + f=figure(); + for iii=1:size(input.selected_studygroups,2) + w = output.final_parameters{round(ii/2),1}; + x = output.final_parameters{round(ii/2),latent_selection{1,l}}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,iii})); + temp=output.post_hoc_correlations.data_collection(:,i); + y = temp(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,iii})); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(iii,:)); + hold on + end + + xlabel(latent_selection{2,l}, 'FontSize', font_size); + y_temp = output.post_hoc_correlations.data_table.Properties.VariableNames(ii); + y_temp = strrep(y_temp, '_', ' '); + y_temp = strrep(y_temp, 'T0',''); + y_temp = strrep(y_temp, 'Screening',''); + ylabel(y_temp, 'FontSize', font_size); + title(['p-value = ' num2str(output.post_hoc_correlations.correlations.(fields{ff}).table_p{i,ii}), ', Spearman''s RHO = ', num2str(output.post_hoc_correlations.correlations.(fields{ff}).table_RHO{i,ii})], 'FontSize', font_size); % add third line + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + hold on + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}], '-dpng', '-r0'); + saveas(f, [s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}, '.fig']); + % saveas(f, [s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}, '.eps']); + saveas(f,[s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}],'epsc'); + close all; + + end + end + end + end + end + save(IN.results_path, 'input', 'output', 'setup'); + +end + + +end diff --git a/Visualization_Module/dp_visualize_data_multi.m b/Visualization_Module/dp_visualize_data_multi.m new file mode 100644 index 0000000..cae2dc5 --- /dev/null +++ b/Visualization_Module/dp_visualize_data_multi.m @@ -0,0 +1,1179 @@ +%% function to visualize SPLS output + +function dp_visualize_data_multi(IN) + +switch IN.overall_analysis + case 'Stress' + overall_folder = '/volume/HCStress/Analysis/Stress'; + case 'Resilience' + overall_folder = '/volume/HCStress/Analysis/Resilience'; +end + +if ~isfield(IN, 'specific') + all_jobs = true; + IN.specific = 'empty'; +else + all_jobs = false; +end + +marker_size = 14; +font_size = 14; +LV_x = 'clinical features'; +LV_y = 'weights'; +LS_epsilon = 'brain score'; +LS_omega = 'behavior score'; + +%% visualize results +load(IN.results_path); +if strfind(IN.results_path, '2018') + analysis_date = '2018'; +elseif strfind(IN.results_path, '2019') + analysis_date = '2019'; +elseif strfind(IN.results_path, '2020') + analysis_date = '2020'; +end +folder_name = [setup.date, '-', IN.results_path(5+strfind(IN.results_path, 
analysis_date):(strfind(IN.results_path, '/final_results')-1))]; +collection_folder = [overall_folder, '/', folder_name]; +mkdir(collection_folder); + +%% compute sociodemographic and clinical outcome correlations +if any(strcmp(IN.specific, 'sociodemographic') | all_jobs) + [input, output, setup] = dp_sociodemographic(IN); + + subsets = {'all', 'hold_out'}; + + corr_folder = [collection_folder, '/correlations']; + mkdir(corr_folder); + + for s=1:size(subsets,2) + fields=fieldnames(output.(['tables_', subsets{s}, '_Rho_p'])); + for i=1:size(fields,1) + latent_scores = fieldnames(output.(['tables_', subsets{s}, '_Rho_p']).(fields{i})); + for ii=1:size(latent_scores,1) + % dp_txt_write(s_folder, ['/corr_RHO_', fields{i}, '_', latent_scores{ii}], output.tables_ho_RHO.(fields{i}).(latent_scores{ii})', '%.3f \t %.3f \t %.3f \t %.3f \t %.3f \n'); + % dp_txt_write(s_folder, ['/corr_p_', fields{i}, '_', latent_scores{ii}], output.tables_ho_p.(fields{i}).(latent_scores{ii})', '%.3f \t %.3f \t %.3f \t %.3f \t %.3f \n'); + dp_txt_write(corr_folder, ['/corr_all_', fields{i}, '_', latent_scores{ii}], output.(['tables_', subsets{s}, '_Rho_p']).(fields{i}).(latent_scores{ii})', '%.3f \t %.3f \t %.3f \t %.3f \t %.3f \n'); + end + end + end + +end + +input.behavior_names = strrep(input.behavior_names, '_T0', ''); +load(input.NM_structure); + +%% prepare questionnaire data +load('/volume/HCStress/Doc/Stress_Resilience_questionnaires.mat'); +Resilience_Stress_questionnaires = [CISS_questionnaire', RSA_questionnaire', CTQ_questionnaire', BS_questionnaire']; + +% define column names for matrices so that you can access them later by +% indexing +% output.parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop +opt_parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'success', 'RHO', 'p'}; +opt_parameters_names_long = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop + +% indices for later use +opt_u = strcmp(output.parameters_names,'u'); +opt_v = strcmp(output.parameters_names,'v'); +opt_p = strcmp(output.parameters_names, 'p'); +opt_RHO = strcmp(output.parameters_names, 'RHO'); +index_epsilon = strcmp(output.parameters_names, 'epsilon'); +index_omega = strcmp(output.parameters_names, 'omega'); + +% results_folder = IN.results_path(1:(strfind(IN.results_path, '/result.mat')-1)); + +% cd(results_folder); +% +% % write brain vector to nifti file +% for i=1:size(output.final_parameters,1) +% nk_WriteVol(output.final_parameters{i,4}, ['brain_LV' num2str(i)], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); +% end + +if any(strcmp(IN.specific, 'images') | all_jobs) + i_folder = [collection_folder, '/images']; + mkdir(i_folder); + cd(i_folder); + % write brain vector to nifti file + for i=1:size(output.final_parameters,1) + nk_WriteVol(output.final_parameters{i,4}, ['brain_LV' num2str(i)], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); + % dp_resample_image([collection_folder, '/brain_LV' num2str(i)], [1 1 1]); + end +end + + +if any(strcmp(IN.specific, 'atlas') | all_jobs) + a_folder = [collection_folder, '/atlases']; + mkdir(a_folder); + atlases = {'brainnetome', 'cerebellum'}; + % get clusters for brain regions using hammers and aal atlas + % filepath hammers nifti: /opt/SPM/spm12_v6685_cat12_r1207/atlas/hammers.nii + % filepath hammers description: 
/opt/SPM/spm12_v6685_cat12_r1207/atlas/labels_dartel_hammers.xml + + for aa=1:size(atlases,2) + + switch atlases{aa} + case 'hammers' + switch size(input.behavior,1) + case 626 + atlas_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__626_NM_X.mat'; + case 627 + atlas_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__627_NM_X.mat'; + case 630 + atlas_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__630_CISS_RSA_NM_X.mat'; + a_number = 1; + case 631 + atlas_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__631_NM_X.mat'; + case 634 + atlas_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__634_NM_X.mat'; + case 649 + atlas_path_full = '/volume/HCStress/Data/MRI/hammers_3mm_649_X.mat'; + a_number = 1; + end + temp = load('/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30-ancillary-data.tar/Hammers_mith-n30-ancillary-data/Hammers_mith-n30-ancillary-data/labels_hammerssmith_n30_ancillary_data.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + for i=1:size(labels_regions,1) + strf = strfind(labels_regions{i,2}, ' '); + labels_regions{i,3} = labels_regions{i,2}(1:(strf-1)); + end + + case 'juelich' + switch size(input.behavior,1) + case 630 + atlas_path_full = '/volume/HCStress/Data/MRI/Julich_Atlas/juelich_maxprob_0_r3mm_630_CISS_RSA_NM_X.mat'; + a_number = 1; + case 627 + atlas_path_full = '/volume/HCStress/Data/MRI/Julich_Atlas/juelich_maxprob_0_r3mm_627_NM_X.mat'; + case 634 + atlas_path_full = '/volume/HCStress/Data/MRI/Julich_Atlas/juelich_maxprob_0_r3mm_634_NM_X.mat'; + case 649 + atlas_path_full = '/volume/HCStress/Data/MRI/BNA_hammers_juelich_3mm_649_X.mat'; + a_number = 3; + end + temp = load('/volume/HCStress/Data/MRI/Julich_Atlas/juelich_indices_labels.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + for i=1:size(labels_regions,1) + strf = strfind(labels_regions{i,2}, ' '); + labels_regions{i,3} = labels_regions{i,2}(1:(strf-1)); + end + + case 'brainnetome' + switch size(input.behavior,1) + case 621 + atlas_path_full = '/volume/HCStress/Data/MRI/BNA_CISSRSAPAS_3mm_621_X.mat'; + case 627 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_627_CTQ_BS_NM_X.mat'; + case 630 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_630_CISS_RSA_NM_X.mat'; + a_number = 1; + case 634 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_634_CISS_RSA_NM_X.mat'; + case 649 + atlas_path_full = '/volume/HCStress/Data/MRI/BNA_hammers_juelich_3mm_649_X.mat'; + a_number = 1; + end + temp = load('/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_indices.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + % for i=1:size(labels_regions,1) + % strf = strfind(labels_regions{i,2}, '_'); + % labels_regions{i,3} = labels_regions{i,2}(1:(strf-1)); + % end + case 'cerebellum' + switch size(input.behavior,1) + case 621 + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_621_CISSRSAPAS_X.mat'; + case 649 + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_649_CTQ_X.mat'; + end + temp = load('/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/CerebellumMNIflirt_indices.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + + end + + temp=load(atlas_path_full); 
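+ % assumption: each atlas .mat file holds a single variable with one integer
+ % region label per voxel; where several atlases are stacked row-wise in one
+ % file, a_number picks the row belonging to the requested atlas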
+ fields = fieldnames(temp);
+ if exist('a_number', 'var')
+ atlas_for_analysis = round(temp.(fields{1})(a_number,:));
+ else
+ atlas_for_analysis = round(temp.(fields{1}));
+ end
+ [C_atlas, ~, ic_atlas] = unique(atlas_for_analysis);
+ counts_atlas = accumarray(ic_atlas, 1);
+ output.regions.(atlases{aa}).regions_count = [C_atlas', counts_atlas];
+ output.regions.(atlases{aa}).count_names = {'region_number', 'n_voxels', 'voxel_percentage', 'region_name', 'median_weights', 'mean_weights'};
+ output.regions.(atlases{aa}).count = struct('raw', [], 'voxels', [], 'weights', []);
+ output.regions.(atlases{aa}).count_names_sorting = {'positive', 'negative', 'overall'};
+ voxel_cutoff = 0;
+ fields = fieldnames(output.regions.(atlases{aa}).count);
+ for i=1:size(fields,1)
+ for fff = 1:size(output.regions.(atlases{aa}).count_names_sorting,2)
+ FID = fopen([a_folder, '/brain_regions_', atlases{aa}, '_', output.regions.(atlases{aa}).count_names_sorting{fff}, '_', fields{i}, '.txt'], 'w');
+ fprintf(FID, [strrep(input.name,'_',' '), '\n', fields{i} ' sorted']);
+ fclose(FID);
+ end
+ end
+
+ for i=1:size(output.final_parameters,1)
+ output.regions.(atlases{aa}).log{i,1} = output.final_parameters{i,4}>0;
+ output.regions.(atlases{aa}).log{i,2} = output.final_parameters{i,4}<0;
+ output.regions.(atlases{aa}).log{i,3} = output.final_parameters{i,4}~=0;
+ for ii=1:size(output.regions.(atlases{aa}).log,2)
+ output.regions.(atlases{aa}).sum{i,ii} = atlas_for_analysis((atlas_for_analysis~=0)' & output.regions.(atlases{aa}).log{i,ii});
+ [C, ~, ic] = unique(output.regions.(atlases{aa}).sum{i,ii});
+ a_counts = accumarray(ic, 1);
+ voxel_percentage = a_counts./(counts_atlas(ismember(C_atlas, C)));
+ output.regions.(atlases{aa}).count.raw{i,ii} = [num2cell(C'), num2cell(a_counts), num2cell(voxel_percentage), labels_regions((C'),2)];
+ fix_end = size(output.regions.(atlases{aa}).count.raw{i,ii},2);
+ % the next lines were corrupted in this hunk; reconstructed as a voxel-count
+ % cutoff (assumption; the original also computed the weight statistics and
+ % the sorted txt export, which could not be recovered)
+ log_cutoff = cell2mat(output.regions.(atlases{aa}).count.raw{i,ii}(:, find(strcmp(output.regions.(atlases{aa}).count_names, 'n_voxels')))) <= voxel_cutoff;
+ output.regions.(atlases{aa}).count.raw{i,ii}(log_cutoff,:) = [];
+ end
+
+ % commented-out block (opening lines lost in this hunk): alphabetical sorting
+ % of the region tables into output.regions.(atlases{aa}).count.alphabet{b,bb}
+ % while size(mat,1) > 0
+ % strf = strfind(mat{1,4}, ' ');
+ % temp_prefix = mat{1,4}(1:(strf-1));
+ % strf_log = ~cellfun(@isempty, strfind(mat(:,4), temp_prefix));
+ % temp_mat = [temp_mat; mat(strf_log,:)];
+ % output.regions.(atlases{aa}).count.alphabet{b,bb} = temp_mat;
+ % mat(strf_log,:)=[];
+ % end
+ % end
+ % end
+ %
+ %
+ %
+ %
+ % for fi=1:size(output.regions.(atlases{aa}).count.alphabet,1)
+ % temp_add = [];
+ % temp_print = output.regions.(atlases{aa}).count_names([1,2,4,5,3]);
+ % for f=1:size(temp_print,2)
+ % % try output.regions.(atlases{aa}).count_names{f} = num2str(temp_print{f});
+ % % catch ME
+ % % end
+ % temp_add = [temp_add, sprintf('\t'), temp_print{f}];
+ % end
+ % % fields = fieldnames(output.regions.(atlases{aa}).count);
+ % % for ff=1:size(fields,1)
+ % for fff = 1:size(output.regions.(atlases{aa}).count.alphabet,2)
+ % FID = fopen([a_folder, '/brain_regions_', atlases{aa}, '_', output.regions.(atlases{aa}).count_names_sorting{fff}, '_alphabet.txt'], 'a');
+ % fprintf(FID, ['\n \n \n', sprintf('\t'), 'Latent Variable ' num2str(fi), '\n \n', temp_add, '\n']);
+ % fclose(FID);
+ % end
+ % % end
+ %
+ % % fields = fieldnames(output.regions.(atlases{aa}).count);
+ % % for ff=1:size(fields,1)
+ % for fff = 1:size(output.regions.(atlases{aa}).count.alphabet,2)
+ % if ~isempty(output.regions.(atlases{aa}).count.alphabet{fi,fff})
+ % for r=1:size(output.regions.(atlases{aa}).count.alphabet{fi,fff},1)
+ % temp_print =
output.regions.(atlases{aa}).count.alphabet{fi,fff}(r,[1,2,4,5,3]); + % temp_add = []; + % for ii=1:size(temp_print,2) + % try temp_print{ii} = num2str(temp_print{ii}); + % catch ME + % end + % temp_add = [temp_add, sprintf('\t'), temp_print{ii}]; + % end + % + % FID = fopen([a_folder, '/brain_regions_', atlases{aa}, '_', output.regions.(atlases{aa}).count_names_sorting{fff}, '_alphabet.txt'], 'a'); + % fprintf(FID, ['\n' temp_add]); + % fclose(FID); + % end + % end + % end + % % end + % end + % + % end + + end + % just deactivated for paper preparation, needs to be reactivated + % later on + % load(input.MRI) + % MRI_volumes = MRI_for_analysis; + % for i=1:size(output.final_parameters,1) + % u=output.final_parameters{i,4}; + % for ii=1:size(output.regions.(atlases{aa}).log,2) + % log_weights = output.regions.(atlases{aa}).log{i,ii}'; + % for iii=1:size(output.regions.(atlases{aa}).count.alphabet{i,ii},1) + % log_region = atlas_for_analysis == output.regions.(atlases{aa}).count.alphabet{i,ii}{iii,1}; + % output.volumes.(atlases{aa}).names{i,ii} = [output.regions.(atlases{aa}).count.alphabet{i,ii}(:,1)';output.regions.(atlases{aa}).count.alphabet{i,ii}(:,4)']; + % for s=1:size(MRI_volumes,1) + % output.volumes.(atlases{aa}).raw{i,ii}(s,iii) = sum(MRI_volumes(s,log_region&log_weights)); + % % output.volumes.(['LV_', num2str(i)]).weighted.(atlases{aa}){s,iii} = sum(output.final_parameters{i,4}(log_region&log_weights)' .* MRI_volumes(s,log_region&log_weights)); + % end + % end + % end + % MRI_volumes = MRI_volumes - (MRI_volumes*u)*u'; + % end + + end + + save(IN.results_path, 'input', 'output', 'setup'); + % end + +end + + +if any(strcmp(IN.specific, 'behavior') | all_jobs) + + b_folder = [collection_folder, '/behavior']; + mkdir(b_folder); + + %% visualize behavior vector in barplot + % CTQ.emotional_abuse)), (CTQ.physical_abuse)), (CTQ.sexual_abuse)), (CTQ.emotional_neglect)), (CTQ.physical_neglect)), (CTQ.denial))]; + % load([setup.analysis_folder, '/' setup.date, '_', name, '_data_collection.mat']); + % if sum(ismember(input.behavior_names, 'Age')) + sum(ismember(input.behavior_names, 'Sex'))==2 + % selected_variables = [input.selected_features(1,:), 'Age', 'Sex'; input.selected_features(2,:), 1, 1]; + % elseif sum(ismember(input.behavior_names, 'Age')) + % selected_variables = [input.selected_features(1,:), 'Age'; input.selected_features(2,:), 1]; + % elseif sum(ismember(input.behavior_names, 'Sex')) + % selected_variables = [input.selected_features(1,:), 'Sex'; input.selected_features(2,:), 1]; + % else + % selected_variables = input.selected_features; + % end + + log_f=[]; + for i=1:size(input.selected_features,2) + log_f(i,:) = ~cellfun(@isempty,(strfind(input.behavior_names(1,:), input.selected_features{1,i}))); + end + + if size(log_f,1)>1 + log_c = sum(sum(log_f==0)==size(input.selected_features,2)); + else + log_c = sum((log_f==0)==size(input.selected_features,2)); + end + + selected_variables = [[input.selected_features(1,:), input.behavior_names((end-(log_c-1)):end)]; [input.selected_features(2,:), num2cell(ones(1,log_c))]]; + % compute measures for effects + % Cohen's d + % for i=1:size(output.final_parameters,1) + % x1 = output.final_parameters{i, index_epsilon}; + % x2 = output.final_parameters{i, index_omega}; + % output.Cohen(i) = computeCohen_d(x1, x2, 'independent'); + % output.Spearman(i) = corr(x1, x2, 'Type', 'Spearman'); + % output.Kendall(i) = corr(x1, x2, 'Type', 'Kendall'); + % output.MI_kernel(i) = kernelmi(x1',x2'); + % output.MI_peng(i) = mutualinfo(x1,x2); + % 
end
+
+ count=0;
+ for i=1:size(input.selected_features(2,:),2)
+ temp=size(fieldnames(input.selected_features{2,i}),1);
+ count=count+temp;
+ end
+ colorpattern_LV = colorcube((count + log_c));
+
+ for i=1:(size(output.final_parameters,1))
+ x = output.final_parameters{i,opt_v};
+ output.questions_collection.(['LV_', num2str(i)]).items = input.behavior_names(x~=0)';
+ output.questions_collection.(['LV_', num2str(i)]).subscales = input.subscales(x~=0)';
+ for q=1:size(output.questions_collection.(['LV_', num2str(i)]).items,1)
+ output.questions_collection.(['LV_', num2str(i)]).questions{q} = Resilience_Stress_questionnaires(1,strcmp(Resilience_Stress_questionnaires(2,:), output.questions_collection.(['LV_', num2str(i)]).items{q}));
+ end
+
+ % errors = output.CI_v{i};
+ f=figure();
+ nn=0;
+ hold on
+ temp_legend=[]; temp_all = 0; %sel_temp_names=[];
+ for ii=1:(size(selected_variables,2))
+ switch class(selected_variables{2,ii})
+ case 'struct'
+ fields = fieldnames(input.selected_features{2,(strcmp(input.selected_features(1,:),input.selected_features{1,ii}))});
+ for iii=1:size(fields,1)
+ nn=nn+1;
+ temp_current=size(selected_variables{2,ii}.(fields{iii}),2);
+ try temp_handle(nn) = dp_barwitherr(errors([(temp_all+1):(temp_all+temp_current)],:),(temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:));
+ catch
+ temp_handle(nn) = bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:));
+ end
+ % temp_names = input.behavior_names((temp_all+1):(temp_all+temp_current));
+ % sel_temp_names = [sel_temp_names; temp_names(x((temp_all+1):(temp_all+temp_current))~=0)];
+ temp_all = temp_all+temp_current;
+ temp_legend{nn} = [selected_variables{1,ii}, ' ', strrep(fields{iii}, '_', ' ')];
+ hold on
+ end
+ case 'double'
+ nn=nn+1;
+ temp_current=size(selected_variables{2,ii},2);
+ try temp_handle(nn) = dp_barwitherr(errors([(temp_all+1):(temp_all+temp_current)],:),(temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:));
+ catch
+ temp_handle(nn) = bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:));
+ end
+ temp_all = temp_all+temp_current;
+ temp_legend{nn} = strrep(selected_variables{1,ii}, '_', ' ');
+ hold on
+ end
+ end
+
+ try grid_x = input.grid_dynamic.(['LV_', num2str(i)]).x;
+ grid_y = input.grid_dynamic.(['LV_', num2str(i)]).y;
+ catch
+ try grid_x = input.grid_dynamic.(['LV_', num2str(i-1)]).x;
+ grid_y = input.grid_dynamic.(['LV_', num2str(i-1)]).y;
+ catch
+ input.grid_dynamic.(['LV_', num2str(i-1)]).x = input.grid_dynamic.(['LV_', num2str(i-2)]).x;
+ input.grid_dynamic.(['LV_', num2str(i-1)]).y = input.grid_dynamic.(['LV_', num2str(i-2)]).y;
+ grid_x = input.grid_dynamic.(['LV_', num2str(i-1)]).x;
+ grid_y = input.grid_dynamic.(['LV_', num2str(i-1)]).y;
+ end
+ end
+
+ first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' ');
+ second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})];
+ if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i)
+ third_line = 'significant';
+ else
+ third_line = 'not significant';
+ end
+ title({first_line; second_line; third_line});
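+ % note: each LV is tested against its own FDR-corrected threshold in
+ % output.pvalue_FDR(i), not against a flat 0.05
+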
xlabel(LV_x, 'FontSize', font_size); + ylabel(LV_y, 'FontSize', font_size); + % if min(x)>=0 + % axis([0 (size(output.final_parameters{i,opt_v},1)+1) 0 1]); + % elseif max(x)<=0 + % axis([0 (size(output.final_parameters{i,opt_v},1)+1) 0 -1]); + % end + legend(temp_handle, temp_legend, 'Location', 'bestoutside', 'FontSize', font_size); + hold all + % legend('test', 'Location', 'bestoutside'); + + x_pos = 1:size(x,1); + y_value = x; + ygap = 0.05; % Specify vertical gap between the bar and label + ylimits = get(gca,'YLim'); + % set(gca,'YLim',[ylim(1),ylim(2)+0.2*max(y)]); % Increase y limit for labels + + + for xi=1:size(x_pos,2) % Loop over each bar + % xpos = x_pos(i); % Set x position for the text label + if xi==1 %|| i==size(x_pos,2) + if y_value(xi)<0 + ypos(xi) = ygap; + elseif y_value(xi)>0 + ypos(xi) = -ygap;% Set y position, including gap + else + ypos(xi)=0; + end + else + if y_value(xi)~=0 + if y_value(xi)>0 && y_value(xi-1)>0 %abs(y_value(i)-y_value(i-1))<=ygap + ypos(xi) = ypos(xi-1) - ygap; + elseif y_value(xi)<0 && y_value(xi-1)<0 + ypos(xi) = ypos(xi-1) + ygap; + elseif y_value(xi) > 0 + ypos(xi) = -ygap; + elseif y_value(xi) < 0 + ypos(xi) = ygap; + end + else + ypos(xi)=0; + end + end + if y_value(xi)~=0 + % htext = text(x_pos(xi),ypos(xi),strrep(input.behavior_names{xi},'_',' ')); % Add text label + % set(htext,'VerticalAlignment','bottom','HorizontalAlignment','center', 'FontSize', 8); % Adjust properties + end + end + + + % try annotation(f, 'textbox', [0.79, 0.2, 0.16, 0.4], 'string', strrep(output.questions_collection.(['LV_', num2str(i)]).final, '_', ' '), 'FontSize', 8, 'FitHeightToText', 'on'); + % catch + % end + + set(gcf,'Position', get(0,'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [b_folder, '/behavior_LV_' num2str(i)], '-dpng', '-r0'); + saveas(f, [b_folder, '/behavior_LV' num2str(i), '.fig']); + + close(f); + + for qq=1:size(output.questions_collection.(['LV_', num2str(i)]).items,1) + try output.questions_collection.(['LV_', num2str(i)]).final{qq} = [output.questions_collection.(['LV_', num2str(i)]).items{qq}, ': ', output.questions_collection.(['LV_', num2str(i)]).questions{1,qq}{1}]; + catch + output.questions_collection.(['LV_', num2str(i)]).final{qq} = [output.questions_collection.(['LV_', num2str(i)]).items{qq}]; + end + end + + output.questions_collection.(['LV_', num2str(i)]).combined = [output.questions_collection.(['LV_', num2str(i)]).final; output.questions_collection.(['LV_', num2str(i)]).subscales']; + + save(IN.results_path, 'input', 'output', 'setup'); + + end + + % print out questions + fields = fieldnames(output.questions_collection); + FID = fopen([b_folder, '/collected_questions.txt'], 'w'); + fclose(FID); + + for i=1:size(fields,1) + FID = fopen([b_folder, '/collected_questions.txt'], 'a'); + fprintf(FID, ['\n \n \nLV_', num2str(i)]); + fclose(FID); + temp1 = output.questions_collection.(['LV_', num2str(i)]).combined(1,:); + temp2 = output.questions_collection.(['LV_', num2str(i)]).combined(2,:); + temp_unique = unique(output.questions_collection.LV_1.combined(2,:)); + for ii=1:size(temp_unique,2) + FID = fopen([b_folder, '/collected_questions.txt'], 'a'); + fprintf(FID, ['\n \n', temp_unique{ii}]); + fclose(FID); + temp_unique_log = strcmp(temp2, temp_unique{ii}); + temp_unique_coll = temp1(temp_unique_log); + for iii=1:size(temp_unique_coll,2) + FID = fopen([b_folder, '/collected_questions.txt'], 'a'); + fprintf(FID, ['\n', temp_unique_coll{iii}]); + fclose(FID); + end + end + end + + %% plot latent scores epsilon and 
omega, color code diagnostic groups (HC, + % ROD, ROP, CHR) + + for i=1:size(input.selected_studygroups,2) + input.selected_studygroups{2,i} = strcmp(input.data_collection.Labels, input.selected_studygroups{1,i}); + end + + for i=1:size(output.CV.cv_outer_indices.TestInd,2) + % temp_1 = zeros(size(input.selected_studygroups{2,1},1),size(input.selected_studygroups{2,1},2))>0; + % temp_1(output.CV.cv_outer_indices.TestInd{1,i}) = true; + output.CV.cv_outer_indices.TestInd{2,i} = input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,i}); + end + + % plot all latent scores according to LVs + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + for i=1:size(output.final_parameters,1) + f=figure(); + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = output.final_parameters{i,index_epsilon}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + y = output.final_parameters{i,index_omega}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(ii,:)); + hold on + end + first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + third_line = 'significant'; + else + third_line = 'not significant'; + end + title({first_line; second_line; third_line}); % add third line + xlabel(LS_epsilon); + ylabel(LS_omega); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [b_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [b_folder, '/latent_scores_LV' num2str(i), '.fig']); + + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f) + end + + % plot latent scores all combined across significant LVs, colorcoded by LVs + colorpattern_LS = hsv(size(output.final_parameters,1)); + f=figure(); temp_legend = []; + for i=1:size(output.final_parameters,1) + x = output.final_parameters{i,index_epsilon}; + y = output.final_parameters{i,index_omega}; + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(i,:)); + first_line = strrep([input.name, ', grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + sig_line = 'significant'; + else + sig_line = 'not significant'; + end + title({first_line;second_line}); % add third line + hold on + temp_legend{i} = ['LV ', num2str(i), ': ', sig_line]; + end + % temp_legend{nn} = selected_variables{1,ii}; + xlabel(LS_epsilon); + ylabel(LS_omega); + [~, lgd_data] = legend(temp_legend, 'Location', 'bestoutside', 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); 
%// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [b_folder, '/latent_scores_combined_LV_color'], '-dpng', '-r0'); + saveas(f, [b_folder, '/latent_scores_combined_LV_color.fig']); + + close(f) + + % % standardize latent scores and plot all of them in one graph, colorcoded + % % by diagnoses + % % first transpose latent scores so that they fit function, then standardize + % % feature-wise (per LV) + % output.epsilon_stand = (dp_standardize(output.epsilon'))'; + % output.omega_stand = (dp_standardize(output.omega'))'; + + % plot all latent scores according to LVs, colorcoded by diagnoses + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + f=figure(); + for i=1:size(output.final_parameters,1) + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = output.final_parameters{i,index_epsilon}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + y = output.final_parameters{i,index_omega}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(ii,:)); + hold on + end + end + end + first_line = strrep([input.name, ', grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + % if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + % third_line = ['significant']; + % else + % third_line = ['not significant']; + % end + title({first_line; second_line}); % add third line + xlabel(LS_epsilon); + ylabel(LS_omega); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [b_folder, '/latent_scores_combined_diagnosis_color'], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [b_folder, '/latent_scores_combined_diagnosis_color', '.fig']); + + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f); + + + end + + + if any(strcmp(IN.specific, 'detailed') | all_jobs) + d_folder = [collection_folder, '/detailed']; + mkdir(d_folder); + + % detailed_results_folder = [IN.results_path(1:(strfind(IN.results_path, 'final')-1)), 'detailed_results']; + cd(d_folder); + + for i=1:size(output.final_parameters,1) + load([detailed_results_folder, '/opt_parameters_' num2str(i), '.mat']); + if exist('opt_parameters') + if size(opt_parameters,2) == 8 + opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s 
wrong!');
+ temp_opt_param = opt_parameters;
+ elseif exist('opt_parameters_temp')
+ if size(opt_parameters_temp,2) == 8 || size(opt_parameters_temp,2) == 10
+ opt_param_p = strcmp(opt_parameters_names, 'p');
+ opt_param_v = strcmp(opt_parameters_names, 'v');
+ opt_param_u = strcmp(opt_parameters_names, 'u');
+ opt_param_RHO = strcmp(opt_parameters_names, 'RHO');
+ elseif size(opt_parameters_temp,2) == 11
+ opt_param_p = strcmp(opt_parameters_names_long, 'p');
+ opt_param_v = strcmp(opt_parameters_names_long, 'v');
+ opt_param_u = strcmp(opt_parameters_names_long, 'u');
+ opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO');
+ else
+ disp('Something''s wrong!');
+ end
+ temp_opt_param = opt_parameters_temp;
+ else
+ disp('Something''s wrong!');
+ end
+ % opt_dir = [detailed_results_folder, '/opt_parameters_' num2str(i)];
+ % mkdir(opt_dir);
+ % cd(opt_dir);
+
+ d_folder_opt = [d_folder, '/opt_parameters_' num2str(i)];
+ mkdir(d_folder_opt);
+ cd(d_folder_opt);
+
+ % for ii=1:size(temp_opt_param,1)
+ % %% visualize results
+ % % write brain vector to nifti file
+ % nk_WriteVol(temp_opt_param{ii,opt_param_u}, ['brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt');
+ % % dp_resample_image([d_folder_opt, '/brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], [1 1 1]);
+ % end
+
+ %% visualize behavior vector in barplot
+
+ for ii=1:(size(temp_opt_param,1))
+ x = temp_opt_param{ii,opt_param_v};
+ f=subplot(round(size(temp_opt_param,1)/2),2,ii);
+ nn=0;
+ hold on
+ temp_legend=[]; temp_all = 0;
+ for iii=1:size(selected_variables,2)
+ switch class(selected_variables{2,iii})
+ case 'struct'
+ fields = fieldnames(input.selected_features{2,(strcmp(input.selected_features(1,:),input.selected_features{1,iii}))});
+ for iiii=1:size(fields,1)
+ nn=nn+1;
+ temp_current=size(selected_variables{2,iii}.(fields{iiii}),2);
+ bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:));
+ temp_all = temp_all+temp_current;
+ temp_legend{nn} = [selected_variables{1,iii}, ' ', strrep(fields{iiii}, '_', ' ')];
+ hold on
+ end
+ case 'double'
+ nn=nn+1;
+ temp_current=size(selected_variables{2,iii},2);
+ % temp_current is a scalar count; using it directly fixes the earlier
+ % size(temp_current,2) call, which always returned 1
+ bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:));
+ temp_all = temp_all+temp_current;
+ temp_legend{nn} = selected_variables{1,iii};
+ hold on
+ end
+ end
+ axis([0 (size(temp_opt_param{ii,opt_param_v},1)+1) -1 1]);
+ xlabel({'\color{black}clinical features'}, 'FontSize', font_size);
+ ylabel('weights', 'FontSize', font_size);
+ if temp_opt_param{ii,opt_param_p}<=output.pvalue_FDR(i)
+ significance_opt = 'significant';
+ else
+ significance_opt = 'not significant';
+ end
+ subplot_title = {['Iteration ', num2str(ii), ', p-value (FDR-corrected) = ' num2str(temp_opt_param{ii,opt_param_p}), ', Spearman''s RHO = ', num2str(temp_opt_param{ii,opt_param_RHO}), ', ', significance_opt]};
+ title(subplot_title, 'FontSize', font_size, 'FontWeight', 'normal');
+
+ end
+ set(gcf, 'Position', get(0, 'Screensize'));
+ set(gcf,'PaperPositionMode','auto')
+ first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' ');
+ second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})];
+ if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i)
+ third_line = 'significant';
+ else
+ third_line = 'not significant';
+ end
+ suptitle({first_line; [second_line, ', ' third_line]});
+
+ % print([detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0');
+ % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png');
+
+ print([d_folder_opt, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0');
+ saveas(f, [d_folder_opt, '/behavior_opt_parameters_',num2str(i)], 'png');
+
+ % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'fig');
+ % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png');
+ close all;
+ end
+
+ end
+
+ if any(strcmp(IN.specific, 'correlation') | all_jobs)
+ for i=1:size(input.selected_studygroups,2)
+ input.selected_studygroups{2,i} = strcmp(input.data_collection.Labels, input.selected_studygroups{1,i});
+ end
+
+ for i=1:size(output.CV.cv_outer_indices.TestInd,2)
+ % temp_1 = zeros(size(input.selected_studygroups{2,1},1),size(input.selected_studygroups{2,1},2))>0;
+ % temp_1(output.CV.cv_outer_indices.TestInd{1,i}) = true;
+ output.CV.cv_outer_indices.TestInd{2,i} = input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,i});
+ end
+
+ % plot all latent scores according to LVs
+ colorpattern_LS = hsv(size(input.selected_studygroups,2));
+ % note: this block assumes the subset list and index s from the
+ % sociodemographic step are still in scope
+ s_folder = [collection_folder, '/correlation/' subsets{s}];
+ mkdir(s_folder);
+
+ for i=1:size(output.final_parameters,1)
+ % the original field lookup was garbled in this hunk; it is assumed to
+ % list the outcome variables of the corr_data struct used below
+ fields = fieldnames(output.([subsets{s}, '_corr_data']).(['LV_', num2str(i)]));
+ for iii=1:size(fields,1) % loop header restored; it was missing in this hunk
+ for ii=1:size(input.selected_studygroups,2)
+ p_value = output.([subsets{s}, '_correlations']).(['LV_', num2str(i)]).epsilon.(input.selected_studygroups{1,ii}).(fields{iii})(2);
+ % FDR_value must be known before the significance check below
+ FDR_value = output.([subsets{s}, '_FDR_values']).(['LV_', num2str(i)]).epsilon.(input.selected_studygroups{1,ii});
+
+ if p_value<=FDR_value
+ % h1 = lsline;
+ % h1.Color = 'k';
+ % h1.LineWidth = 2;
+ RHO_value = output.([subsets{s}, '_correlations']).(['LV_', num2str(i)]).epsilon.(input.selected_studygroups{1,ii}).(fields{iii})(1);
+ first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' ');
+ second_line = strrep([input.selected_studygroups{1,ii}, ', epsilon x ', fields{iii}, ', p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], '_', ' ');
+ if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i)
+ third_line1 = 'LV significant';
+ else
+ third_line1 = 'LV not significant';
+ end
+ f=figure();
+ w = output.final_parameters{i,1};
+ x = output.final_parameters{i,index_epsilon}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii}));
+ y = output.([subsets{s}, '_corr_data']).(['LV_', num2str(i)]).(fields{iii})(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii}));
+ third_line2 = 'correlation significant';
+ plot(x,y,'.', 'MarkerSize', marker_size, 'color','k');
+ title(['p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], 'FontSize', font_size);
+ xlabel(LS_epsilon, 'FontSize', font_size);
+ y_temp = strrep(fields{iii}, '_', ' ');
+ y_temp = strrep(y_temp, 'T0','');
+ y_temp = strrep(y_temp, 'Screening','');
+ ylabel(y_temp, 'FontSize', font_size);
+ [~, lgd_data] = legend({input.selected_studygroups{1,ii}}, 'FontSize', font_size);
+ lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line
+ set(lgd_line, 'Markersize', marker_size); %// set marker size as desired
+
+ set(gcf, 'Position', get(0, 'Screensize'));
+ set(gcf,'PaperPositionMode','auto')
+ % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0');
+ print(f, [s_folder, '/latent_scores_LV', num2str(i), '_', subsets{s}, '_', input.selected_studygroups{1,ii}, '_epsilon_', fields{iii}], '-dpng', '-r0');
+ % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']);
+ % saveas(f, [s_folder, '/latent_scores_LV', num2str(i), '_', subsets{s}, '_', input.selected_studygroups{1,ii}, '_epsilon_', fields{iii}, '.fig']);
+ close(f);
+
+ else
+ third_line2 = 'correlation not significant';
+ end
+
+ end
+
+ % g=figure();
+ % a = output.final_parameters{i, index_epsilon};
+ % b = output.([subsets{s}, '_corr_data']).(['LV_', num2str(i)]).(fields{iii});
+ % plot(a,b,'.', 'MarkerSize', marker_size, 'color','k');
+ % hold on
+ % h1 = lsline;
+ % h1.Color = 'k';
+ % h1.LineWidth = 2;
+ % k1=h1;
+
+ p_value=output.([subsets{s}, '_correlations']).(['LV_', num2str(i)]).epsilon.all.(fields{iii})(2);
+ FDR_value = output.([subsets{s}, '_FDR_values']).(['LV_', num2str(i)]).epsilon.all;
+ RHO_value = output.([subsets{s}, '_correlations']).(['LV_', num2str(i)]).epsilon.all.(fields{iii})(1);
+ first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' ');
+ second_line = strrep(['all groups, epsilon x ', fields{iii}, ', p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], '_', ' ');
+ if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i)
+ third_line1 = 'LV significant';
+ else
+ third_line1 = 'LV not significant';
+ end
+
+ if p_value<=FDR_value
+ third_line2 = 'correlation significant';
+ f=figure();
+ for ii=1:size(input.selected_studygroups,2)
+ w = output.final_parameters{i,1};
+ x = output.final_parameters{i,index_epsilon}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii}));
+ y = output.([subsets{s}, '_corr_data']).(['LV_', num2str(i)]).(fields{iii})(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii}));
+ plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(ii,:));
+ % ax1.Visible = 'off';
+ hold on
+ % title({first_line; second_line; [third_line1, ' ', third_line2]});
+ end
+ xlabel(LS_epsilon, 'FontSize', font_size);
+ y_temp = strrep(fields{iii}, '_', ' ');
+ y_temp = strrep(y_temp, 'T0','');
+ y_temp = strrep(y_temp, 'Screening','');
+ ylabel(y_temp, 'FontSize', font_size);
+ title(['p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], 'FontSize', font_size);
+ [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size);
+ lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line
+ set(lgd_line, 'Markersize', marker_size); %// set marker size as desired
+ hold on
+ % line(k1.XData, k1.YData, 'Color','k','LineWidth',2);
+
+ set(gcf, 'Position', get(0, 'Screensize'));
+ set(gcf,'PaperPositionMode','auto')
+ % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0');
+ print(f, [s_folder, '/latent_scores_LV', num2str(i), '_', subsets{s}, '_', 'all_epsilon_', fields{iii}], '-dpng', '-r0');
+ % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']);
+ % saveas(f, [s_folder, '/latent_scores_LV', num2str(i), '_', subsets{s}, '_', 'all_epsilon_', fields{iii}, '.fig']);
+ close all;
+
+ else
+ third_line2 = 'correlation not significant';
+ end
+
+ end % closes the restored epsilon loop over fields (for iii)
+
+ for iii=1:size(fields,1)
+ for ii=1:size(input.selected_studygroups,2)
+ % h1 = lsline;
+ % h1.Color = 'k';
+ % h1.LineWidth = 2;
+ p_value = output.([subsets{s}, '_correlations']).(['LV_', num2str(i)]).omega.(input.selected_studygroups{1,ii}).(fields{iii})(2);
+ FDR_value = output.([subsets{s}, '_FDR_values']).(['LV_', num2str(i)]).omega.(input.selected_studygroups{1,ii});
+ RHO_value = output.([subsets{s}, '_correlations']).(['LV_', num2str(i)]).omega.(input.selected_studygroups{1,ii}).(fields{iii})(1);
+ first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' ');
+ second_line = strrep([input.selected_studygroups{1,ii}, ', omega x ', fields{iii}, ', p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], '_', ' ');
+ if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i)
+ third_line1 = 'LV significant';
+ else
+ third_line1 = 'LV not significant';
+ end
+
+ if p_value<=FDR_value
+ third_line2 = 'correlation significant';
+ f=figure();
+ w = output.final_parameters{i,1};
+ x = output.final_parameters{i,index_omega}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii}));
+ y = output.([subsets{s}, '_corr_data']).(['LV_', num2str(i)]).(fields{iii})(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii}));
+ plot(x,y,'.', 'MarkerSize', marker_size, 'color','k');
+ title(['p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], 'FontSize', font_size);
+ xlabel(LS_omega, 'FontSize', font_size);
+ y_temp = strrep(fields{iii}, '_', ' ');
+ y_temp = strrep(y_temp, 'T0','');
+ y_temp = strrep(y_temp, 'Screening','');
+ ylabel(y_temp, 'FontSize', font_size);
+ [~, lgd_data] = legend({input.selected_studygroups{1,ii}}, 'FontSize', font_size);
+ lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line
+ set(lgd_line, 'Markersize', marker_size); %// set marker size as desired
+
+ set(gcf, 'Position', get(0, 'Screensize'));
+ set(gcf,'PaperPositionMode','auto')
+ % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0');
+ print(f, [s_folder, '/latent_scores_LV', num2str(i), '_', subsets{s}, '_', input.selected_studygroups{1,ii}, '_omega_', fields{iii}], '-dpng', '-r0');
+ % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']);
+ % saveas(f, [s_folder, '/latent_scores_LV', num2str(i), '_', subsets{s}, '_', input.selected_studygroups{1,ii}, '_omega_', fields{iii}, '.fig']);
+ close(f);
+
+ else
+ third_line2 = 'correlation not significant';
+ end
+
+ end
+
+ % g=figure();
+ % a = output.final_parameters{i, index_omega};
+ % b = output.([subsets{s}, '_corr_data']).(['LV_', num2str(i)]).(fields{iii});
+ % plot(a,b,'.', 'MarkerSize', 20, 'color','k');
+ % hold on
+ % h1 = lsline;
+ % h1.Color = 'k';
+ % h1.LineWidth = 2;
+ % k1=h1;
+
+ p_value=output.([subsets{s}, '_correlations']).(['LV_', num2str(i)]).omega.all.(fields{iii})(2);
+ FDR_value = output.([subsets{s}, '_FDR_values']).(['LV_', num2str(i)]).omega.all;
+ RHO_value =
output.([subsets{s}, '_correlations']).(['LV_', num2str(i)]).omega.all.(fields{iii})(1); + first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' '); + second_line = strrep(['all groups, omega x ', fields{iii}, ', p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], '_', ' '); + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + third_line1 = 'LV significant'; + else + third_line1 = 'LV not significant'; + end + + if p_value<=FDR_value + third_line2 = 'correlation significant'; + f=figure(); + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = output.final_parameters{i,index_omega}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + y = output.([subsets{s}, '_corr_data']).(['LV_', num2str(i)]).(fields{iii})(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(ii,:)); + hold on + end + + title(['p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], 'FontSize', font_size); % add third line + xlabel(LS_omega, 'FontSize', font_size); + y_temp = strrep(fields{iii}, '_', ' '); + y_temp = strrep(y_temp, 'T0',''); + y_temp = strrep(y_temp, 'Screening',''); + ylabel(y_temp, 'FontSize', font_size); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + hold on + % line(k1.XData, k1.YData, 'Color','k','LineWidth',2); + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [s_folder, '/latent_scores_LV', num2str(i), '_', subsets{s}, '_', 'all_omega_', fields{iii}], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [s_folder, '/latent_scores_LV', num2str(i), '_', subsets{s}, '_', 'all_omega_', fields{iii}, '.fig']); + + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close all; + + else + third_line2 = 'correlation not significant'; + end + end + + % plot correlations according to main dimension, all groups + % combined, colorcoded for dimensions + % GAF + % fields = fieldnames(output.hold_out_corr_data.LV_1); + % dimensions_selection = {'GAF', 'GF', 'BDI', 'WHO'}; + % for d=1:size(dimensions_selection,2) + % leg_selection = ~cellfun(@isempty, strfind(fields, dimensions_selection{d})); + % colorpattern_DD = hsv(sum(leg_selection)); + % for dd=1:size(fields,1) + % if leg_selection(dd) + % x1 = output.final_parameters{i, index_omega}; + % y = output.hold_out_corr_data.(['LV_', num2str(i)]).(fields{dd}); + % plot(x1,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_DD(dd,:)); + % hold on + % end + % + % end + % end + + % p_value=output.([subsets{s}, '_correlations']).(['LV_', num2str(i)]).omega.all.(fields{iii})(2); + % FDR_value = output.([subsets{s}, '_FDR_values']).(['LV_', num2str(i)]).omega.all; + % RHO_value = output.([subsets{s}, '_correlations']).(['LV_', num2str(i)]).omega.all.(fields{iii})(1); + % first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV 
',num2str(i)], '_', ' '); + % second_line = strrep(['all groups, omega x ', fields{iii}, ', p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], '_', ' '); + % if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + % third_line1 = 'LV significant'; + % else + % third_line1 = 'LV not significant'; + % end + % + % if p_value<=FDR_value + % third_line2 = 'correlation significant'; + % f=figure(); + % for ii=1:size(input.selected_studygroups,2) + % w = output.final_parameters{i,1}; + % x = output.final_parameters{i,index_omega}; + % y = output.([subsets{s}, '_corr_data']).(['LV_', num2str(i)]).(fields{iii})(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + % plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(ii,:)); + % hold on + % end + % + % title(['p-value = ' num2str(p_value), ', Spearman''s RHO = ', num2str(RHO_value)], 'FontSize', font_size); % add third line + % xlabel(LS_omega, 'FontSize', font_size); + % y_temp = strrep(fields{iii}, '_', ' '); + % y_temp = strrep(y_temp, 'T0',''); + % y_temp = strrep(y_temp, 'Screening',''); + % ylabel(y_temp, 'FontSize', font_size); + % [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + % lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + % set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + % hold on + % % line(k1.XData, k1.YData, 'Color','k','LineWidth',2); + % + % set(gcf, 'Position', get(0, 'Screensize')); + % set(gcf,'PaperPositionMode','auto') + % % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + % print(f, [s_folder, '/latent_scores_LV', num2str(i), '_', subsets{s}, '_', 'all_omega_', fields{iii}], '-dpng', '-r0'); + % % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % % saveas(f, [s_folder, '/latent_scores_LV', num2str(i), '_', subsets{s}, '_', 'all_omega_', fields{iii}, '.fig']); + % + % % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + % close all; + + + end + + + save(IN.results_path, 'input', 'output', 'setup'); + + end + +end \ No newline at end of file diff --git a/Visualization_Module/dp_visualize_data_multi_2020.m b/Visualization_Module/dp_visualize_data_multi_2020.m new file mode 100644 index 0000000..dd5454c --- /dev/null +++ b/Visualization_Module/dp_visualize_data_multi_2020.m @@ -0,0 +1,930 @@ +%% function to visualize SPLS output + +function dp_visualize_data_multi_2020(IN) + +load(IN.results_path); + +switch IN.overall_analysis + case 'Stress' + overall_folder = '/volume/HCStress/Analysis/Stress'; + case 'Resilience' + overall_folder = '/volume/HCStress/Analysis/Resilience'; +end + +folder_name = [setup.date, '_', input.name]; +if contains(IN.results_path, 'final_vis') + collection_folder = [overall_folder, '/', folder_name, '/final_vis']; + val_log = false; +elseif contains(IN.results_path, 'validation_vis') + collection_folder = [overall_folder, '/', folder_name, '/validation_vis']; + val_log = true; +end + +mkdir(collection_folder); + +if ~isfield(IN, 'specific') + all_jobs = true; + IN.specific = 'empty'; +else + all_jobs = false; +end + +marker_size = 14; +font_size = 14; +LV_x = 'clinical features'; +LV_y = 'weights'; +LS_epsilon = 'brain score'; +LS_omega = 'behavior score'; + +%% compute sociodemographic and clinical outcome correlations +if any(strcmp(IN.specific, 'sociodemographic') | all_jobs) + [input, 
output, setup] = dp_sociodemographic_2020(IN); +end + +if val_log + output.final_parameters = output.validation_results; +end + +input.behavior_names = strrep(input.behavior_names, '_T0', ''); +load(input.NM_structure); + +%% prepare questionnaire data +load('/volume/HCStress/Doc/Stress_Resilience_questionnaires.mat'); +Resilience_Stress_questionnaires = [CISS_questionnaire', RSA_questionnaire', CTQ_questionnaire', BS_questionnaire']; + +% define column names for matrices so that you can access them later by +% indexing +% output.parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop +opt_parameters_names = {'w', 'cu', 'cv', 'u', 'v', 'success', 'RHO', 'p'}; +opt_parameters_names_long = {'w', 'cu', 'cv', 'u', 'v', 'U_opt', 'S_opt' ,'V_opt', 'success', 'RHO', 'p'}; % names of parameters for optimal cu/cv combination of the w-th loop + +% indices for later use +opt_u = strcmp(output.parameters_names,'u'); +opt_v = strcmp(output.parameters_names,'v'); +opt_p = strcmp(output.parameters_names, 'p'); +opt_RHO = strcmp(output.parameters_names, 'RHO'); +index_epsilon = strcmp(output.parameters_names, 'epsilon'); +index_omega = strcmp(output.parameters_names, 'omega'); + +if any(strcmp(IN.specific, 'images') | all_jobs) + i_folder = [collection_folder, '/images']; + mkdir(i_folder); + cd(i_folder); + % write brain vector to nifti file + for i=1:size(output.final_parameters,1) + nk_WriteVol(output.final_parameters{i,4}, ['brain_LV_final_' num2str(i)], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); + end +end + +if any(strcmp(IN.specific, 'atlas') | all_jobs) + a_folder = [collection_folder, '/atlases']; + mkdir(a_folder); + atlases = {'brainnetome', 'cerebellum'}; + % get clusters for brain regions using hammers and aal atlas + % filepath hammers nifti: /opt/SPM/spm12_v6685_cat12_r1207/atlas/hammers.nii + % filepath hammers description: /opt/SPM/spm12_v6685_cat12_r1207/atlas/labels_dartel_hammers.xml + + for aa=1:size(atlases,2) + + switch atlases{aa} + case 'hammers' + switch size(input.behavior,1) + case 626 + atlas_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__626_NM_X.mat'; + case 627 + atlas_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__627_NM_X.mat'; + case 630 + atlas_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__630_CISS_RSA_NM_X.mat'; + a_number = 1; + case 631 + atlas_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__631_NM_X.mat'; + case 634 + atlas_path_full = '/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30r95_spm12_full__634_NM_X.mat'; + case 649 + atlas_path_full = '/volume/HCStress/Data/MRI/hammers_3mm_649_X.mat'; + a_number = 1; + end + temp = load('/volume/HCStress/Data/MRI/Hammers_Atlas/Hammers_mith-n30-ancillary-data.tar/Hammers_mith-n30-ancillary-data/Hammers_mith-n30-ancillary-data/labels_hammerssmith_n30_ancillary_data.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + for i=1:size(labels_regions,1) + strf = strfind(labels_regions{i,2}, ' '); + labels_regions{i,3} = labels_regions{i,2}(1:(strf-1)); + end + + case 'juelich' + switch size(input.behavior,1) + case 630 + atlas_path_full = '/volume/HCStress/Data/MRI/Julich_Atlas/juelich_maxprob_0_r3mm_630_CISS_RSA_NM_X.mat'; + a_number = 1; + case 627 + atlas_path_full = 
'/volume/HCStress/Data/MRI/Julich_Atlas/juelich_maxprob_0_r3mm_627_NM_X.mat'; + case 634 + atlas_path_full = '/volume/HCStress/Data/MRI/Julich_Atlas/juelich_maxprob_0_r3mm_634_NM_X.mat'; + case 649 + atlas_path_full = '/volume/HCStress/Data/MRI/BNA_hammers_juelich_3mm_649_X.mat'; + a_number = 3; + end + temp = load('/volume/HCStress/Data/MRI/Julich_Atlas/juelich_indices_labels.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + for i=1:size(labels_regions,1) + strf = strfind(labels_regions{i,2}, ' '); + labels_regions{i,3} = labels_regions{i,2}(1:(strf-1)); + end + + case 'brainnetome' + switch size(input.behavior,1) + case 621 + atlas_path_full = '/volume/HCStress/Data/MRI/BNA_CISSRSAPAS_3mm_621_X.mat'; + case 627 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_627_CTQ_BS_NM_X.mat'; + case 630 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_630_CISS_RSA_NM_X.mat'; + a_number = 1; + case 634 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_634_CISS_RSA_NM_X.mat'; + case 636 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_636_CISS_NM_X.mat'; + case 649 + atlas_path_full = '/volume/HCStress/Data/MRI/BNA_hammers_juelich_3mm_649_X.mat'; + a_number = 1; + case 652 + atlas_path_full = '/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_3mm_652_WSS_NM_X.mat'; + end + temp = load('/volume/HCStress/Data/MRI/Brainnetome_Atlas/brainnetome_indices.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + % for i=1:size(labels_regions,1) + % strf = strfind(labels_regions{i,2}, '_'); + % labels_regions{i,3} = labels_regions{i,2}(1:(strf-1)); + % end + case 'cerebellum' + switch size(input.behavior,1) + case 621 + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_621_CISSRSAPAS_X.mat'; + case 636 + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_636_CISS_X.mat'; + case 649 + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_649_CTQ_X.mat'; + case 634 + atlas_path_full = '/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/Cerebellum-MNIflirt_634_CISSRSA_X.mat'; + end + temp = load('/volume/HCStress/Data/MRI/Cerebellum-MNIflirt-MRICroN/CerebellumMNIflirt_indices.mat'); + fields = fieldnames(temp); + labels_regions = temp.(fields{1}); + + + end + + temp=load(atlas_path_full); + fields = fieldnames(temp); + if exist('a_number', 'var') + atlas_for_analysis = round(temp.(fields{1})(a_number,:)); + else + atlas_for_analysis = round(temp.(fields{1})); + end + [C_atlas, ~, ic_atlas] = unique(atlas_for_analysis); + counts_atlas = accumarray(ic_atlas, 1); + output.regions.(atlases{aa}).regions_count = [C_atlas', counts_atlas]; + output.regions.(atlases{aa}).count_names = {'region_number', 'n_voxels', 'voxel_percentage', 'region_name', 'median_weights', 'mean_weights'}; + output.regions.(atlases{aa}).count = struct('raw', [], 'voxels', [], 'weights', []); + output.regions.(atlases{aa}).count_names_sorting = {'positive', 'negative', 'overall'}; + voxel_cutoff = 0; + fields = fieldnames(output.regions.(atlases{aa}).count); + for i=1:size(fields,1) + for fff = 1:size(output.regions.(atlases{aa}).count_names_sorting,2) + FID = fopen([a_folder, '/brain_regions_', atlases{aa}, '_', output.regions.(atlases{aa}).count_names_sorting{fff}, '_', fields{i}, '.txt'], 'w'); + fprintf(FID, [strrep(input.name,'_',' '), 
'\n', fields{i} ' sorted']); + fclose(FID); + end + end + + for i=1:size(output.final_parameters,1) + output.regions.(atlases{aa}).log{i,1} = output.final_parameters{i,4}>0; + output.regions.(atlases{aa}).log{i,2} = output.final_parameters{i,4}<0; + output.regions.(atlases{aa}).log{i,3} = output.final_parameters{i,4}~=0; + for ii=1:size(output.regions.(atlases{aa}).log,2) + output.regions.(atlases{aa}).sum{i,ii} = atlas_for_analysis((atlas_for_analysis~=0)' & output.regions.(atlases{aa}).log{i,ii}); + [C, ~, ic] = unique(output.regions.(atlases{aa}).sum{i,ii}); + a_counts = accumarray(ic, 1); + voxel_percentage = a_counts./(counts_atlas(ismember(C_atlas, C))); + output.regions.(atlases{aa}).count.raw{i,ii} = [num2cell(C'), num2cell(a_counts), num2cell(voxel_percentage), labels_regions((C'),2)]; + fix_end = size(output.regions.(atlases{aa}).count.raw{i,ii},2); + log_cutoff = cell2mat(output.regions.(atlases{aa}).count.raw{i,ii}(:, find(strcmp(output.regions.(atlases{aa}).count_names, 'n_voxels')))) 0 + strf = strfind(mat{1,4}, ' '); + temp_prefix = mat{1,4}(1:(strf-1)); + strf_log = ~cellfun(@isempty, strfind(mat(:,4), temp_prefix)); + temp_mat = [temp_mat; mat(strf_log,:)]; + output.regions.(atlases{aa}).count.alphabet{b,bb} = temp_mat; + mat(strf_log,:)=[]; + end + end + end + + else + temp = output.regions.(atlases{aa}).count.voxels; + for b=1:size(temp,1) + for bb=1:size(temp,2) + mat = temp{b,bb}; + if ~isempty(mat) + [~,idu] = sort([mat{:,4}]'); + output.regions.(atlases{aa}).count.alphabet{b,bb} = mat(idu,:); + else + output.regions.(atlases{aa}).count.alphabet{b,bb} = mat; + end + end + end + + end + + for fi=1:size(output.regions.(atlases{aa}).count.alphabet,1) + temp_add = []; + temp_print = output.regions.(atlases{aa}).count_names([1,2,4,5,3]); + for f=1:size(temp_print,2) + % try output.regions.(atlases{aa}).count_names{f} = num2str(temp_print{f}); + % catch ME + % end + temp_add = [temp_add, sprintf('\t'), temp_print{f}]; + end + % fields = fieldnames(output.regions.(atlases{aa}).count); + % for ff=1:size(fields,1) + for fff = 1:size(output.regions.(atlases{aa}).count.alphabet,2) + FID = fopen([a_folder, '/brain_regions_', atlases{aa}, '_', output.regions.(atlases{aa}).count_names_sorting{fff}, '_alphabet.txt'], 'a'); + fprintf(FID, ['\n \n \n', sprintf('\t'), 'Latent Variable ' num2str(fi), '\n \n', temp_add, '\n']); + fclose(FID); + end + % end + + % fields = fieldnames(output.regions.(atlases{aa}).count); + % for ff=1:size(fields,1) + for fff = 1:size(output.regions.(atlases{aa}).count.alphabet,2) + if ~isempty(output.regions.(atlases{aa}).count.alphabet{fi,fff}) + for r=1:size(output.regions.(atlases{aa}).count.alphabet{fi,fff},1) + temp_print = output.regions.(atlases{aa}).count.alphabet{fi,fff}(r,[1,2,4,5,3]); + temp_add = []; + for ii=1:size(temp_print,2) + try temp_print{ii} = num2str(temp_print{ii}); + catch ME + end + temp_add = [temp_add, sprintf('\t'), temp_print{ii}]; + end + + FID = fopen([a_folder, '/brain_regions_', atlases{aa}, '_', output.regions.(atlases{aa}).count_names_sorting{fff}, '_alphabet.txt'], 'a'); + fprintf(FID, ['\n' temp_add]); + fclose(FID); + end + end + end + % end + end + + end + + end + % just deactivated for paper preparation, needs to be reactivated + % later on + load(input.MRI) + MRI_volumes = MRI_for_analysis; + for i=1:size(output.final_parameters,1) + u=output.final_parameters{i,4}; + % log_u{1,1} = u>0; + % log_u{1,2} = u<0; + for ii=1:(size(output.regions.(atlases{aa}).log,2)-1) + log_weights = 
output.regions.(atlases{aa}).log{i,ii}'; + for iii=1:size(output.regions.(atlases{aa}).count.alphabet{i,ii},1) + log_region = atlas_for_analysis == output.regions.(atlases{aa}).count.alphabet{i,ii}{iii,1}; + output.volumes.(atlases{aa}).names{i,ii} = [output.regions.(atlases{aa}).count.alphabet{i,ii}(:,1)';output.regions.(atlases{aa}).count.alphabet{i,ii}(:,4)']; + % for s=1:size(MRI_volumes,1) + output.volumes.(atlases{aa}).raw{i,ii}(:,iii) = sum(MRI_volumes(:,log_region&log_weights),2); + % output.volumes.(['LV_', num2str(i)]).weighted.(atlases{aa}){s,iii} = sum(output.final_parameters{i,4}(log_region&log_weights)' .* MRI_volumes(s,log_region&log_weights)); + % end + end + end + % MRI_volumes = MRI_volumes - (MRI_volumes*u)*u'; + end + + end + + save(IN.results_path, 'input', 'output', 'setup'); + % end + +end + + +if any(strcmp(IN.specific, 'behavior') | all_jobs) + + b_folder = [collection_folder, '/behavior']; + mkdir(b_folder); + + %% visualize behavior vector in barplot + % CTQ.emotional_abuse)), (CTQ.physical_abuse)), (CTQ.sexual_abuse)), (CTQ.emotional_neglect)), (CTQ.physical_neglect)), (CTQ.denial))]; + % load([setup.analysis_folder, '/' setup.date, '_', name, '_data_collection.mat']); + % if sum(ismember(input.behavior_names, 'Age')) + sum(ismember(input.behavior_names, 'Sex'))==2 + % selected_variables = [input.selected_features(1,:), 'Age', 'Sex'; input.selected_features(2,:), 1, 1]; + % elseif sum(ismember(input.behavior_names, 'Age')) + % selected_variables = [input.selected_features(1,:), 'Age'; input.selected_features(2,:), 1]; + % elseif sum(ismember(input.behavior_names, 'Sex')) + % selected_variables = [input.selected_features(1,:), 'Sex'; input.selected_features(2,:), 1]; + % else + % selected_variables = input.selected_features; + % end + + log_f=[]; + for i=1:size(input.selected_features,2) + log_f(i,:) = contains(input.behavior_names(1,:), input.selected_features{1,i}); + end + + if size(log_f,1)>1 + log_c = sum(sum(log_f==0)==size(input.selected_features,2)); + else + log_c = sum((log_f==0)==size(input.selected_features,2)); + end + + selected_variables = [[input.selected_features(1,:), input.behavior_names((end-(log_c-1)):end)]; [input.selected_features(2,:), num2cell(ones(1,log_c))]]; + % compute measures for effects + % Cohen's d + for i=1:size(output.final_parameters,1) + x1 = output.final_parameters{i, index_epsilon}; + x2 = output.final_parameters{i, index_omega}; + output.Cohen(i) = computeCohen_d(x1, x2, 'independent'); + output.Spearman(i) = corr(x1, x2, 'Type', 'Spearman'); + output.Kendall(i) = corr(x1, x2, 'Type', 'Kendall'); + output.MI_kernel(i) = kernelmi(x1',x2'); + output.MI_peng(i) = mutualinfo(x1,x2); + end + + count=0; + for i=1:size(input.selected_features(2,:),2) + temp=size(fieldnames(input.selected_features{2,i}),1); + count=count+temp; + end + colorpattern_LV = colorcube((count + log_c)); + + for i=1:(size(output.final_parameters,1)) + x = output.final_parameters{i,opt_v}; + output.questions_collection.(['LV_', num2str(i)]).items = input.behavior_names(x~=0)'; + output.questions_collection.(['LV_', num2str(i)]).subscales = input.subscales(x~=0)'; + for q=1:size(output.questions_collection.(['LV_', num2str(i)]).items,1) + output.questions_collection.(['LV_', num2str(i)]).questions{q} = Resilience_Stress_questionnaires(1,strcmp(Resilience_Stress_questionnaires(2,:), output.questions_collection.(['LV_', num2str(i)]).items{q})); + end + + % errors = output.CI_v{i}; + f=figure(); + nn=0; + hold on + temp_legend=[]; temp_all = 0; 
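+    % The loop below draws one colored bar block per entry of selected_variables:
+    % struct-valued entries (questionnaires with subscale fields) contribute one
+    % block per subscale, double-valued entries (single covariates such as Age or
+    % Sex) one block each. dp_barwitherr overlays confidence-interval error bars
+    % when an errors matrix is defined (the assignment from output.CI_v{i} is
+    % commented out above); the catch branch falls back to a plain bar plot.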
%sel_temp_names=[]; + for ii=1:(size(selected_variables,2)) + switch class(selected_variables{2,ii}) + case 'struct' + fields = fieldnames(input.selected_features{2,(strcmp(input.selected_features(1,:),input.selected_features{1,ii}))}); + for iii=1:size(fields,1) + nn=nn+1; + temp_current=size(selected_variables{2,ii}.(fields{iii}),2); + try temp_handle(nn) = dp_barwitherr(errors([(temp_all+1):(temp_all+temp_current)],:),(temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + catch + temp_handle(nn) = bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + end + % temp_names = input.behavior_names((temp_all+1):(temp_all+temp_current)); + % sel_temp_names = [sel_temp_names; temp_names(x((temp_all+1):(temp_all+temp_current))~=0)]; + temp_all = temp_all+temp_current; + temp_legend{nn} = [selected_variables{1,ii}, ' ', strrep(fields{iii}, '_', ' ')]; + hold on + end + case 'double' + nn=nn+1; + temp_current=size(selected_variables{2,ii},2); + try temp_handle(nn) = dp_barwitherr(errors([(temp_all+1):(temp_all+temp_current)],:),(temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + catch + temp_handle(nn) = bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + end + temp_all = temp_all+temp_current; + temp_legend{nn} = strrep(selected_variables{1,ii}, '_', ' '); + hold on + end + end + + if any(i==input.grid_dynamic.onset) + grid_x = input.grid_dynamic.(['LV_', num2str(i)]).x; + grid_y = input.grid_dynamic.(['LV_', num2str(i)]).y; + end + + first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + + if ~val_log + p_sig_threshold = output.pvalue_FDR(i); + fourth_line = 'main sample'; + else + p_sig_threshold = 0.05; + fourth_line = 'validation sample'; + end + + if output.final_parameters{i,opt_p}<=p_sig_threshold + third_line = 'significant'; + else + third_line = 'not significant'; + end + title({first_line; second_line; third_line; fourth_line}); % add third line + xlabel(LV_x, 'FontSize', font_size); + ylabel(LV_y, 'FontSize', font_size); + + legend(temp_handle, temp_legend, 'Location', 'bestoutside', 'FontSize', font_size); + hold all + + x_pos = 1:size(x,1); + y_value = x; + ygap = 0.05; % Specify vertical gap between the bar and label + ylimits = get(gca,'YLim'); + % set(gca,'YLim',[ylim(1),ylim(2)+0.2*max(y)]); % Increase y limit for labels + + + for xi=1:size(x_pos,2) % Loop over each bar + % xpos = x_pos(i); % Set x position for the text label + if xi==1 %|| i==size(x_pos,2) + if y_value(xi)<0 + ypos(xi) = ygap; + elseif y_value(xi)>0 + ypos(xi) = -ygap;% Set y position, including gap + else + ypos(xi)=0; + end + else + if y_value(xi)~=0 + if y_value(xi)>0 && y_value(xi-1)>0 %abs(y_value(i)-y_value(i-1))<=ygap + ypos(xi) = ypos(xi-1) - ygap; + elseif y_value(xi)<0 && y_value(xi-1)<0 + ypos(xi) = ypos(xi-1) + ygap; + elseif y_value(xi) > 0 + ypos(xi) = -ygap; + elseif y_value(xi) < 0 + ypos(xi) = ygap; + end + else + ypos(xi)=0; + end + end + if y_value(xi)~=0 + % htext = text(x_pos(xi),ypos(xi),strrep(input.behavior_names{xi},'_',' ')); % Add text label + % 
set(htext,'VerticalAlignment','bottom','HorizontalAlignment','center', 'FontSize', 8); % Adjust properties + end + end + + + % try annotation(f, 'textbox', [0.79, 0.2, 0.16, 0.4], 'string', strrep(output.questions_collection.(['LV_', num2str(i)]).final, '_', ' '), 'FontSize', 8, 'FitHeightToText', 'on'); + % catch + % end + + set(gcf,'Position', get(0,'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [b_folder, '/behavior_LV_' num2str(i)], '-dpng', '-r0'); + saveas(f, [b_folder, '/behavior_LV' num2str(i), '.fig']); + % saveas(f, [b_folder, '/behavior_LV' num2str(i), '.eps']); + saveas(f,[b_folder, '/behavior_LV' num2str(i)],'epsc'); + + close(f); + output.questions_collection.(['LV_', num2str(i)]).final={}; + for qq=1:size(output.questions_collection.(['LV_', num2str(i)]).items,1) + try output.questions_collection.(['LV_', num2str(i)]).final{qq} = [output.questions_collection.(['LV_', num2str(i)]).items{qq}, ': ', output.questions_collection.(['LV_', num2str(i)]).questions{1,qq}{1}]; + catch + output.questions_collection.(['LV_', num2str(i)]).final{qq} = [output.questions_collection.(['LV_', num2str(i)]).items{qq}]; + end + end + + output.questions_collection.(['LV_', num2str(i)]).combined = [output.questions_collection.(['LV_', num2str(i)]).final; output.questions_collection.(['LV_', num2str(i)]).subscales']; + + save(IN.results_path, 'input', 'output', 'setup'); + + end + + % print out questions + fields = fieldnames(output.questions_collection); + FID = fopen([b_folder, '/collected_questions.txt'], 'w'); + fclose(FID); + + for i=1:size(fields,1) + FID = fopen([b_folder, '/collected_questions.txt'], 'a'); + fprintf(FID, ['\n \n \nLV_', num2str(i)]); + fclose(FID); + temp1 = output.questions_collection.(['LV_', num2str(i)]).combined(1,:); + temp2 = output.questions_collection.(['LV_', num2str(i)]).combined(2,:); + temp_unique = unique(output.questions_collection.LV_1.combined(2,:)); + for ii=1:size(temp_unique,2) + FID = fopen([b_folder, '/collected_questions.txt'], 'a'); + fprintf(FID, ['\n \n', temp_unique{ii}]); + fclose(FID); + temp_unique_log = strcmp(temp2, temp_unique{ii}); + temp_unique_coll = temp1(temp_unique_log); + for iii=1:size(temp_unique_coll,2) + FID = fopen([b_folder, '/collected_questions.txt'], 'a'); + fprintf(FID, ['\n', temp_unique_coll{iii}]); + fclose(FID); + end + end + end + + %% plot latent scores epsilon and omega, color code diagnostic groups (HC, + % ROD, ROP, CHR) + + for i=1:size(input.selected_studygroups,2) + input.selected_studygroups{2,i} = strcmp(input.data_collection.Labels, input.selected_studygroups{1,i}); + end + + for i=1:size(output.CV.cv_outer_indices.TestInd,2) + % temp_1 = zeros(size(input.selected_studygroups{2,1},1),size(input.selected_studygroups{2,1},2))>0; + % temp_1(output.CV.cv_outer_indices.TestInd{1,i}) = true; + output.CV.cv_outer_indices.TestInd{2,i} = input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,i}); + end + + % plot all latent scores according to LVs + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + for i=1:size(output.final_parameters,1) + f=figure(); + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = output.final_parameters{i,index_epsilon}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + y = output.final_parameters{i,index_omega}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(ii,:)); + hold on + end + 
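+    % The scatter above plots brain (epsilon) against behavior (omega) test
+    % scores for LV i, color-coded by study group; subjects are matched to
+    % groups via the labels stored in output.CV.cv_outer_indices.TestInd{2,w}
+    % for outer fold w.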
first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ', grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + third_line = 'significant'; + else + third_line = 'not significant'; + end + title({first_line; second_line; third_line}); % add third line + xlabel(LS_epsilon); + ylabel(LS_omega); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [b_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [b_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [b_folder, '/latent_scores_LV' num2str(i), '.eps']); + saveas(f,[b_folder, '/latent_scores_LV' num2str(i)],'epsc'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f) + end + + % plot latent scores all combined across significant LVs, colorcoded by LVs + colorpattern_LS = hsv(size(output.final_parameters,1)); + f=figure(); temp_legend = []; + for i=1:size(output.final_parameters,1) + x = output.final_parameters{i,index_epsilon}; + y = output.final_parameters{i,index_omega}; + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(i,:)); + first_line = strrep([input.name, ', grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + sig_line = 'significant'; + else + sig_line = 'not significant'; + end + title({first_line;second_line}); % add third line + hold on + temp_legend{i} = ['LV ', num2str(i), ': ', sig_line]; + end + % temp_legend{nn} = selected_variables{1,ii}; + xlabel(LS_epsilon); + ylabel(LS_omega); + [~, lgd_data] = legend(temp_legend, 'Location', 'bestoutside', 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [b_folder, '/latent_scores_combined_LV_color'], '-dpng', '-r0'); + saveas(f, [b_folder, '/latent_scores_combined_LV_color.fig']); + % saveas(f, [b_folder, '/latent_scores_combined_LV_color.eps']); + saveas(f,[b_folder, '/latent_scores_combined_LV_color'],'epsc'); + close(f) + + % % standardize latent scores and plot all of them in one graph, colorcoded + % % by diagnoses + % % first transpose latent scores so that they fit function, then standardize + % % feature-wise (per LV) + % output.epsilon_stand = (dp_standardize(output.epsilon'))'; + % output.omega_stand = (dp_standardize(output.omega'))'; + + % plot all latent scores according to LVs, colorcoded by diagnoses + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + f=figure(); + for i=1:size(output.final_parameters,1) + if 
output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + for ii=1:size(input.selected_studygroups,2) + w = output.final_parameters{i,1}; + x = output.final_parameters{i,index_epsilon}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + y = output.final_parameters{i,index_omega}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,ii})); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(ii,:)); + hold on + end + end + end + first_line = strrep([input.name, ', grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density)], '_', ' '); + second_line = 'latent scores from all LVs combined'; + % if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + % third_line = ['significant']; + % else + % third_line = ['not significant']; + % end + title({first_line; second_line}); % add third line + xlabel(LS_epsilon); + ylabel(LS_omega); + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + % print(f, [results_folder, '/latent_scores_LV' num2str(i)], '-dpng', '-r0'); + print(f, [b_folder, '/latent_scores_combined_diagnosis_color'], '-dpng', '-r0'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + saveas(f, [b_folder, '/latent_scores_combined_diagnosis_color', '.fig']); + % saveas(f, [b_folder, '/latent_scores_combined_diagnosis_color', '.eps']); + saveas(f,[b_folder, '/latent_scores_combined_diagnosis_color'],'epsc'); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.fig']); + % saveas(f, [results_folder, '/latent_scores_LV' num2str(i), '.png']); + close(f); + + +end + +if any(strcmp(IN.specific, 'detailed') | all_jobs) + d_folder = [collection_folder, '/detailed']; + mkdir(d_folder); + + % detailed_results_folder = [IN.results_path(1:(strfind(IN.results_path, 'final')-1)), 'detailed_results']; + cd(d_folder); + + for i=1:size(output.final_parameters,1) + load([detailed_results_folder, '/opt_parameters_' num2str(i), '.mat']); + if exist('opt_parameters') + if size(opt_parameters,2) == 8 + opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s wrong!'); + end + temp_opt_param = opt_parameters; + elseif exist('opt_parameters_temp') + if size(opt_parameters_temp,2) == 8 || size(opt_parameters_temp,2) == 10 + opt_param_p = strcmp(opt_parameters_names, 'p'); + opt_param_v = strcmp(opt_parameters_names, 'v'); + opt_param_u = strcmp(opt_parameters_names, 'u'); + opt_param_RHO = strcmp(opt_parameters_names, 'RHO'); + elseif size(opt_parameters_temp,2) == 11 + opt_param_p = strcmp(opt_parameters_names_long, 'p'); + opt_param_v = strcmp(opt_parameters_names_long, 'v'); + opt_param_u = strcmp(opt_parameters_names_long, 'u'); + opt_param_RHO = strcmp(opt_parameters_names_long, 'RHO'); + else + disp('Something''s wrong!'); + end + temp_opt_param = opt_parameters_temp; + 
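+        % The stored opt_parameters files come in two layouts: 8 columns
+        % (opt_parameters_names) or 11 columns including U_opt/S_opt/V_opt
+        % (opt_parameters_names_long); the branches above remap the column
+        % indices accordingly. The else below only fires when neither
+        % opt_parameters nor opt_parameters_temp was found in the loaded file.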
else + disp('Something''s wrong!'); + end + % opt_dir = [detailed_results_folder, '/opt_parameters_' num2str(i)]; + % mkdir(opt_dir); + % cd(opt_dir); + + d_folder_opt = [d_folder, '/opt_parameters_' num2str(i)]; + mkdir(d_folder_opt); + cd(d_folder_opt); + + % for ii=1:size(temp_opt_param,1) + % %% visualize results + % % write brain vector to nifti file + % nk_WriteVol(temp_opt_param{ii,opt_param_u}, ['brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], 2, NM.brainmask{1,1}, cell2mat(NM.badcoords), 0, 'gt'); + % % dp_resample_image([d_folder_opt, '/brain_opt_parameters_' num2str(i), '_' num2str(temp_opt_param{ii,1})], [1 1 1]); + % end + + %% visualize behavior vector in barplot + + for ii=1:(size(temp_opt_param,1)) + x = temp_opt_param{ii,opt_param_v}; + f=subplot(round(size(temp_opt_param,1)/2),2,ii); + nn=0; + hold on + temp_legend=[]; temp_all = 0; + for iii=1:size(selected_variables,2) + switch class(selected_variables{2,iii}) + case 'struct' + fields = fieldnames(input.selected_features{2,(strcmp(input.selected_features(1,:),input.selected_features{1,iii}))}); + for iiii=1:size(fields,1) + nn=nn+1; + temp_current=size(selected_variables{2,iii}.(fields{iiii}),2); + bar((temp_all+1):(temp_all+temp_current),x((temp_all+1):(temp_all+temp_current)), 'FaceColor', colorpattern_LV(nn,:)); + temp_all = temp_all+temp_current; + temp_legend{nn} = [selected_variables{1,iii}, ' ', strrep(fields{iiii}, '_', ' ')]; + hold on + end + case 'double' + nn=nn+1; + temp_current=size(selected_variables{2,iii},2); + bar((temp_all+1):(temp_all+size(temp_current,2)),x((temp_all+1):(temp_all+size(temp_current,2))), 'FaceColor', colorpattern_LV(nn,:)); + temp_all = temp_all+size(temp_current,2); + temp_legend{nn} = selected_variables{1,iii}; + hold on + end + end + axis([0 (size(temp_opt_param{ii,opt_param_v},1)+1) -1 1]); + xlabel({'\color{black}clinical features'}, 'FontSize', font_size); + ylabel('weights', 'FontSize', font_size); + if temp_opt_param{ii,opt_param_p}<=output.pvalue_FDR(i) + significance_opt = 'significant'; + else + significance_opt = 'not significant'; + end + subplot_title = {['Iteration ', num2str(ii), ', p-value (FDR-corrected) = ' num2str(temp_opt_param{ii,opt_param_p}), ', Spearman''s RHO = ', num2str(temp_opt_param{ii,opt_param_RHO}), significance_opt]}; + title(subplot_title, 'FontSize', font_size, 'FontWeight', 'normal'); + + end + % set(gcf, 'Position', get(0, 'Screensize')); + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + first_line = strrep([input.name, ' grid_density_x=' num2str(grid_x.density), ' grid_density_y=' num2str(grid_y.density), ', LV ',num2str(i)], '_', ' '); + second_line = ['p-value (FDR-corrected) = ' num2str(output.final_parameters{i,opt_p}), ', Spearman''s RHO = ', num2str(output.final_parameters{i,opt_RHO})]; + if output.final_parameters{i,opt_p}<=output.pvalue_FDR(i) + third_line = ['significant']; + else + third_line = ['not significant']; + end + suptitle({first_line; [second_line, ', ' third_line]}); % add third line + + % print([detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0'); + % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png'); + + print([d_folder_opt, '/behavior_opt_parameters_',num2str(i)], '-dpng', '-r0'); + saveas(f, [d_folder_opt, '/behavior_opt_parameters_',num2str(i)], 'png'); + % saveas(f, [d_folder_opt, '/behavior_opt_parameters_',num2str(i)], 'eps'); + saveas(f,[d_folder_opt, 
'/behavior_opt_parameters_',num2str(i)],'epsc'); + % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'fig'); + % saveas(f, [detailed_results_folder, '/behavior_opt_parameters_',num2str(i)], 'png'); + close all; + end + +end + +if any(strcmp(IN.specific, 'correlation') | all_jobs) + + for i=1:size(output.CV.cv_outer_indices.TestInd,2) + output.CV.cv_outer_indices.TestInd{2,i} = input.data_collection.Labels(output.CV.cv_outer_indices.TestInd{1,i}); + end + + % plot all latent scores according to LVs + s_folder = [collection_folder, '/correlation']; + mkdir(s_folder); + latent_selection = {index_epsilon, index_omega; LS_epsilon, LS_omega; 'epsilon', 'omega'}; + colorpattern_LS = hsv(size(input.selected_studygroups,2)); + fields = fieldnames(output.post_hoc_correlations.correlations); + fields(contains(fields, 'RHO')|contains(fields, 'p'))=[]; + for ff=1:size(fields,1) + p_sig_collection = output.post_hoc_correlations.correlations.(fields{ff}).table_p.Variables < 0.05; + for l=1:size(latent_selection,2) + for i=1:size(output.post_hoc_correlations.correlations.(fields{ff}).table_p.Variables,1) + for ii=1:size(output.post_hoc_correlations.correlations.(fields{ff}).table_p.Variables,2) + if p_sig_collection(i,ii) + f=figure(); + for iii=1:size(input.selected_studygroups,2) + w = output.final_parameters{round(ii/2),1}; + x = output.final_parameters{round(ii/2),latent_selection{1,l}}(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,iii})); + temp=output.post_hoc_correlations.data_collection(:,i); + y = temp(strcmp(output.CV.cv_outer_indices.TestInd{2,w}, input.selected_studygroups{1,iii})); + plot(x,y,'.', 'MarkerSize', marker_size, 'color',colorpattern_LS(iii,:)); + hold on + end + + xlabel(latent_selection{2,l}, 'FontSize', font_size); + y_temp = output.post_hoc_correlations.data_table.Properties.VariableNames(ii); + y_temp = strrep(y_temp, '_', ' '); + y_temp = strrep(y_temp, 'T0',''); + y_temp = strrep(y_temp, 'Screening',''); + ylabel(y_temp, 'FontSize', font_size); + title(['p-value = ' num2str(output.post_hoc_correlations.correlations.(fields{ff}).table_p{i,ii}), ', Spearman''s RHO = ', num2str(output.post_hoc_correlations.correlations.(fields{ff}).table_RHO{i,ii})], 'FontSize', font_size); % add third line + [~, lgd_data] = legend({input.selected_studygroups{1,:}}, 'FontSize', font_size); + lgd_line = findobj(lgd_data, 'type', 'line'); %// objects of legend of type line + set(lgd_line, 'Markersize', marker_size); %// set marker size as desired + hold on + + set(gcf, 'Position', get(0, 'Screensize')); + set(gcf,'PaperPositionMode','auto') + print(f, [s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}], '-dpng', '-r0'); + saveas(f, [s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}, '.fig']); + % saveas(f, [s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}, '.eps']); + saveas(f,[s_folder, '/latent_scores_LV', num2str(round(ii/2)), '_', latent_selection{3,l}, '_', output.post_hoc_correlations.correlations.(fields{ff}).table_RHO.Properties.VariableNames{ii}],'epsc'); + close all; + + end + end + end + end + end + save(IN.results_path, 'input', 'output', 
'setup');
+
+end
+
+end
diff --git a/Visualization_Module/fdr.m b/Visualization_Module/fdr.m
new file mode 100644
index 0000000..adab986
--- /dev/null
+++ b/Visualization_Module/fdr.m
@@ -0,0 +1,134 @@
+function varargout = fdr(varargin)
+% Computes the FDR-threshold for a vector of p-values.
+%
+% Usage:
+% [pthr,pcor,padj] = fdr(pvals)
+% fdr(pval,q)
+% fdr(pval,q,cV)
+%
+% Inputs:
+% pvals = Vector of p-values.
+% q     = Allowed proportion of false positives (q-value).
+%         Default = 0.05.
+% cV    = If 0, uses a harmonic sum for c(V). Otherwise uses c(V)=1.
+%         Default = 1.
+%
+% Outputs:
+% pthr = FDR threshold.
+% pcor = FDR corrected p-values.
+% padj = FDR adjusted p-values.
+%
+% Note that the corrected and adjusted p-values do **not** depend
+% on the supplied q-value, but they do depend on the choice of c(V).
+%
+% References:
+% * Benjamini & Hochberg. Controlling the false discovery
+%   rate: a practical and powerful approach to multiple testing.
+%   J. R. Statist. Soc. B (1995) 57(1):289-300.
+% * Yekutieli & Benjamini. Resampling-based false discovery rate
+%   controlling multiple test procedures for correlated test
+%   statistics. J. Stat. Plan. Inf. (1999) 82:171-96.
+%
+% ________________________________
+% Anderson M. Winkler
+% Research Imaging Center/UTHSCSA
+% Dec/2007 (first version)
+% Nov/2012 (this version)
+% http://brainder.org

+% Accept arguments
+switch nargin,
+    case 0,
+        error('Error: Not enough arguments.');
+    case 1,
+        pval = varargin{1};
+        qval = 0.05;
+        cV   = 1;
+    case 2,
+        pval = varargin{1};
+        qval = varargin{2};
+        cV   = 1;
+    case 3,
+        pval = varargin{1};
+        qval = varargin{2};
+        if varargin{3}, cV = 1;
+        else cV = sum(1./(1:numel(pval))) ;
+        end
+    otherwise
+        error('Error: Too many arguments.')
+end

+% Check if pval is a vector
+if numel(pval) ~= length(pval),
+    error('p-values should be a row or column vector, not an array.')
+end

+% Check if pvals are within the interval
+if min(pval) < 0 || max(pval) > 1,
+    error('Values out of range (0-1).')
+end

+% Check if qval is within the interval
+if qval < 0 || qval > 1,
+    error('q-value out of range (0-1).')
+end

+% ========[PART 1: FDR THRESHOLD]========================================

+% Sort p-values
+[pval,oidx] = sort(pval);

+% Number of observations
+V = numel(pval);

+% Order (indices), in the same size as the pvalues
+idx = reshape(1:V,size(pval));

+% Line to be used as cutoff
+thrline = idx*qval/(V*cV);

+% Find the largest pval, still under the line
+thr = max(pval(pval<=thrline));

+% Deal with the case when all the points under the line
+% are equal to zero, and other points are above the line
+if thr == 0,
+    thr = max(thrline(pval<=thrline));
+end

+% Case when it does not cross
+if isempty(thr), thr = 0; end

+% Returns the result
+varargout{1} = thr;

+% ========[PART 2: FDR CORRECTED]========================================

+if nargout == 2 || nargout == 3,

+    % p-corrected
+    pcor = pval.*V.*cV./idx;

+    % Sort back to the original order and output
+    [~,oidxR] = sort(oidx);
+    varargout{2} = pcor(oidxR);
+end

+% ========[PART 3: FDR ADJUSTED ]========================================

+if nargout == 3,

+    % Loop over each sorted original p-value
+    padj = zeros(size(pval));
+    prev = 1;
+    for i = V:-1:1,
+        % The p-adjusted for the current p-value is the smallest slope among
+        % all the slopes of each of the p-values larger than the current one
+        % Yekutieli & Benjamini (1999), equation #3.
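+        % Walking from the largest to the smallest sorted p-value, each adjusted
+        % value is capped by the one computed just before it, which keeps padj
+        % monotone. A worked example with pval = [0.01 0.02 0.05], V = 3, cV = 1:
+        % padj(3) = 0.05*3/3 = 0.05; padj(2) = min(0.05, 0.02*3/2) = 0.03;
+        % padj(1) = min(0.03, 0.01*3/1) = 0.03.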
+        padj(i) = min(prev,pval(i)*V*cV/i);
+        prev = padj(i);
+    end
+    varargout{3} = padj(oidxR);
+end

+% That's it!
\ No newline at end of file
diff --git a/Visualization_Module/hyperopt.prj b/Visualization_Module/hyperopt.prj
new file mode 100644
index 0000000..1b3ef07
--- /dev/null
+++ b/Visualization_Module/hyperopt.prj
@@ -0,0 +1,123 @@
[The XML markup of this MATLAB Compiler project file was lost in extraction; the surviving element values are kept below in their original order.]
+hyperopt
+1.0
+/hyperopt/
+option.installpath.user
+${PROJECT_ROOT}/hyperopt/for_testing
+${PROJECT_ROOT}/hyperopt/for_redistribution_files_only
+${PROJECT_ROOT}/hyperopt/for_redistribution
+false
+true
+subtarget.standalone
+35000
+35010
+35001
+true
+false
+false
+MyAppInstaller_web
+MyAppInstaller_mcr
+MyAppInstaller_app
+false
+false
+${PROJECT_ROOT}/dp_ICV_hyperopt.m
+${PROJECT_ROOT}/dp_corrections.m
+${PROJECT_ROOT}/dp_correctscale.m
+${PROJECT_ROOT}/dp_deflatescale.m
+${PROJECT_ROOT}/dp_projection.m
+${PROJECT_ROOT}/dp_spls.m
+${PROJECT_ROOT}/dp_spls_full.m
+${PROJECT_ROOT}/dp_standardize.m
+${PROJECT_ROOT}/dp_standardize_comb.m
+${PROJECT_ROOT}/proj_def.m
+/volume/DP_FEF/ScrFun/ScriptsRepository/SPLS_Toolbox/hyperopt/for_testing/run_hyperopt.sh
+/volume/DP_FEF/ScrFun/ScriptsRepository/SPLS_Toolbox/hyperopt/for_testing/readme.txt
+/volume/DP_FEF/ScrFun/ScriptsRepository/SPLS_Toolbox/hyperopt/for_testing/splash.png
+/volume/DP_FEF/ScrFun/ScriptsRepository/SPLS_Toolbox/hyperopt/for_testing/hyperopt
+/opt/matlab/R2015a
+true
+true
+false
+false
+false
+false
+false
+true
+false
+3.10.0-1062.4.3.el7.x86_64
+false
+true
+glnxa64
+true
\ No newline at end of file
diff --git a/Visualization_Module/parsave.m b/Visualization_Module/parsave.m
new file mode 100644
index 0000000..02d88be
--- /dev/null
+++ b/Visualization_Module/parsave.m
@@ -0,0 +1,3 @@
+function parsave(fname,RHO)
+save(fname,'RHO')
+end
\ No newline at end of file
diff --git a/Visualization_Module/radarplot.zip b/Visualization_Module/radarplot.zip
new file mode 100644
index 0000000000000000000000000000000000000000..075c05708d8e59841d460d773df213230c25539b
GIT binary patch
literal 1968
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001
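parsave.m above exists because MATLAB's transparency rules forbid calling save() directly inside a parfor body; routing the call through a function is the standard workaround. A minimal usage sketch follows — the output folder and the bootstrapped statistic are hypothetical stand-ins, not part of this patch:

out_dir = tempdir;  % hypothetical output location
parfor b = 1:100
    % stand-in statistic; in the toolbox this would be a resampled RHO value
    RHO = corr(rand(50,1), rand(50,1), 'Type', 'Spearman');
    parsave(fullfile(out_dir, sprintf('RHO_boot_%03d.mat', b)), RHO);
end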
diff --git a/Visualization_Module/testing.m b/Visualization_Module/testing.m
new file mode 100644
--- /dev/null
+++ b/Visualization_Module/testing.m
+>> x=1:10
+x =
+     1     2     3     4     5     6     7     8     9    10
+>> y=rand(1,10)
+y =
+  Columns 1 through 7
+    0.8147    0.9058    0.1270    0.9134    0.6324    0.0975    0.2785
+  Columns 8 through 10
+    0.5469    0.9575    0.9649
+>> trapz(x(4:6),y(4:6))
+ans =
+    1.1378
+
+X = 0:pi/100:pi;
+Calculate the sine of X.
+
+Y = sin(X);
+Integrate Y using trapz.
+
+Q = trapz(X(98:end),Y(98:end))
+
+plot(X,Y)
+
+bins = 0:0.001:1;
+
+RHO_test = 0.2163;
+
+fun = @(x_var, y_var)abs(x_var-y_var);
+fun(RHO_test,3)
+for i=1:size(bins,2)
+    evalbins(i) = fun(bins(i), RHO_test);
+end
+
+h = histogram(RHO_b_collection, 'BinWidth', 0.001);
+x = 0:0.001:((h.NumBins-1)*0.001);
+y = h.Values;
+
+[~, index] = min(abs(x-RHO_test));
+auc_ratio = trapz(x(index:end), y(index:end))/trapz(x,y)
+auc_ratio = sum(cumtrapz(x(index:end), y(index:end)))/sum(cumtrapz(x,y))
+
+IN.dist = RHO_b_collection;
+IN.val = RHO_test;
+
+[p_val, h] = dp_auc_testing(IN)
+testing_nr=2
+
+switch testing_nr
+    case 2
+        disp('1');
+    case 1
+        disp('2');
+
+    case 3
+        disp('3');
+
+end
diff --git a/Visualization_Module/wmean.zip b/Visualization_Module/wmean.zip
new file mode 100644
index 0000000000000000000000000000000000000000..6566650041fe4e30af16df5bfce15e77876791e0
GIT binary patch
literal 1545
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001
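The scratch script testing.m above prototypes the tail-area significance test that dp_auc_testing (called near its end) encapsulates: the p-value of an observed correlation is approximated by the fraction of the null distribution's histogram area lying at or beyond the observed value. A self-contained sketch, with a simulated null distribution standing in for the bootstrap output the toolbox would supply:

rng(1);                                        % reproducible demo only
RHO_b_collection = 0.15 + 0.05*randn(5000,1);  % stand-in null distribution
RHO_test = 0.2163;                             % observed value, as in the script
h = histogram(RHO_b_collection, 'BinWidth', 0.001);
x = h.BinEdges(1:end-1);                       % left bin edges as the x-grid
y = h.Values;                                  % bin counts
[~, index] = min(abs(x - RHO_test));           % bin closest to the observed value
p_est = trapz(x(index:end), y(index:end)) / trapz(x, y)  % one-sided tail-area ratio

The script also tries a cumtrapz-based variant of the same ratio; both are crude density estimates whose resolution is set by the 0.001 bin width.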