Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
HARMONY/HARMONYRules.m
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
607 lines (580 sloc)
26.4 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function [data_table_out,OutDict] = HARMONYRules(data_table,Visits,Dictionnary)
%% DESCRIPTION
% Function to translate PRONIA variables to HARMONY ones. See
% /opt/PRONIASoftware/Developpment/AnneTesting/NDL_code/DataAllCron/DataQuery/DataDictionnary/HARMONY/HARMONY Clinical measures data dictionary Vipar 091720.xlsx
% and
% /opt/PRONIASoftware/Developpment/AnneTesting/NDL_code/DataAllCron/DataQuery/DataDictionnary/HARMONY/Translator_RS_Oct2020.xlsx
% for more details and instructions on how the items are translated.
%% INPUTS
% - data_table  = table. Table containing all items necessary for the
%                 translation and recoding of the pronia variables to
%                 harmony variables (one row per participant).
% - Visits      = cell array of string. Currently not used, but if
%                 longitudinal data are required, this will become handy.
% - Dictionnary = string. Full path to the dictionary (.xls/.xlsx or .csv)
%                 that contains the translation from pronia to harmony.
%                 The columns read by this function are:
%                 'varName'         % new name of the item (harmony)
%                 'PRONIAvar'       % original name of the item (pronia),
%                                   % without the '_Screening'/'_T0' suffix
%                 'Type'            % 'Date' triggers the date reformatting
%                 'Values'          % value range / categories
%                 'Recode'          % 1 when the item is recoded by the
%                                   % rules of this function
%                 'RacheleComments' % removed from the output dictionary
%% OUTPUT
% - data_table_out = table. Items recoded according to the rules in this
%                    function.
% - OutDict        = table. Dictionary describing the MRI items (scanner
%                    parameters and FreeSurfer regions of interest) added
%                    to data_table_out. Will be removed in the future.
%% ERRORS
% - no input at all, or no dictionary path given
% - dictionary file extension is neither xls* nor csv
% - more than 999 participants for one site (the random participant number
%   has at most 3 digits)
%% CREDENTIALS
% Created by Rachele Sanfelici, 10/2020, @PRONIA
% Modified by AnneRuef 28-Oct-2020 @PRONIA
% Doc improved by Anne Ruef 22-Apr-2021
%%
% inputs checker
if nargin == 0
    error('HARMONYRules: not enough inputs arguments')
end
if ~exist('Visits','var') || isempty(Visits)
    % default value, not used yet but it will be if longitudinal data is
    % required
    Visits = {''}; %#ok<NASGU>
end
if ~exist('Dictionnary','var') || isempty(Dictionnary)
    % without the dictionary the renaming step and the output dictionary
    % cannot be built, fail early with a clear message
    error('HARMONYRules: the path to the dictionnary is required')
end
% read the dictionnary, the reader is chosen from the file extension only
% (a folder name containing 'xls' or 'csv' must not influence the choice)
[~,~,DictExt] = fileparts(char(Dictionnary));
if contains(lower(DictExt),'xls')
    [~,~,RawDict] = xlsread(Dictionnary);
    Dict = cell2table(RawDict(2:end,:));
    Dict.Properties.VariableNames = RawDict(1,:);
elseif contains(lower(DictExt),'csv')
    Dict = readtable(Dictionnary);
else
    error('HARMONYRules: unknown dictionnary file type')
end
nRows = size(data_table,1);
%%
% initialise the table
data_table_out = table;
%%
% extract the consortium == 3 this is PRONIA (for other sites change that)
data_table_out.consortium = repmat(3,nRows,1);
%%
% extract the site and psn
% 20="PRONIA/Bari",
% 21="PRONIA/Basel",
% 22="PRONIA/Birmingham",
% 23="PRONIA/Cologne",
% 24="PRONIA/Dusseldorf",
% 25="PRONIA/Milan",
% 26="PRONIA/Munich",
% 27="PRONIA/Munster",
% 28="PRONIA/Turku",
% 29="PRONIA/Udine",
sites = {'UBARI','UBS','Uni BHAM','UKK','UDUS','MilanNig','LMU','UMUENS','Uni Turku','Uni Udine'};
SiteCodes = 20:29;
MaxRandNb = 999;
data_table_out.site = nan(nRows,1);
RandNb = nan(nRows,1);
for i = 1:numel(sites)
    IsSite = strcmp(data_table.INSTITUTE_SHORTNAME,sites{i});
    nSite = sum(IsSite);
    if nSite > MaxRandNb
        error('HARMONYRules: more than %d participants for site %s, cannot draw unique random numbers',MaxRandNb,sites{i})
    end
    data_table_out.site(IsSite) = SiteCodes(i);
    % draw a random number per participant, unique within the site
    RandNb(IsSite) = randperm(MaxRandNb,nSite);
end
% create the subject id: '3' (consortium) + 2 digits site code + random
% number left-padded with zeros to 4 digits, e.g. 3 20 0005 -> 3200005.
% Left padding keeps the ids unique (right padding would map 5, 50 and 500
% to the same id). Participants of an unknown site keep NaN.
data_table_out.subject_id = 3e6 + data_table_out.site*1e4 + RandNb;
%%
% extract the subject type or studygroup. NOTE How do I code for ROD ?
% recode the study group, any other group stays NaN
data_table_out.subjecttype = nan(nRows,1);
data_table_out.subjecttype(strcmp(data_table.Studygroup,'HC'),1) = 1;
data_table_out.subjecttype(strcmp(data_table.Studygroup,'CHR'),1) = 2;
%%
% extract age in months and assessment date
data_table_out.assessment_age = nan(nRows,1);
birthdate_var = data_table.BIRTHDATE;
% the MRI date is the reference, missing dates are filled with the first
% available questionnaire examination date
data_table_out.assessment_date = data_table.Study_date_sMRI_T0;
%% TODO need to fix the query to export all ALL_QUEST_Examination_DATE for all questionnaires
quests = {'SIPS_P','SPI_A_COGDIS','GAF','GF','TREATMENT','DROPOUT'};
visits = {'Screening','T0'};
for q = 1:numel(quests)
    for v = 1:numel(visits)
        DateName = ['ALL_QUEST_Examination_Date_',quests{q},'_',visits{v}];
        if ismember(DateName,data_table.Properties.VariableNames)
            MissingDate = cellfun(@isempty,data_table_out.assessment_date);
            data_table_out.assessment_date(MissingDate) = data_table.(DateName)(MissingDate);
        end
    end
end
% create a mask for empty values
Mask = cellfun(@isempty,birthdate_var) | cellfun(@isempty,data_table_out.assessment_date);
% create a warning in case we are missing age
if any(Mask)
    warning('HARMONYRules: some participants do not have age')
end
% number of whole calendar months between birth date and assessment date,
% both dates are strings in the format dd-MM-yyyy
MonthsBetween = @(Birth,Assessment) split(caldiff([datetime(Birth,'InputFormat','dd-MM-yyyy'),datetime(Assessment,'InputFormat','dd-MM-yyyy')],'months'),'mo');
data_table_out.assessment_age(~Mask) = cell2mat(cellfun(MonthsBetween,birthdate_var(~Mask),data_table_out.assessment_date(~Mask),'UniformOutput',false));
%%
% visit_month == 0 because we are looking at baseline, expand this for
% follow up data
data_table_out.visit_month = nan(nRows,1);
data_table_out.visit_month(~Mask) = 0;
%%
% gender == 0
% NOTE(review): gender is hard coded to 0 for every participant with an
% age, it is not read from data_table - confirm that this is intended
data_table_out.gender = nan(nRows,1);
data_table_out.gender(~Mask) = 0;
%%
% extract education
data_table_out.education_current = nan(nRows,1);
data_table_out.education_current(data_table.DEMOG_T0T1T2_38A2_WorkCurrent_Type_T0 == 2,1) = 1;
data_table_out.education_current(data_table.DEMOG_T0T1T2_38A2_WorkCurrent_Type_T0 ~= 2,1) = 2;
%%
% education highest, later rules overwrite earlier ones
data_table_out.education_highest = nan(nRows,1);
data_table_out.education_highest(data_table.DEMOG_T0T1T2_33_GraduationOther_T0 == 1,1) = 1;
data_table_out.education_highest(data_table.DEMOG_T0T1T2_34_GraduationWithout_T0 == 1,1) = 1;
data_table_out.education_highest(data_table.DEMOG_T0T1T2_32_GraduationUni_T0 == 1,1) = 2;
data_table_out.education_highest(data_table.DEMOG_T0T1T2_35_UniDegree_T0 == 1,1) = 5;
%%
% extract the race items: 1 when the ethnicity code belongs to the listed
% codes, 2 otherwise
% NOTE(review): a missing ethnicity (NaN) is recoded as 2 for every race
% item - confirm that this is intended
RaceRules = {'race_white',1:6;...
    'race_black',14:16;...
    'race_east_asian',10:12;...
    'race_asian_indian',7:9;...
    'race_middle_eastern',13;...
    'race_inter_racial',17:20;...
    'race_other',21:23};
for r = 1:size(RaceRules,1)
    IsRace = ismember(data_table.DEMOG_T0_02_Ethnicity_T0,RaceRules{r,2});
    Recoded = nan(nRows,1);
    Recoded(IsRace) = 1;
    Recoded(~IsRace) = 2;
    data_table_out.(RaceRules{r,1}) = Recoded;
end
%%
% extract 'Immigrated', 1 when an age of immigration is available
data_table_out.Immigrated = nan(nRows,1);
data_table_out.Immigrated(~isnan(data_table.DEMOG_T0_01_AgeImmigration_T0),1) = 1;
data_table_out.Immigrated(isnan(data_table.DEMOG_T0_01_AgeImmigration_T0),1) = 2;
%%
% extract 'Referral_Source'
%% double check the numerical correspondance
%% to code better ref 7 not existing (it would be coded 170)
% first column: number of the pronia referrer item, second column: harmony
% code. The order matters, a later referrer overwrites an earlier one
ReferralRules = [1,130;2,120;3,120;4,120;5,160;6,84;8,140;12,140;9,100;10,110;11,180];
data_table_out.Referral_Source = nan(nRows,1);
for r = 1:size(ReferralRules,1)
    RefName = ['REF_03_Referrer_Screening_',num2str(ReferralRules(r,1))];
    data_table_out.Referral_Source(ismember(data_table.(RefName),1),1) = ReferralRules(r,2);
end
%%
% Hospitalization, the coding is swapped between pronia and harmony
data_table_out.Hospitalization = nan(nRows,1);
data_table_out.Hospitalization(ismember(data_table.TREAT_HOSP_00_EverHospitalized_Screening,1),1) = 2;
data_table_out.Hospitalization(ismember(data_table.TREAT_HOSP_00_EverHospitalized_Screening,2),1) = 1;
%%
% extract 'caarms_e': 1 when at least one of the three group items is 1,
% 0 otherwise (missing values count as 0)
%% original, the name of the item does not match
% P.CAARMS_CHR_01_Q3_Group(i)==1 || P.CAARMS_CHR_01_Q2_Group(i)==1 || P.CAARMS_CHR_01_Q6_Group(i)==1
% double check if this is correct
data_table_out.caarms_e = double(data_table.CAARMS_CHR_01_Q3_Group_Screening == 1 | ...
    data_table.CAARMS_CHR_02_Q2_Group_Screening == 1 | ...
    data_table.CAARMS_CHR_03_Q6_Group_Screening == 1);
%%
% extract sips
SubSips = {'n','d','g'};
SubSipsPronia = {'N','D','G'};
for k = 1:numel(SubSips)
    for i = 1:6
        if i > 4 && ismember(SubSips(k),{'d','g'})
            % skip if we have the items d and g and is bigger than 4, no
            % rules for that
            continue
        end
        SipsPrefix = ['SIPS_',SubSipsPronia{k},num2str(i)];
        Severity = data_table.([SipsPrefix,'_01_QUALY_B_00_0_SeverityScale_T0']);
        Onset = data_table.([SipsPrefix,'_01_QUALY_C_00_Onset_T0']);
        OnsetCode = nan(nRows,1);
        % 1="NA (Severity < 3)"
        OnsetCode(Severity < 3) = 1;
        % 2="Onset Date available"
        OnsetCode(Onset == 3) = 2;
        % 3="Lifetime or cannot recall year of onset"
        OnsetCode(Onset == 1 | Onset == 2) = 3;
        data_table_out.([SubSips{k},num2str(i),'_onsetdatecode']) = OnsetCode;
    end
end
%%
% extract the 'spi_x_sum' items, see sumSpiItems for the rule
% NOTE(review): the last item of spi_d_sum is SPI_A_A3_1_T0 (an A item),
% kept as in the original rules - confirm that it should not be a D item
SpiRules = {'spi_a_sum',{'SPI_A_A1_1_1_T0','SPI_A_A1_2_1_T0','SPI_A_A1_3_1_T0',...
    'SPI_A_A2_1_1_T0','SPI_A_A2_2_1_T0','SPI_A_A3_1_T0'};...
    'spi_b_sum',{'SPI_A_COGDIS_B1_1_Screening','SPI_A_B2_1_T0','SPI_A_B3_1_T0',...
    'SPI_A_B4_1_T0','SPI_A_B5_1_T0','SPI_A_B6_1_T0'};...
    'spi_c_sum',{'SPI_A_C1_1_T0','SPI_A_COGDIS_C2_1_Screening','SPI_A_COGDIS_C3_1_Screening',...
    'SPI_A_COGDIS_C4_1_Screening','SPI_A_COGDIS_C5_1_Screening','SPI_A_C6_1_T0'};...
    'spi_d_sum',{'SPI_A_D1_1_T0','SPI_A_D2_1_T0','SPI_A_COGDIS_D3_1_Screening',...
    'SPI_A_COGDIS_D4_1_Screening','SPI_A_D5_1_Screening','SPI_A_A3_1_T0'};...
    'spi_e_sum',{'SPI_A_E1_1_T0','SPI_A_E2_1_T0','SPI_A_E3_1_T0',...
    'SPI_A_E4_1_T0','SPI_A_E5_1_T0','SPI_A_E6_1_T0'};...
    'spi_f_sum',{'SPI_A_F1_1_T0','SPI_A_F2_1_Screening','SPI_A_F3_1_Screening',...
    'SPI_A_F4_1_T0','SPI_A_F5_1_Screening','SPI_A_F6_1_T0'}};
for r = 1:size(SpiRules,1)
    data_table_out.(SpiRules{r,1}) = sumSpiItems(data_table,SpiRules{r,2});
end
%%
% extract 'ctq' see function CTQRules on how these are computed. NOTE some
% of the subscales are not there because the number of the CTQ differs from
% Pronia
% recoded items (add *1 to the item and reverse some of them)
for i = 1:28
    ItemNb = sprintf('%02d',i);
    data_table_out.(['ctq_',ItemNb]) = data_table.(['CTQ_',ItemNb,'_recod_T0']);
end
% extract ctq sexual abuse
data_table_out.ctqscore_sa = data_table.CTQ_sabu_T0;
% extract ctq physical neglect
data_table_out.ctqscore_pn = data_table.CTQ_pneg_T0;
% extract ctq validity, not in pronia rules might be worth it to add it
% CTQ validity items=CTQ_10 + CTQ_16 + CTQ_22
data_table_out.ctqscore_val = data_table.CTQ_10_recod_T0 + data_table.CTQ_16_recod_T0 + data_table.CTQ_22_recod_T0;
%%
% rename the items that are not recoded, following the dictionary
for d = 1:size(Dict,1)
    % check if it needs to be recoded, and if yes check that it is in the
    % data_table_out
    if isequal(Dict.Recode(d),1)
        if ~ismember(Dict.varName(d),data_table_out.Properties.VariableNames)
            disp([Dict.varName{d},' does not exist in data_table_out but the dictionary says that it is recoded'])
        end
        continue
    end
    % check if there is a corresponding variable name, if not set all the
    % values to nan
    if isequal(Dict.PRONIAvar(d),{'NaN'})
        data_table_out.(Dict.varName{d}) = nan(nRows,1);
        continue
    end
    % name of the pronia item in data_table ('' when it is not there)
    SourceName = findSourceName(data_table,Dict.PRONIAvar{d});
    % check if it is a date and transform into HARMONY date format
    if isequal(Dict.Type(d),{'Date'})
        if isempty(SourceName)
            disp([Dict.varName{d},' does not exist in data_table but has a corresponding name in dictionnary replace the whole column by nan'])
            data_table_out.(Dict.varName{d}) = nan(nRows,1);
            continue
        end
        TempDate = data_table.(SourceName);
        data_table_out.(Dict.varName{d}) = cell(nRows,1);
        for p = 1:numel(TempDate)
            if ~isempty(TempDate{p})
                % swap day and month of a 10 characters date:
                % dd-MM-yyyy -> MM-dd-yyyy
                data_table_out.(Dict.varName{d}){p} = TempDate{p}([4,5,3,1,2,6,7,8,9,10]);
            end
        end
        continue
    end
    % just rename the item
    if ~isempty(SourceName)
        data_table_out.(Dict.varName{d}) = data_table.(SourceName);
    else
        if isnumeric(Dict.varName{d}) && all(isnan(Dict.varName{d}))
            % a NaN name marks the end of the filled part of the
            % dictionary, stop here
            break
        end
        disp([Dict.varName{d},' does not exist in data_table but has a corresponding name in dictionnary replace the whole column by nan'])
        data_table_out.(Dict.varName{d}) = nan(nRows,1);
    end
end
%% need to replace nan by empty. Convert the whole table to cell and replace nans by empty
% script to generate dictionnary for mri HARMONY
% step 1. Define the subjects items such as te tr subject id and regions of
% interest
% step 2. Read the data automatically (MetaQualMRI and the text files
% storing the data)
% step 3. create the dictionnary, depending on the number of regions of
% interest
% step 4. create the dictionnary with the following columns:
% * Variable name
% * Variable description
% * Type (continuous, categorical, date)
% * Value (range, category, date format)
%% subject id old
% subjectkey
% src_subject_id
% interview_date
% interview_age
% gender
% data_table_out.consortium
% data_table_out.subject_id
% data_table_out.site
% data_table_out.assessment_date
% data_table_out.assessment_age
% data_table_out.gender
% mri parameters
% data_table_out.site
% acqu_sub_site? no need for this one except if we have a scanner upgrade
%%
% initialise the out dictionnary, the first row is only a template that is
% removed at the end
OutDict = Dict(1,:);
%%
% mri_brand (model name? Mark?)
% 'Manufacturer_sMRI_T0_PhilipsHealthcare';
% 'Manufacturer_sMRI_T0_PhilipsMedicalSystems';
% 'Manufacturer_sMRI_T0_SIEMENS'
% {'SiemensTrio PhilipsIngena'};
% offer of naming convention dictionnary
OutDict = [OutDict;makeDictRow(Dict,'mri_brand',...
    '1="PhilipsMedicalSystemsAchieva",2="PhilipsHealthcareIngenia",3="PhilipsMedicalSystemsIngenuity",4="SIEMENSPrisma",5="SIEMENSPrismafit",6="SIEMENSTrioTim",7="SIEMENSVerio"',...
    'categorical','Brand and scanner type combined')];
% the position of the name is the harmony code
BrandNames = {'Achieva','Ingenia','Ingenuity','Prisma','Prismafit','TrioTim','Verio'};
data_table_out.mri_brand = nan(nRows,1);
for b = 1:numel(BrandNames)
    data_table_out.mri_brand(data_table.(['Model_name_sMRI_T0_',BrandNames{b}])==1) = b;
end
%%
% mri_field_strength 3T by default
% offer of naming convention dictionnary
% 1="3T"
OutDict = [OutDict;makeDictRow(Dict,'mri_field_strength','1="3T"','categorical','Scanner field strength')];
data_table_out.mri_field_strength = nan(nRows,1);
% every participant with a known scanner has a 3T scan (row-wise test,
% isempty would only give one value for the whole column)
data_table_out.mri_field_strength(~isnan(data_table_out.mri_brand)) = 1;
%%
% t1w_acqu_time TA
%%
% t1w_vox_size_x
OutDict = [OutDict;makeDictRow(Dict,'t1w_vox_size_x','0:3:4','continuous','T1-weighted mri voxel size x direction')];
data_table_out.t1w_vox_size_x = data_table.Voxel_size_x_mm_sMRI_T0;
%%
% t1w_vox_size_y
OutDict = [OutDict;makeDictRow(Dict,'t1w_vox_size_y','0:3:4','continuous','T1-weighted mri voxel size y direction')];
data_table_out.t1w_vox_size_y = data_table.Voxel_size_y_mm_sMRI_T0;
%%
% t1w_vox_size_z no value
% data_table_out.t1w_vox_size_z = data_table.Slice_thickness_mm_sMRI_T0;
%%
% t1w_slices
OutDict = [OutDict;makeDictRow(Dict,'t1w_slices','0:300:0','continuous','T1-weighted number of slices')];
data_table_out.t1w_slices = data_table.Number_of_slices_sMRI_T0;
%%
% t1w_orientation
OutDict = [OutDict;makeDictRow(Dict,'t1w_orientation','1="coronal",2="sagittal",3="transversal"','categorical','T1-weighted orientation')];
% the position of the name is the harmony code
OrientName = {'cor','sag','tra'};
data_table_out.t1w_orientation = nan(nRows,1);
for b = 1:numel(OrientName)
    data_table_out.t1w_orientation(data_table.(['Orientation_sMRI_T0_',OrientName{b}])==1) = b;
end
%%
% t1w_fov ?
%%
% t1w_thickness
OutDict = [OutDict;makeDictRow(Dict,'t1w_thickness','0:10:4','continuous','T1-weighted slice thickness')];
data_table_out.t1w_thickness = data_table.Slice_thickness_mm_sMRI_T0;
%%
% t1w_tr
OutDict = [OutDict;makeDictRow(Dict,'t1w_tr','0:3:4','continuous','T1-weighted repetition time')];
data_table_out.t1w_tr = data_table.Repetition_time_ms_sMRI_T0;
%%
% t1w_te
OutDict = [OutDict;makeDictRow(Dict,'t1w_te','0:6:4','continuous','T1-weighted echo time')];
data_table_out.t1w_te = data_table.Echo_time1_ms_sMRI_T0;
%%
% t1w_fa
OutDict = [OutDict;makeDictRow(Dict,'t1w_fa','0:15:0','continuous','T1-weighted flip angle')];
data_table_out.t1w_fa = data_table.Flip_angle_sMRI_T0;
%%
% t1w_pulse_sequ?
%%
% actual mri stuff
% part of the code to integrate in the query system
% folder containing the files. WARNING: these data are not in the pronia
% format
DataFolder = '/volume/HARMONY/Qunex/freesurfer_tables';
TextNames = {'_T0_aparc_stats_hemi_lh.txt','_T0_aparc_stats_hemi_rh.txt','_T0_asegs_stats.txt'};
% columns of the text files that are not regions of interest
SkipNames = {'lh_aparc_area','rh_aparc_area','Measure_volume'};
TempNamesAll = {};
for p = 1:nRows
    for t = 1:numel(TextNames)
        FileName = fullfile(DataFolder,[data_table.PSN{p},TextNames{t}]);
        if ~exist(FileName,'file')
            % if the file does not exist there is no point to continue
            continue
        end
        % NOTE(review): assumes one row per file (one participant) -
        % confirm against the files in DataFolder
        TempData = readtable(FileName);
        RoiNames = TempData.Properties.VariableNames;
        TempNamesAll = [TempNamesAll,RoiNames]; %#ok<AGROW>
        % loop over the columns (numel of a table is rows*columns)
        for d = 1:width(TempData)
            if ismember(RoiNames(d),SkipNames)
                continue
            end
            if ~ismember(RoiNames(d),data_table_out.Properties.VariableNames)
                data_table_out.(RoiNames{d}) = nan(nRows,1);
            end
            data_table_out.(RoiNames{d})(p) = TempData.(RoiNames{d});
        end
    end
end
TempNamesAll = unique(TempNamesAll);
TempNamesAll = TempNamesAll(~ismember(TempNamesAll,SkipNames));
% create the dictionnary, one row per region of interest. The range goes
% from 0 to the rounded up maximum of the item
for i = 1:numel(TempNamesAll)
    RoiRange = ['0:',num2str(ceil(max(data_table_out.(TempNamesAll{i})))),':0'];
    OutDict = [OutDict;makeDictRow(Dict,TempNamesAll{i},RoiRange,'continuous',TempNamesAll{i})]; %#ok<AGROW>
end
% remove the template row
OutDict = OutDict(2:end,:);
OutDict.Properties.VariableNames(ismember(OutDict.Properties.VariableNames,{'PRONIAvar'})) = {'VariableDescription'};
OutDict(:,{'Recode','RacheleComments'}) = [];
%%
% FS_EulerNumber
end

function Total = sumSpiItems(data_table,ItemNames)
% Row-wise sum of the listed SPI-A items, the codes 7, 8 and 9 count as 0.
Items = data_table{:,ItemNames};
Items(ismember(Items,[7,8,9])) = 0;
Total = sum(Items,2);
end

function SourceName = findSourceName(data_table,BaseName)
% Name of the pronia item in data_table: the '_Screening' version first,
% then the '_T0' version, '' when none of them exists.
SourceName = '';
Candidates = {[BaseName,'_Screening'],[BaseName,'_T0']};
for c = 1:numel(Candidates)
    if ismember(Candidates(c),data_table.Properties.VariableNames)
        SourceName = Candidates{c};
        return
    end
end
end

function Row = makeDictRow(Dict,VarName,Values,Type,Description)
% One dictionary row built from the first row of Dict (used as template
% for all the other columns) with the name, values, type and description
% replaced.
Row = Dict(1,:);
Row.varName = matchColumnClass(Dict.varName,VarName);
Row.Values = matchColumnClass(Dict.Values,Values);
Row.Type = matchColumnClass(Dict.Type,Type);
Row.PRONIAvar = matchColumnClass(Dict.PRONIAvar,Description);
end

function Value = matchColumnClass(Column,Value)
% Wrap the value in a cell when the dictionary column is a cell, so that
% the new row can be vertically concatenated with the dictionary.
if iscell(Column) && ~iscell(Value)
    Value = {Value};
end
end