Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
HARMONY/HARMONYHarmonisationScript.m
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
74 lines (72 sloc)
3.73 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% script to harmonise the data for HARMONY | |
% Created by Anne Ruef and Rachele Sanfelici @PRONIA Oct-2020 | |
% | |
% This script harmonises the data coming from the PRONIA database into the
% HARMONY format. Several transformations are performed here: first, a
% one-to-one mapping (one item in the PRONIA database corresponds to one item
% in HARMONY); second, composite items (a HARMONY item is computed from a
% combination of PRONIA items); third, ID items (hard-coded items specific
% to the PRONIA consortium site, double anonymisation)
% | |
% a priori requirements:
% | |
% A good relationship between a clinician (who translates the clinical
% knowledge into rules or logical relationships) and a coder (who translates
% the rules defined by the clinician into machine language)
% | |
% The PRONIA database is in a MATLAB table format.
% It contains all necessary or available items needed to compute the HARMONY
% variables
% | |
% A translator file containing the mapping between PRONIA items and HARMONY items.
% Format of the translator:
% The header of the table should be as follows:
%   'ItemName'     % new name of the item (in this case HARMONY)
%   'OriginalName' % original name from the PortalName (in this case PRONIA)
%   'Type'         % variable type (numeric, string, Date). In this
%                  % work, Date is used to translate the European date
%                  % format to the US date format
% | |
% Pseudo code that computes the items based on the available data. Example:
% extract race_white
%   data_table_out.race_white(ismember(data_table.DEMOG_T0_02_Ethnicity_T0,[1:6]),1) = 1;
%   data_table_out.race_white(~ismember(data_table.DEMOG_T0_02_Ethnicity_T0,[1:6]),1) = 2;
%% load the data
% PRONIA query export (.mat file). The translation step below reads
% data_table_all, which is expected to be provided by this file
% (NOTE(review): confirm the variable name stored in the .mat file).
PRONIADataFile = ['/volume/data/PRONIA/DataDump/16-Aug-2020/QueryData/',...
    'PRONIAQueryTemplate_PRONIA_v3_1_RS4Harmony/PRONIAQueryTemplate_PRONIA_v3_1_RS4Harmony/',...
    'DATA/11-Nov-2020/PRONIAQueryTemplate_PRONIA_v3_1_RS4Harmony_Data_all_11-Nov-2020.mat'];
load(PRONIADataFile)

% Translator spreadsheet holding the PRONIA -> HARMONY item mapping
Dictionnary = ['/opt/PRONIASoftware/Developpment/AnneTesting/NDL_code/DataAllCron/DataQuery/DataDictionnary/HARMONY',...
    '/Translator_AR_HARMONY.xlsx'];

% Visits to process. A single empty entry is used for now; the variable is
% kept so that longitudinal data can be requested later.
Visits = {''};

%% run the translation
% HARMONYRules is defined outside this script; it receives the PRONIA table,
% the visits and the translator path, and returns the harmonised table
% together with a dictionary table.
[data_table_out,Dict] = HARMONYRules(data_table_all,Visits,Dictionnary);
% Put the rows in random order: draw one uniform random key per row and
% reorder the table by the sort order of those keys.
RowCount = size(data_table_out,1);
[~,TempIdx] = sort(rand(RowCount,1));
data_table_out = data_table_out(TempIdx,:);
% Build a text version of the output table in which missing numeric values
% (NaN) are written as '' according to the naming convention. Numeric
% columns are converted to cell arrays of character vectors; all other
% columns are copied unchanged.
data_table_out_char = table;
for i = 1:size(data_table_out,2)
    if isnumeric(data_table_out{1,i})
        % num2str returns a right-justified char matrix for a column and
        % cellstr only removes trailing blanks. strtrim removes the left
        % padding so that (a) values are not written with leading spaces
        % and (b) a padded ' NaN' is still recognised as missing below.
        ColumnText = strtrim(cellstr(num2str(data_table_out{:,i})));
        ColumnText(strcmp(ColumnText,'NaN')) = {''};
        data_table_out_char{:,i} = ColumnText;
    else
        data_table_out_char{:,i} = data_table_out{:,i};
    end
end
% The columns were added by position, so restore the original variable names.
data_table_out_char.Properties.VariableNames = data_table_out.Properties.VariableNames;
% Folder that receives the three HARMONY export files (defined once instead
% of being repeated in every writetable call).
OutputDir = '/opt/PRONIASoftware/Developpment/AnneTesting/NDL_code/DataAllCron/DataQuery/DataDictionnary/HARMONY';

% Columns whose name is listed in Dict.VariableDescription are written to
% the MRI file; all remaining columns are written to the clinical file.
IsMRIVariable = ismember(data_table_out.Properties.VariableNames,Dict.VariableDescription);

% write the clinical data
writetable(data_table_out_char(:,~IsMRIVariable),...
    fullfile(OutputDir,'Real_Clinical_HARMONY.csv'),...
    'Delimiter',',','QuoteStrings',true)

% write the MRI data
writetable(data_table_out_char(:,IsMRIVariable),...
    fullfile(OutputDir,'Real_MRI_HARMONY.csv'),...
    'Delimiter',',','QuoteStrings',true)

% save the MRI dictionary
writetable(Dict,...
    fullfile(OutputDir,'MRI_Dictionnary_HARMONY.csv'),...
    'Delimiter',',','QuoteStrings',true)