From 20503d264935f2d16b5e282c67a61709318517ee Mon Sep 17 00:00:00 2001
From: Natan Yusupov
Date: Fri, 12 Apr 2024 10:45:56 +0200
Subject: [PATCH] pheno data correction column types

---
Notes (ignored by `git am`; not part of the commit message):

* Both scripts now build `phenodata_clean_df` from `PhenoData_clean@listData`,
  keep the columns personid, plate, slide, array, row, column and arrayid, and
  convert plate, slide, array, row and column to factors.
* `ComBat()` previously received the string held in `correction_variable_N` as
  its `batch` argument; it now receives the column of `phenodata_clean_df`
  named by that string. `model.matrix(~1, ...)` uses the same data frame.
* NOTE(review): this patch was recovered from a copy whose line breaks had been
  collapsed. Indentation and the position of the single blank context line in
  each `-N,8 +M,8` hunk (and in the `-64,6 +64,13` hunk) were inferred from the
  hunk header counts. Run `git apply --check` against the target tree and
  adjust whitespace/blank lines if it does not apply cleanly.
* NOTE(review): context lines in 9_correction_batch_effects_filtered.Rmd carry
  two message typos that this commit leaves untouched: "Second. M-values"
  (period instead of comma) and a missing "and" before "were saved" in the
  third-correction message.

 ...10_correction_batch_effects_unfiltered.Rmd | 19 +++++++++++++------
 .../9_correction_batch_effects_filtered.Rmd   | 19 +++++++++++++------
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/scripts/10_correction_batch_effects_unfiltered.Rmd b/scripts/10_correction_batch_effects_unfiltered.Rmd
index 297e3bd..1fadb42 100644
--- a/scripts/10_correction_batch_effects_unfiltered.Rmd
+++ b/scripts/10_correction_batch_effects_unfiltered.Rmd
@@ -53,6 +53,13 @@ knitr::opts_chunk$set(echo = FALSE)
 summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
 load(paste0(user_choices$project_name, "/processed_data/Betas_clean_unfiltered_quantile_bmiq.Rdata"))
 load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
+phenodata_clean_df <- as.data.frame(PhenoData_clean@listData) %>%
+  dplyr::select(personid, plate, slide, array, row, column, arrayid) %>%
+  mutate(plate = as.factor(plate),
+         slide = as.factor(slide),
+         array = as.factor(array),
+         row = as.factor(row),
+         column = as.factor(column))
 load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
 phenotype_data <- readRDS(paste0(user_choices$project_name, "/processed_data/phenotype_data.rds"))
 ```
@@ -65,8 +72,8 @@ m_values <- apply(Betas_clean_unfiltered_quantile_bmiq, 2, function(x) log2((x)/
 if (correction_variable_1 == "PLEASE FILL IN"){
   print("No first batch correction variable was specified by user, data remains unchanged")
 } else {
-  mod <- model.matrix(~1, data = PhenoData_clean)
-  m_values_combat_1 <- ComBat(m_values, batch = correction_variable_1, mod = mod)
+  mod <- model.matrix(~1, data = phenodata_clean_df)
+  m_values_combat_1 <- ComBat(m_values, batch = phenodata_clean_df[,correction_variable_1], mod = mod)
   save(m_values_combat_1, file = paste0(user_choices$project_name, "/processed_data/m_values_unfiltered_combat_1_", correction_variable_1,".Rdata"))
 
   print(paste0("M-values were first corrected for ", correction_variable_1," and were saved to the processed_data folder"))
@@ -224,8 +231,8 @@ if (correction_variable_1 == "PLEASE FILL IN"){
 if (correction_variable_2 == "PLEASE FILL IN"){
   print("No second batch correction variable was specified by user, data remains unchanged")
 } else {
-  mod <- model.matrix(~1, data = PhenoData_clean)
-  m_values_combat_2 <- ComBat(m_values_combat_1, batch = correction_variable_2, mod = mod)
+  mod <- model.matrix(~1, data = phenodata_clean_df)
+  m_values_combat_2 <- ComBat(m_values_combat_1, batch = phenodata_clean_df[,correction_variable_2], mod = mod)
   save(m_values_combat_2, file = paste0(user_choices$project_name, "/processed_data/m_values_unfiltered_combat_2_", correction_variable_2,".Rdata"))
 
   print(paste0("Second, M-values were corrected for ", correction_variable_2," and were saved to the processed data folder"))
@@ -383,8 +390,8 @@ if (correction_variable_2 == "PLEASE FILL IN"){
 if (correction_variable_3 == "PLEASE FILL IN"){
   print("No third batch correction variable was specified by user, data remains unchanged")
 } else {
-  mod <- model.matrix(~1, data = PhenoData_clean)
-  m_values_combat_3 <- ComBat(m_values_combat_2, batch = correction_variable_3, mod = mod)
+  mod <- model.matrix(~1, data = phenodata_clean_df)
+  m_values_combat_3 <- ComBat(m_values_combat_2, batch = phenodata_clean_df[,correction_variable_3], mod = mod)
   save(m_values_combat_3, file = paste0(user_choices$project_name, "/processed_data/m_values_unfiltered_combat_3_", correction_variable_3,".Rdata"))
 
   print(paste0("Third, M-values were corrected for ", correction_variable_3," and were saved to the processed_data folder"))
diff --git a/scripts/9_correction_batch_effects_filtered.Rmd b/scripts/9_correction_batch_effects_filtered.Rmd
index 6f3ba58..c8f251c 100644
--- a/scripts/9_correction_batch_effects_filtered.Rmd
+++ b/scripts/9_correction_batch_effects_filtered.Rmd
@@ -64,6 +64,13 @@ knitr::opts_chunk$set(echo = FALSE)
 summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
 load(paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered_quantile_bmiq.Rdata"))
 load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
+phenodata_clean_df <- as.data.frame(PhenoData_clean@listData) %>%
+  dplyr::select(personid, plate, slide, array, row, column, arrayid) %>%
+  mutate(plate = as.factor(plate),
+         slide = as.factor(slide),
+         array = as.factor(array),
+         row = as.factor(row),
+         column = as.factor(column))
 phenotype_data <- readRDS(paste0(user_choices$project_name, "/processed_data/phenotype_data.rds"))
 ```
 
@@ -75,8 +82,8 @@ m_values <- apply(Betas_clean_filtered_quantile_bmiq, 2, function(x) log2((x)/(1
 if (correction_variable_1 == "PLEASE FILL IN"){
   print("No first batch correction variable was specified by user, data remains unchanged")
 } else {
-  mod <- model.matrix(~1, data = PhenoData_clean)
-  m_values_combat_1 <- ComBat(m_values, batch = correction_variable_1, mod = mod)
+  mod <- model.matrix(~1, data = phenodata_clean_df)
+  m_values_combat_1 <- ComBat(m_values, batch = phenodata_clean_df[,correction_variable_1], mod = mod)
   save(m_values_combat_1, file = paste0(user_choices$project_name, "/processed_data/m_values_filtered_combat_1_", correction_variable_1,".Rdata"))
 
   print(paste0("M-values were first corrected for ", correction_variable_1," and were saved to the processed_data folder"))
@@ -234,8 +241,8 @@ if (correction_variable_1 == "PLEASE FILL IN"){
 if (correction_variable_2 == "PLEASE FILL IN"){
   print("No second batch correction variable was specified by user, data remains unchanged")
 } else {
-  mod <- model.matrix(~1, data = PhenoData_clean)
-  m_values_combat_2 <- ComBat(m_values_combat_1, batch = correction_variable_2, mod = mod)
+  mod <- model.matrix(~1, data = phenodata_clean_df)
+  m_values_combat_2 <- ComBat(m_values_combat_1, batch = phenodata_clean_df[,correction_variable_2], mod = mod)
   save(m_values_combat_2, file = paste0(user_choices$project_name, "/processed_data/m_values_filtered_combat_2_", correction_variable_2,".Rdata"))
 
   print(paste0("Second. M-values were corrected for ", correction_variable_2," and were saved to the processed data folder"))
@@ -393,8 +400,8 @@ if (correction_variable_2 == "PLEASE FILL IN"){
 if (correction_variable_3 == "PLEASE FILL IN"){
   print("No third batch correction variable was specified by user, data remains unchanged")
 } else {
-  mod <- model.matrix(~1, data = PhenoData_clean)
-  m_values_combat_3 <- ComBat(m_values_combat_2, batch = correction_variable_3, mod = mod)
+  mod <- model.matrix(~1, data = phenodata_clean_df)
+  m_values_combat_3 <- ComBat(m_values_combat_2, batch = phenodata_clean_df[,correction_variable_3], mod = mod)
   save(m_values_combat_3, file = paste0(user_choices$project_name, "/processed_data/m_values_filtered_combat_3_", correction_variable_3,".Rdata"))
 
   print(paste0("Third, M-values were corrected for ", correction_variable_3," were saved to the processed_data folder"))