From e2b6650f9559c848f85e328d3c0c9b5cc000685a Mon Sep 17 00:00:00 2001 From: Natan Yusupov Date: Thu, 28 Mar 2024 11:48:24 +0100 Subject: [PATCH] normalization for unfiltered data added --- scripts/7_normalization.Rmd | 53 +++++++++++++++++-- ...moval_of_outliers_detect_batch_effects.Rmd | 1 + 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/scripts/7_normalization.Rmd b/scripts/7_normalization.Rmd index ef4718a..717691a 100644 --- a/scripts/7_normalization.Rmd +++ b/scripts/7_normalization.Rmd @@ -4,8 +4,6 @@ date: '`r format(Sys.time(), "%d %B, %Y")`' output: html_document --- -->>>>>>> To do: Do normalizetion also for _unfiltered data!!!! - ```{r runtime start, include=FALSE} start_time <- Sys.time() ``` @@ -45,6 +43,9 @@ if (user_choices$array_type == "v2") { load(paste0(user_choices$project_name, "/processed_data/RGSet_clean_filtered.Rdata")) load(paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered.Rdata")) load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata")) +load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata")) +load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata")) +load(paste0(user_choices$project_name, "/reports/annotations_clean.Rdata")) ``` # Description @@ -52,7 +53,7 @@ load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata")) This script: * annotates the probes -* normalizes data with stratified quantile normalization +* normalizes data with stratified quantile normalization for filtered and unfiltered data (followed by BMIQ = beta-mixtrure quantile normalization) to minimize unwanted technique-related variation within and between samples * visualizes beta densities before and after normalization @@ -71,6 +72,7 @@ save(annotations_clean_filtered, file = paste0(user_choices$project_name, "/repo ## Normalization of data performed ```{r normalize probes, include=FALSE} +# Normalization of filtered data RGSet_clean_filtered_quantile <- preprocessQuantile(RGSet_clean_filtered) save(RGSet_clean_filtered_quantile, file = paste0(user_choices$project_name, "/processed_data/RGSet_clean_filtered_quantile.Rdata")) # output: GenomicRatioSet @@ -88,6 +90,25 @@ Betas_clean_filtered_quantile_bmiq <- apply(Betas_clean_filtered_quantile[,1:len function(a) BMIQ(a,probeType$probeType,plots=FALSE)$nbeta) # sourced from script "BMIQ_1.6_Teschendorff.R" save(Betas_clean_filtered_quantile_bmiq, file = paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered_quantile_bmiq.Rdata")) + +# Normalization of unfiltered data +RGSet_clean_unfiltered_quantile <- preprocessQuantile(RGSet_clean) +save(RGSet_clean_unfiltered_quantile, file = paste0(user_choices$project_name, + "/processed_data/RGSet_clean_unfiltered_quantile.Rdata")) # output: GenomicRatioSet + +Betas_clean_unfiltered_quantile <- getBeta(RGSet_clean_unfiltered_quantile) +save(Betas_clean_unfiltered_quantile, file = paste0(user_choices$project_name, + "/processed_data/Betas_clean_unfiltered_quantile.Rdata")) +Mset_clean_unfiltered_qunatile <- getM(RGSet_clean_unfiltered_quantile) +save(Mset_clean_unfiltered_qunatile, file = paste0(user_choices$project_name, + "/processed_data/Mset_clean_unfiltered_qunatile.Rdata")) +# further normalization with BMIQ: +probeType <- as.data.frame(annotations_clean[rownames(Betas_clean_unfiltered_quantile),c("Name","Type")]) +probeType$probeType = ifelse(probeType$Type %in% "I", 1, 2) +Betas_clean_unfiltered_quantile_bmiq <- apply(Betas_clean_unfiltered_quantile[,1:length(colnames(Betas_clean_unfiltered_quantile))],2, + function(a) BMIQ(a,probeType$probeType,plots=FALSE)$nbeta) # sourced from script "BMIQ_1.6_Teschendorff.R" +save(Betas_clean_unfiltered_quantile_bmiq, file = paste0(user_choices$project_name, + "/processed_data/Betas_clean_unfiltered_quantile_bmiq.Rdata")) ``` ## Beta values distribution report @@ -95,6 +116,10 @@ save(Betas_clean_filtered_quantile_bmiq, file = paste0(user_choices$project_name ```{r beta values distribution} densityPlot(Betas_clean, sampGroups = PhenoData_clean$Slide, legend = FALSE, pal = "darkblue", main = "Raw Betas", xlab = "Beta Value") +densityPlot(Betas_clean_unfiltered_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE, + pal = "darkblue", main = "Unfiltered and Quantile Adjusted Betas", xlab = "Beta Value") +densityPlot(Betas_clean_unfiltered_quantile_bmiq, sampGroups = PhenoData_clean$Slide, legend = FALSE, + pal = "darkblue", main = "Unfiltered and Quantile-BMIQ Adjusted Betas", xlab = "Beta Value") densityPlot(Betas_clean_filtered, sampGroups = PhenoData_clean$Slide, legend = FALSE, pal = "darkblue", main = "Filtered Betas", xlab = "Beta Value") densityPlot(Betas_clean_filtered_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE, @@ -108,6 +133,10 @@ pdf(file = paste0(user_choices$project_name, "/reports/beta_distributions_raw_filtered_normalized.pdf")) densityPlot(Betas_clean, sampGroups = PhenoData_clean$Slide, legend = FALSE, pal = "darkblue", main = "Raw Betas", xlab = "Beta Value") +densityPlot(Betas_clean_unfiltered_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE, + pal = "darkblue", main = "Unfiltered and Quantile Adjusted Betas", xlab = "Beta Value") +densityPlot(Betas_clean_unfiltered_quantile_bmiq, sampGroups = PhenoData_clean$Slide, legend = FALSE, + pal = "darkblue", main = "Unfiltered and Quantile-BMIQ Adjusted Betas", xlab = "Beta Value") densityPlot(Betas_clean_filtered, sampGroups = PhenoData_clean$Slide, legend = FALSE, pal = "darkblue", main = "Filtered Betas", xlab = "Beta Value") densityPlot(Betas_clean_filtered_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE, @@ -133,15 +162,29 @@ for (i in 1:ncol(Betas_clean_filtered_quantile)) { densityPlot(i_mat, pal = "darkblue", main = names_filtered_quantileBetas[i]) } dev.off() + +names_unfiltered_quantileBetas <- colnames(Betas_clean_unfiltered_quantile) +pdf(paste0(user_choices$project_name, "/reports/beta_densities_unfiltered_quantile_normalized.pdf")) +for (i in 1:ncol(Betas_clean_unfiltered_quantile)) { + i_mat <- as.matrix(Betas_clean_unfiltered_quantile[ ,i]) + name <- colnames(Betas_clean_unfiltered_quantile[,i]) + densityPlot(i_mat, pal = "darkblue", main = names_unfiltered_quantileBetas[i]) +} +dev.off() ``` ## User report ```{r, info output for user 1, results='asis'} -cat("Data underwent stratified quantile normalization followed by BMIQ and were saved to the processed data folder. ", +cat("Filtered and unfiltered data underwent stratified quantile normalization followed by BMIQ and were saved to the processed data folder. ", sep = "
\n") +if (length(which(is.nan(Betas_clean_unfiltered_quantile_bmiq))) == 0) { + cat("There were no NAs in unfiltered data after normalization", sep = "
\n") +} else { + cat("NAs were produced during normalization, please re-check!", sep = "
\n") +} if (length(which(is.nan(Betas_clean_filtered_quantile_bmiq))) == 0) { - cat("There were no NAs after normalization", sep = "
\n") + cat("There were no NAs in filtered data after normalization", sep = "
\n") } else { cat("NAs were produced during normalization, please re-check!", sep = "
\n") } diff --git a/scripts/8_removal_of_outliers_detect_batch_effects.Rmd b/scripts/8_removal_of_outliers_detect_batch_effects.Rmd index e7719c0..94c6a26 100644 --- a/scripts/8_removal_of_outliers_detect_batch_effects.Rmd +++ b/scripts/8_removal_of_outliers_detect_batch_effects.Rmd @@ -69,6 +69,7 @@ knitr::opts_chunk$set(echo = FALSE) ```{r load data, include=FALSE} summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds")) +load(paste0(user_choices$project_name, "/processed_data/Betas_clean_unfiltered_quantile.Rdata")) load(paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered_quantile.Rdata")) load(paste0(user_choices$project_name, "/processed_data/Betas_clean_unfiltered_quantile_bmiq.Rdata")) load(paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered_quantile_bmiq.Rdata"))