diff --git a/scripts/7_normalization.Rmd b/scripts/7_normalization.Rmd
index ef4718a..717691a 100644
--- a/scripts/7_normalization.Rmd
+++ b/scripts/7_normalization.Rmd
@@ -4,8 +4,6 @@ date: '`r format(Sys.time(), "%d %B, %Y")`'
output: html_document
---
-->>>>>>> To do: Do normalizetion also for _unfiltered data!!!!
-
```{r runtime start, include=FALSE}
start_time <- Sys.time()
```
@@ -45,6 +43,9 @@ if (user_choices$array_type == "v2") {
load(paste0(user_choices$project_name, "/processed_data/RGSet_clean_filtered.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
+load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
+load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
+load(paste0(user_choices$project_name, "/reports/annotations_clean.Rdata"))
```
# Description
@@ -52,7 +53,7 @@ load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
This script:
* annotates the probes
-* normalizes data with stratified quantile normalization
+* normalizes data with stratified quantile normalization for filtered and unfiltered data
(followed by BMIQ = beta-mixtrure quantile normalization) to minimize unwanted
technique-related variation within and between samples
* visualizes beta densities before and after normalization
@@ -71,6 +72,7 @@ save(annotations_clean_filtered, file = paste0(user_choices$project_name, "/repo
## Normalization of data performed
```{r normalize probes, include=FALSE}
+# Normalization of filtered data
RGSet_clean_filtered_quantile <- preprocessQuantile(RGSet_clean_filtered)
save(RGSet_clean_filtered_quantile, file = paste0(user_choices$project_name,
"/processed_data/RGSet_clean_filtered_quantile.Rdata")) # output: GenomicRatioSet
@@ -88,6 +90,25 @@ Betas_clean_filtered_quantile_bmiq <- apply(Betas_clean_filtered_quantile[,1:len
function(a) BMIQ(a,probeType$probeType,plots=FALSE)$nbeta) # sourced from script "BMIQ_1.6_Teschendorff.R"
save(Betas_clean_filtered_quantile_bmiq, file = paste0(user_choices$project_name,
"/processed_data/Betas_clean_filtered_quantile_bmiq.Rdata"))
+
+# Normalization of unfiltered data
+RGSet_clean_unfiltered_quantile <- preprocessQuantile(RGSet_clean)
+save(RGSet_clean_unfiltered_quantile, file = paste0(user_choices$project_name,
+ "/processed_data/RGSet_clean_unfiltered_quantile.Rdata")) # output: GenomicRatioSet
+
+Betas_clean_unfiltered_quantile <- getBeta(RGSet_clean_unfiltered_quantile)
+save(Betas_clean_unfiltered_quantile, file = paste0(user_choices$project_name,
+ "/processed_data/Betas_clean_unfiltered_quantile.Rdata"))
+Mset_clean_unfiltered_qunatile <- getM(RGSet_clean_unfiltered_quantile)
+save(Mset_clean_unfiltered_qunatile, file = paste0(user_choices$project_name,
+ "/processed_data/Mset_clean_unfiltered_qunatile.Rdata"))
+# further normalization with BMIQ:
+probeType <- as.data.frame(annotations_clean[rownames(Betas_clean_unfiltered_quantile),c("Name","Type")])
+probeType$probeType = ifelse(probeType$Type %in% "I", 1, 2)
+Betas_clean_unfiltered_quantile_bmiq <- apply(Betas_clean_unfiltered_quantile[,1:length(colnames(Betas_clean_unfiltered_quantile))],2,
+ function(a) BMIQ(a,probeType$probeType,plots=FALSE)$nbeta) # sourced from script "BMIQ_1.6_Teschendorff.R"
+save(Betas_clean_unfiltered_quantile_bmiq, file = paste0(user_choices$project_name,
+ "/processed_data/Betas_clean_unfiltered_quantile_bmiq.Rdata"))
```
## Beta values distribution report
@@ -95,6 +116,10 @@ save(Betas_clean_filtered_quantile_bmiq, file = paste0(user_choices$project_name
```{r beta values distribution}
densityPlot(Betas_clean, sampGroups = PhenoData_clean$Slide, legend = FALSE,
pal = "darkblue", main = "Raw Betas", xlab = "Beta Value")
+densityPlot(Betas_clean_unfiltered_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE,
+ pal = "darkblue", main = "Unfiltered and Quantile Adjusted Betas", xlab = "Beta Value")
+densityPlot(Betas_clean_unfiltered_quantile_bmiq, sampGroups = PhenoData_clean$Slide, legend = FALSE,
+ pal = "darkblue", main = "Unfiltered and Quantile-BMIQ Adjusted Betas", xlab = "Beta Value")
densityPlot(Betas_clean_filtered, sampGroups = PhenoData_clean$Slide, legend = FALSE,
pal = "darkblue", main = "Filtered Betas", xlab = "Beta Value")
densityPlot(Betas_clean_filtered_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE,
@@ -108,6 +133,10 @@ pdf(file = paste0(user_choices$project_name,
"/reports/beta_distributions_raw_filtered_normalized.pdf"))
densityPlot(Betas_clean, sampGroups = PhenoData_clean$Slide, legend = FALSE,
pal = "darkblue", main = "Raw Betas", xlab = "Beta Value")
+densityPlot(Betas_clean_unfiltered_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE,
+ pal = "darkblue", main = "Unfiltered and Quantile Adjusted Betas", xlab = "Beta Value")
+densityPlot(Betas_clean_unfiltered_quantile_bmiq, sampGroups = PhenoData_clean$Slide, legend = FALSE,
+ pal = "darkblue", main = "Unfiltered and Quantile-BMIQ Adjusted Betas", xlab = "Beta Value")
densityPlot(Betas_clean_filtered, sampGroups = PhenoData_clean$Slide, legend = FALSE,
pal = "darkblue", main = "Filtered Betas", xlab = "Beta Value")
densityPlot(Betas_clean_filtered_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE,
@@ -133,15 +162,29 @@ for (i in 1:ncol(Betas_clean_filtered_quantile)) {
densityPlot(i_mat, pal = "darkblue", main = names_filtered_quantileBetas[i])
}
dev.off()
+
+names_unfiltered_quantileBetas <- colnames(Betas_clean_unfiltered_quantile)
+pdf(paste0(user_choices$project_name, "/reports/beta_densities_unfiltered_quantile_normalized.pdf"))
+for (i in 1:ncol(Betas_clean_unfiltered_quantile)) {
+ i_mat <- as.matrix(Betas_clean_unfiltered_quantile[ ,i])
+ name <- colnames(Betas_clean_unfiltered_quantile[,i])
+ densityPlot(i_mat, pal = "darkblue", main = names_unfiltered_quantileBetas[i])
+}
+dev.off()
```
## User report
```{r, info output for user 1, results='asis'}
-cat("Data underwent stratified quantile normalization followed by BMIQ and were saved to the processed data folder. ",
+cat("Filtered and unfiltered data underwent stratified quantile normalization followed by BMIQ and were saved to the processed data folder. ",
sep = "
\n")
+if (length(which(is.nan(Betas_clean_unfiltered_quantile_bmiq))) == 0) {
+ cat("There were no NAs in unfiltered data after normalization", sep = "
\n")
+} else {
+ cat("NAs were produced during normalization, please re-check!", sep = "
\n")
+}
if (length(which(is.nan(Betas_clean_filtered_quantile_bmiq))) == 0) {
- cat("There were no NAs after normalization", sep = "
\n")
+ cat("There were no NAs in filtered data after normalization", sep = "
\n")
} else {
cat("NAs were produced during normalization, please re-check!", sep = "
\n")
}
diff --git a/scripts/8_removal_of_outliers_detect_batch_effects.Rmd b/scripts/8_removal_of_outliers_detect_batch_effects.Rmd
index e7719c0..94c6a26 100644
--- a/scripts/8_removal_of_outliers_detect_batch_effects.Rmd
+++ b/scripts/8_removal_of_outliers_detect_batch_effects.Rmd
@@ -69,6 +69,7 @@ knitr::opts_chunk$set(echo = FALSE)
```{r load data, include=FALSE}
summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
+load(paste0(user_choices$project_name, "/processed_data/Betas_clean_unfiltered_quantile.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered_quantile.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean_unfiltered_quantile_bmiq.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered_quantile_bmiq.Rdata"))