Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
EPIC_Preprocessing_Pipeline/scripts/7_normalization.Rmd
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
177 lines (150 sloc)
7.38 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Script 6: Normalization" | |
date: '`r format(Sys.time(), "%d %B, %Y")`' | |
output: html_document | |
--- | |
```{r runtime start, include=FALSE} | |
start_time <- Sys.time() | |
``` | |
```{r setup, include=FALSE} | |
source("../data/BMIQ_1.6_Teschendorff.R") # load script for BMIQ normalization | |
needed_packages <- c("BiocManager","RPMM", "knitr", "dplyr", "data.table") | |
installed_packages <- needed_packages %in% rownames(installed.packages()) | |
if (any(installed_packages == FALSE)) { | |
install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org") | |
} | |
lapply(needed_packages, library, character.only = TRUE) | |
if (!("minfi" %in% rownames(installed.packages()))) { | |
BiocManager::install("minfi") | |
} | |
library(minfi) | |
user_choices <- readRDS("../data/user_choices.rds") | |
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/")) | |
knitr::opts_chunk$set(echo = FALSE) | |
if (user_choices$array_type == "v2") { | |
if (!("jokergoo/IlluminaHumanMethylationEPICv2manifest" %in% rownames(installed.packages()))) { | |
BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2manifest") | |
} | |
if (!("IlluminaHumanMethylationEPICv2anno.20a1.hg38" %in% rownames(installed.packages()))) { | |
BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2anno.20a1.hg38") | |
} | |
library(IlluminaHumanMethylationEPICv2manifest) | |
library(IlluminaHumanMethylationEPICv2anno.20a1.hg38) | |
} | |
``` | |
```{r load data, include=FALSE} | |
load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata")) | |
load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata")) | |
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata")) | |
``` | |
# Description | |
This script: | |
* annotates the probes | |
* normalizes data with stratified quantile normalization | |
(followed by BMIQ = beta-mixtrure quantile normalization) to minimize unwanted | |
technique-related variation within and between samples | |
* visualizes beta densities before and after normalization | |
* creates two pdf files of beta densities plots per sample for raw and normalized | |
data for further investigation. These files are saved to reports folder. | |
<br> | |
**Note** | |
The user can further alter the normalization methods if needed (e.g. include noob etc.) | |
```{r get annotation of probes, include=FALSE} | |
annotations_clean = getAnnotation(RGSet_clean) | |
save(annotations_clean, file = paste0(user_choices$project_name, "/reports/annotations_clean.Rdata")) | |
``` | |
## Normalization of data performed | |
```{r normalize probes, include=FALSE} | |
RGSet_clean_quantile <- preprocessQuantile(RGSet_clean) | |
save(RGSet_clean_quantile, file = paste0(user_choices$project_name, | |
"/processed_data/RGSet_clean_quantile.Rdata")) # output: GenomicRatioSet | |
Betas_clean_quantile <- getBeta(RGSet_clean_quantile) | |
save(Betas_clean_quantile, file = paste0(user_choices$project_name, | |
"/processed_data/Betas_clean_quantile.Rdata")) | |
Mset_clean_qunatile <- getM(RGSet_clean_quantile) | |
save(Mset_clean_qunatile, file = paste0(user_choices$project_name, | |
"/processed_data/Mset_clean_qunatile.Rdata")) | |
# further normalization with BMIQ: | |
probeType <- as.data.frame(annotations_clean[rownames(Betas_clean_quantile),c("Name","Type")]) | |
probeType$probeType = ifelse(probeType$Type %in% "I", 1, 2) | |
Betas_clean_quantile_bmiq <- apply(Betas_clean_quantile[,1:length(colnames(Betas_clean_quantile))],2, | |
function(a) BMIQ(a,probeType$probeType,plots=FALSE)$nbeta) # sourced from script "BMIQ_1.6_Teschendorff.R" | |
save(Betas_clean_quantile_bmiq, file = paste0(user_choices$project_name, | |
"/processed_data/Betas_clean_quantile_bmiq.Rdata")) | |
``` | |
## Beta values distribution report | |
```{r beta values distribution} | |
densityPlot(Betas_clean, sampGroups = PhenoData_clean$Slide, legend = FALSE, | |
pal = "darkblue", main = "Raw Betas", xlab = "Beta Value") | |
densityPlot(Betas_clean_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE, | |
pal = "darkblue", main = "Quantile Adjusted Betas", xlab = "Beta Value") | |
densityPlot(Betas_clean_quantile_bmiq, sampGroups = PhenoData_clean$Slide, legend = FALSE, | |
pal = "darkblue", main = "Quantile-BMIQ Adjusted Betas", xlab = "Beta Value") | |
``` | |
```{r, save reports as pdf, include=FALSE} | |
pdf(file = paste0(user_choices$project_name, | |
"/reports/beta_distributions_raw_normalized.pdf")) | |
densityPlot(Betas_clean, sampGroups = PhenoData_clean$Slide, legend = FALSE, | |
pal = "darkblue", main = "Raw Betas", xlab = "Beta Value") | |
densityPlot(Betas_clean_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE, | |
pal = "darkblue", main = "Quantile Adjusted Betas", xlab = "Beta Value") | |
densityPlot(Betas_clean_quantile_bmiq, sampGroups = PhenoData_clean$Slide, legend = FALSE, | |
pal = "darkblue", main = "Quantile-BMIQ Adjusted Betas", xlab = "Beta Value") | |
dev.off() | |
# additional plots to look for outliers in distribution: | |
names_Betas <- colnames(Betas_clean) | |
pdf(file = paste0(user_choices$project_name, "/reports/beta_densities_raw.pdf")) | |
for (i in 1:ncol(Betas_clean)) { | |
i_mat <- as.matrix(Betas_clean[ ,i]) | |
densityPlot(i_mat, pal = "darkblue", main = names_Betas[i]) | |
} | |
dev.off() | |
names_quantileBetas <- colnames(Betas_clean_quantile) | |
pdf(paste0(user_choices$project_name, "/reports/beta_densities_quantile_normalized.pdf")) | |
for (i in 1:ncol(Betas_clean_quantile)) { | |
i_mat <- as.matrix(Betas_clean_quantile[ ,i]) | |
name <- colnames(Betas_clean_quantile[,i]) | |
densityPlot(i_mat, pal = "darkblue", main = names_quantileBetas[i]) | |
} | |
dev.off() | |
``` | |
## User report | |
```{r, info output for user 1, results='asis'} | |
cat("Data underwent stratified quantile normalization followed by BMIQ and were saved to the processed data folder. ", | |
sep = "<br>\n") | |
if (length(which(is.nan(Betas_clean_quantile_bmiq))) == 0) { | |
cat("There were no NAs after normalization", sep = "<br>\n") | |
} else { | |
cat("NAs were produced during normalization, please re-check!", sep = "<br>\n") | |
} | |
``` | |
```{r, info output for user 2, results='asis'} | |
cat("Session info text file was updated", sep = "<br>\n") | |
``` | |
```{r, document session info into a text file, include=FALSE} | |
connection <- file(paste0(user_choices$personal_path, "/", user_choices$project_name, | |
"/session_info_all.txt"), "a+") | |
writeLines("######################################################################", connection) | |
writeLines("############################ Script 6: #############################", connection) | |
writeLines("######################################################################", connection) | |
sessioninfo <- sessionInfo() | |
writeLines("\nR Version:", connection) | |
writeLines(paste0(" ", sessioninfo$R.version$version.string), connection) | |
writeLines("\nAttached packages:", connection) | |
# nicely format packages (with version) | |
package_version <- unlist(lapply(sessioninfo$otherPkgs, function(x){paste0(" ",x$Package, " (", x$Version, ")")})) | |
names(package_version) <- NULL | |
for(i in 1:length(package_version)) { | |
writeLines(package_version[i], connection) | |
} | |
writeLines("\n", connection) | |
close(connection) | |
``` | |
```{r runtime end, include=FALSE} | |
end_time <- Sys.time() | |
run_time <- as.numeric(round((end_time - start_time), 1), units = "mins") | |
``` | |
## Completion time | |
```{r, completion time, results = 'asis'} | |
cat("The time for the script to run was:", run_time, "minutes", sep = " ") | |
``` |