Skip to content
Permalink
d1c9c4b430
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
177 lines (150 sloc) 7.38 KB
---
title: "Script 6: Normalization"
date: '`r format(Sys.time(), "%d %B, %Y")`'
output: html_document
---
```{r runtime start, include=FALSE}
start_time <- Sys.time()
```
```{r setup, include=FALSE}
source("../data/BMIQ_1.6_Teschendorff.R") # load script for BMIQ normalization
needed_packages <- c("BiocManager","RPMM", "knitr", "dplyr", "data.table")
installed_packages <- needed_packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org")
}
lapply(needed_packages, library, character.only = TRUE)
if (!("minfi" %in% rownames(installed.packages()))) {
BiocManager::install("minfi")
}
library(minfi)
user_choices <- readRDS("../data/user_choices.rds")
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
knitr::opts_chunk$set(echo = FALSE)
if (user_choices$array_type == "v2") {
if (!("jokergoo/IlluminaHumanMethylationEPICv2manifest" %in% rownames(installed.packages()))) {
BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2manifest")
}
if (!("IlluminaHumanMethylationEPICv2anno.20a1.hg38" %in% rownames(installed.packages()))) {
BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2anno.20a1.hg38")
}
library(IlluminaHumanMethylationEPICv2manifest)
library(IlluminaHumanMethylationEPICv2anno.20a1.hg38)
}
```
```{r load data, include=FALSE}
load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
```
# Description
This script:
* annotates the probes
* normalizes data with stratified quantile normalization
(followed by BMIQ = beta-mixtrure quantile normalization) to minimize unwanted
technique-related variation within and between samples
* visualizes beta densities before and after normalization
* creates two pdf files of beta densities plots per sample for raw and normalized
data for further investigation. These files are saved to reports folder.
<br>
**Note**
The user can further alter the normalization methods if needed (e.g. include noob etc.)
```{r get annotation of probes, include=FALSE}
annotations_clean = getAnnotation(RGSet_clean)
save(annotations_clean, file = paste0(user_choices$project_name, "/reports/annotations_clean.Rdata"))
```
## Normalization of data performed
```{r normalize probes, include=FALSE}
RGSet_clean_quantile <- preprocessQuantile(RGSet_clean)
save(RGSet_clean_quantile, file = paste0(user_choices$project_name,
"/processed_data/RGSet_clean_quantile.Rdata")) # output: GenomicRatioSet
Betas_clean_quantile <- getBeta(RGSet_clean_quantile)
save(Betas_clean_quantile, file = paste0(user_choices$project_name,
"/processed_data/Betas_clean_quantile.Rdata"))
Mset_clean_qunatile <- getM(RGSet_clean_quantile)
save(Mset_clean_qunatile, file = paste0(user_choices$project_name,
"/processed_data/Mset_clean_qunatile.Rdata"))
# further normalization with BMIQ:
probeType <- as.data.frame(annotations_clean[rownames(Betas_clean_quantile),c("Name","Type")])
probeType$probeType = ifelse(probeType$Type %in% "I", 1, 2)
Betas_clean_quantile_bmiq <- apply(Betas_clean_quantile[,1:length(colnames(Betas_clean_quantile))],2,
function(a) BMIQ(a,probeType$probeType,plots=FALSE)$nbeta) # sourced from script "BMIQ_1.6_Teschendorff.R"
save(Betas_clean_quantile_bmiq, file = paste0(user_choices$project_name,
"/processed_data/Betas_clean_quantile_bmiq.Rdata"))
```
## Beta values distribution report
```{r beta values distribution}
densityPlot(Betas_clean, sampGroups = PhenoData_clean$Slide, legend = FALSE,
pal = "darkblue", main = "Raw Betas", xlab = "Beta Value")
densityPlot(Betas_clean_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE,
pal = "darkblue", main = "Quantile Adjusted Betas", xlab = "Beta Value")
densityPlot(Betas_clean_quantile_bmiq, sampGroups = PhenoData_clean$Slide, legend = FALSE,
pal = "darkblue", main = "Quantile-BMIQ Adjusted Betas", xlab = "Beta Value")
```
```{r, save reports as pdf, include=FALSE}
pdf(file = paste0(user_choices$project_name,
"/reports/beta_distributions_raw_normalized.pdf"))
densityPlot(Betas_clean, sampGroups = PhenoData_clean$Slide, legend = FALSE,
pal = "darkblue", main = "Raw Betas", xlab = "Beta Value")
densityPlot(Betas_clean_quantile, sampGroups = PhenoData_clean$Slide, legend = FALSE,
pal = "darkblue", main = "Quantile Adjusted Betas", xlab = "Beta Value")
densityPlot(Betas_clean_quantile_bmiq, sampGroups = PhenoData_clean$Slide, legend = FALSE,
pal = "darkblue", main = "Quantile-BMIQ Adjusted Betas", xlab = "Beta Value")
dev.off()
# additional plots to look for outliers in distribution:
names_Betas <- colnames(Betas_clean)
pdf(file = paste0(user_choices$project_name, "/reports/beta_densities_raw.pdf"))
for (i in 1:ncol(Betas_clean)) {
i_mat <- as.matrix(Betas_clean[ ,i])
densityPlot(i_mat, pal = "darkblue", main = names_Betas[i])
}
dev.off()
names_quantileBetas <- colnames(Betas_clean_quantile)
pdf(paste0(user_choices$project_name, "/reports/beta_densities_quantile_normalized.pdf"))
for (i in 1:ncol(Betas_clean_quantile)) {
i_mat <- as.matrix(Betas_clean_quantile[ ,i])
name <- colnames(Betas_clean_quantile[,i])
densityPlot(i_mat, pal = "darkblue", main = names_quantileBetas[i])
}
dev.off()
```
## User report
```{r, info output for user 1, results='asis'}
cat("Data underwent stratified quantile normalization followed by BMIQ and were saved to the processed data folder. ",
sep = "<br>\n")
if (length(which(is.nan(Betas_clean_quantile_bmiq))) == 0) {
cat("There were no NAs after normalization", sep = "<br>\n")
} else {
cat("NAs were produced during normalization, please re-check!", sep = "<br>\n")
}
```
```{r, info output for user 2, results='asis'}
cat("Session info text file was updated", sep = "<br>\n")
```
```{r, document session info into a text file, include=FALSE}
connection <- file(paste0(user_choices$personal_path, "/", user_choices$project_name,
"/session_info_all.txt"), "a+")
writeLines("######################################################################", connection)
writeLines("############################ Script 6: #############################", connection)
writeLines("######################################################################", connection)
sessioninfo <- sessionInfo()
writeLines("\nR Version:", connection)
writeLines(paste0(" ", sessioninfo$R.version$version.string), connection)
writeLines("\nAttached packages:", connection)
# nicely format packages (with version)
package_version <- unlist(lapply(sessioninfo$otherPkgs, function(x){paste0(" ",x$Package, " (", x$Version, ")")}))
names(package_version) <- NULL
for(i in 1:length(package_version)) {
writeLines(package_version[i], connection)
}
writeLines("\n", connection)
close(connection)
```
```{r runtime end, include=FALSE}
end_time <- Sys.time()
run_time <- as.numeric(round((end_time - start_time), 1), units = "mins")
```
## Completion time
```{r, completion time, results = 'asis'}
cat("The time for the script to run was:", run_time, "minutes", sep = " ")
```