Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
EPIC_Preprocessing_Pipeline/scripts/2_import_raw_DNAm_data.Rmd
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
168 lines (139 sloc)
6.91 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Script 2: Import raw DNAm data" | |
date: '`r format(Sys.time(), "%d %B, %Y")`' | |
output: html_document | |
--- | |
```{r runtime start, include=FALSE} | |
start_time <- Sys.time() | |
``` | |
```{r setup, include=FALSE} | |
needed_packages <- c("BiocManager", "knitr", "dplyr", "rmarkdown") | |
user_choices <- readRDS("../data/user_choices.rds") | |
if(user_choices$package_path != "PLEASE FILL IN"){ | |
.libPaths(c(user_choices$package_path, .libPaths())) | |
} | |
installed_packages <- needed_packages %in% rownames(installed.packages()) | |
if (any(installed_packages == FALSE)) { | |
install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org") | |
} | |
lapply(needed_packages, library, character.only = TRUE) | |
needed_biocmanager_packages <- c("minfi", "IlluminaHumanMethylationEPICanno.ilm10b4.hg19", | |
"IlluminaHumanMethylationEPICmanifest") | |
installed_biocmanager_packages <- needed_biocmanager_packages %in% rownames(installed.packages()) | |
if (any(installed_biocmanager_packages == FALSE)) { | |
BiocManager::install(needed_biocmanager_packages[!installed_biocmanager_packages]) | |
} | |
lapply(needed_biocmanager_packages, library, character.only = TRUE) | |
if (user_choices$array_type == "v2") { | |
if (!("IlluminaHumanMethylationEPICv2manifest" %in% rownames(installed.packages()))) { | |
BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2manifest") | |
} | |
if (!("IlluminaHumanMethylationEPICv2anno.20a1.hg38" %in% rownames(installed.packages()))) { | |
BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2anno.20a1.hg38") | |
} | |
library(IlluminaHumanMethylationEPICv2manifest) | |
library(IlluminaHumanMethylationEPICv2anno.20a1.hg38) | |
} | |
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/")) | |
knitr::opts_chunk$set(echo = FALSE) | |
``` | |
# Description | |
This script: | |
* reads raw intensity signals (variables: red & green) from IDAT files to **RGChannelSet object** | |
* organizes data by probe level (variables: methylated & unmethylated) to **MethylSet object** | |
* converts MethylSet object to **RatioSet object** (variables: Beta and M values (logratio of Beta)) | |
* gets raw beta values (**Betas**) from RatioSet obejct | |
* maps RatioSet with genomic position of loci to create **gRatioSet** | |
* saves **phenotype data** (meta data) as **PhenoData** file from RGSet | |
* creates **summary file of preprocessing steps** to record current number of samples and probes as pipeline progresses. | |
<br> | |
**Further information about the data classes mentioned above is available in the minfi User's Guide:** | |
https://bioconductor.org/packages/devel/bioc/vignettes/minfi/inst/doc/minfi.html | |
<br> | |
**Note:** | |
Recording object dimensions (samples, probes) for the different files throughout the pipeline is very important. Please check the summary file with dimensions of object at the end of each step in the EPIC pipeline | |
```{r, create & save red green channel set, include = FALSE} | |
targets <- read.csv(paste0(user_choices$project_name, "/processed_data/phenotype_data.csv"), sep = ",") | |
RGSet_original <- read.metharray.exp(targets = targets) | |
if (user_choices$array_type == "v2") { | |
RGSet_original@annotation <- c(array = "IlluminaHumanMethylationEPICv2", annotation = "20a1.hg38") | |
} | |
save(RGSet_original, file = paste0(user_choices$project_name, "/processed_data/RGSet_original.Rdata")) | |
``` | |
```{r, create & save methyl set, include=FALSE} | |
Mset_original <- preprocessRaw(RGSet_original) | |
save(Mset_original, file = paste0(user_choices$project_name, "/processed_data/Mset_original.Rdata")) | |
``` | |
```{r, create & save ratio set, include=FALSE} | |
RatioSet_original <- ratioConvert(Mset_original, what = "both", keepCN = TRUE) | |
save(RatioSet_original, file = paste0(user_choices$project_name, "/processed_data/RatioSet_original.Rdata")) | |
``` | |
```{r, vreate & save raw bets as beta values matrix, include=FALSE} | |
Betas_original <- getBeta(RatioSet_original) | |
save(Betas_original, file = paste0(user_choices$project_name, "/processed_data/Betas_original.Rdata")) | |
``` | |
```{r, create & save gRatio set with genomic positions of loci, include=FALSE} | |
gRatioSet_original <- mapToGenome(RatioSet_original, mergeManifest=TRUE) | |
save(gRatioSet_original, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_original.Rdata")) | |
``` | |
```{r, create & save phenotype data file, include=FALSE} | |
PhenoData_original <- pData(RGSet_original) | |
save(PhenoData_original, file = paste0(user_choices$project_name, "/processed_data/PhenoData_original.Rdata")) | |
``` | |
```{r, create & save summary table file for preprocessing steps, include=FALSE} | |
dim_RGSet_original <- dim(RGSet_original) | |
dim_Betas_original <- dim(Betas_original) | |
step_number <- c("1", "1") | |
step <- c("Original", "Original") | |
data_class <- c("RGSet", "Betas") | |
samples <- c(dim_RGSet_original[2], dim_Betas_original[2]) | |
probes <- c(dim_RGSet_original[1], dim_Betas_original[1]) | |
summary_table_preprocessing <- data.frame(step_number, step, data_class, samples, probes) | |
saveRDS(summary_table_preprocessing, | |
paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds")) # save for future additions | |
``` | |
# Summaries of created data | |
```{r, present class structures} | |
RGSet_original | |
Mset_original | |
RatioSet_original | |
gRatioSet_original | |
``` | |
## User report | |
```{r, info output for user, results = 'asis'} | |
cat("RGChannelSet (RGSet), MethylSet (MSet), RatioSet, Betas and gRatioSet were created and named with the ending 'original'", | |
"All 'original' datasets were saved to the processed_data folder", | |
"Summary table of preprocessing steps was saved to the reports folder", | |
"Session info text file was updated", sep = "<br>\n") | |
``` | |
## Preprocessing summary table | |
```{r, preprocessing summary table} | |
summary_table_preprocessing %>% paged_table() | |
``` | |
```{r, document session info into a text file, include=FALSE} | |
connection <- file(paste0(user_choices$personal_path, "/", user_choices$project_name, | |
"/session_info_all.txt"), "a+") | |
writeLines("######################################################################", connection) | |
writeLines("############################ Script 2: #############################", connection) | |
writeLines("######################################################################", connection) | |
sessioninfo <- sessionInfo() | |
writeLines("\nR Version:", connection) | |
writeLines(paste0(" ", sessioninfo$R.version$version.string), connection) | |
writeLines("\nAttached packages:", connection) | |
# nicely format packages (with version) | |
package_version <- unlist(lapply(sessioninfo$otherPkgs, function(x){paste0(" ",x$Package, " (", x$Version, ")")})) | |
names(package_version) <- NULL | |
for(i in 1:length(package_version)) { | |
writeLines(package_version[i], connection) | |
} | |
writeLines("\n", connection) | |
close(connection) | |
``` | |
```{r runtime end, include=FALSE} | |
end_time <- Sys.time() | |
run_time <- as.numeric(round((end_time - start_time), 1), units = "mins") | |
``` | |
## Completion time | |
```{r, completion time, results = 'asis'} | |
cat("The time for the script to run was:", run_time, "minutes", sep = " ") | |
``` |