Skip to content
Permalink
f1bd3ab1ae
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
168 lines (139 sloc) 6.91 KB
---
title: "Script 2: Import raw DNAm data"
date: '`r format(Sys.time(), "%d %B, %Y")`'
output: html_document
---
```{r runtime start, include=FALSE}
start_time <- Sys.time()
```
```{r setup, include=FALSE}
needed_packages <- c("BiocManager", "knitr", "dplyr", "rmarkdown")
user_choices <- readRDS("../data/user_choices.rds")
if(user_choices$package_path != "PLEASE FILL IN"){
.libPaths(c(user_choices$package_path, .libPaths()))
}
installed_packages <- needed_packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org")
}
lapply(needed_packages, library, character.only = TRUE)
needed_biocmanager_packages <- c("minfi", "IlluminaHumanMethylationEPICanno.ilm10b4.hg19",
"IlluminaHumanMethylationEPICmanifest")
installed_biocmanager_packages <- needed_biocmanager_packages %in% rownames(installed.packages())
if (any(installed_biocmanager_packages == FALSE)) {
BiocManager::install(needed_biocmanager_packages[!installed_biocmanager_packages])
}
lapply(needed_biocmanager_packages, library, character.only = TRUE)
if (user_choices$array_type == "v2") {
if (!("IlluminaHumanMethylationEPICv2manifest" %in% rownames(installed.packages()))) {
BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2manifest")
}
if (!("IlluminaHumanMethylationEPICv2anno.20a1.hg38" %in% rownames(installed.packages()))) {
BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2anno.20a1.hg38")
}
library(IlluminaHumanMethylationEPICv2manifest)
library(IlluminaHumanMethylationEPICv2anno.20a1.hg38)
}
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
knitr::opts_chunk$set(echo = FALSE)
```
# Description
This script:
* reads raw intensity signals (variables: red & green) from IDAT files to **RGChannelSet object**
* organizes data by probe level (variables: methylated & unmethylated) to **MethylSet object**
* converts MethylSet object to **RatioSet object** (variables: Beta and M values (logratio of Beta))
* gets raw beta values (**Betas**) from RatioSet obejct
* maps RatioSet with genomic position of loci to create **gRatioSet**
* saves **phenotype data** (meta data) as **PhenoData** file from RGSet
* creates **summary file of preprocessing steps** to record current number of samples and probes as pipeline progresses.
<br>
**Further information about the data classes mentioned above is available in the minfi User's Guide:**
https://bioconductor.org/packages/devel/bioc/vignettes/minfi/inst/doc/minfi.html
<br>
**Note:**
Recording object dimensions (samples, probes) for the different files throughout the pipeline is very important. Please check the summary file with dimensions of object at the end of each step in the EPIC pipeline
```{r, create & save red green channel set, include = FALSE}
targets <- read.csv(paste0(user_choices$project_name, "/processed_data/phenotype_data.csv"), sep = ",")
RGSet_original <- read.metharray.exp(targets = targets)
if (user_choices$array_type == "v2") {
RGSet_original@annotation <- c(array = "IlluminaHumanMethylationEPICv2", annotation = "20a1.hg38")
}
save(RGSet_original, file = paste0(user_choices$project_name, "/processed_data/RGSet_original.Rdata"))
```
```{r, create & save methyl set, include=FALSE}
Mset_original <- preprocessRaw(RGSet_original)
save(Mset_original, file = paste0(user_choices$project_name, "/processed_data/Mset_original.Rdata"))
```
```{r, create & save ratio set, include=FALSE}
RatioSet_original <- ratioConvert(Mset_original, what = "both", keepCN = TRUE)
save(RatioSet_original, file = paste0(user_choices$project_name, "/processed_data/RatioSet_original.Rdata"))
```
```{r, vreate & save raw bets as beta values matrix, include=FALSE}
Betas_original <- getBeta(RatioSet_original)
save(Betas_original, file = paste0(user_choices$project_name, "/processed_data/Betas_original.Rdata"))
```
```{r, create & save gRatio set with genomic positions of loci, include=FALSE}
gRatioSet_original <- mapToGenome(RatioSet_original, mergeManifest=TRUE)
save(gRatioSet_original, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_original.Rdata"))
```
```{r, create & save phenotype data file, include=FALSE}
PhenoData_original <- pData(RGSet_original)
save(PhenoData_original, file = paste0(user_choices$project_name, "/processed_data/PhenoData_original.Rdata"))
```
```{r, create & save summary table file for preprocessing steps, include=FALSE}
dim_RGSet_original <- dim(RGSet_original)
dim_Betas_original <- dim(Betas_original)
step_number <- c("1", "1")
step <- c("Original", "Original")
data_class <- c("RGSet", "Betas")
samples <- c(dim_RGSet_original[2], dim_Betas_original[2])
probes <- c(dim_RGSet_original[1], dim_Betas_original[1])
summary_table_preprocessing <- data.frame(step_number, step, data_class, samples, probes)
saveRDS(summary_table_preprocessing,
paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds")) # save for future additions
```
# Summaries of created data
```{r, present class structures}
RGSet_original
Mset_original
RatioSet_original
gRatioSet_original
```
## User report
```{r, info output for user, results = 'asis'}
cat("RGChannelSet (RGSet), MethylSet (MSet), RatioSet, Betas and gRatioSet were created and named with the ending 'original'",
"All 'original' datasets were saved to the processed_data folder",
"Summary table of preprocessing steps was saved to the reports folder",
"Session info text file was updated", sep = "<br>\n")
```
## Preprocessing summary table
```{r, preprocessing summary table}
summary_table_preprocessing %>% paged_table()
```
```{r, document session info into a text file, include=FALSE}
connection <- file(paste0(user_choices$personal_path, "/", user_choices$project_name,
"/session_info_all.txt"), "a+")
writeLines("######################################################################", connection)
writeLines("############################ Script 2: #############################", connection)
writeLines("######################################################################", connection)
sessioninfo <- sessionInfo()
writeLines("\nR Version:", connection)
writeLines(paste0(" ", sessioninfo$R.version$version.string), connection)
writeLines("\nAttached packages:", connection)
# nicely format packages (with version)
package_version <- unlist(lapply(sessioninfo$otherPkgs, function(x){paste0(" ",x$Package, " (", x$Version, ")")}))
names(package_version) <- NULL
for(i in 1:length(package_version)) {
writeLines(package_version[i], connection)
}
writeLines("\n", connection)
close(connection)
```
```{r runtime end, include=FALSE}
end_time <- Sys.time()
run_time <- as.numeric(round((end_time - start_time), 1), units = "mins")
```
## Completion time
```{r, completion time, results = 'asis'}
cat("The time for the script to run was:", run_time, "minutes", sep = " ")
```