-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial commit: hg19 annotation and pseudo-EPICv1 version for EPICv2 …
…data only
- Loading branch information
Vera N. Karlbauer
committed
Aug 13, 2024
1 parent
180ce6c
commit 76ff990
Showing
1 changed file
with
191 additions
and
0 deletions.
There are no files selected for viewing
191 changes: 191 additions & 0 deletions
191
scripts/11_optional_create_pseudo_epicv1_version_for_v2.Rmd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
--- | ||
title: "Script 11 (optional): Pseudo-EPICv1 version of EPICv2 data" | ||
date: '`r format(Sys.time(), "%d %B, %Y")`' | ||
output: html_document | ||
--- | ||
|
||
# Description | ||
|
||
This script | ||
* creates a hg19 annotation of filtered and unfiltered data from the EPICv2 array (instead of the hg38 annotation | ||
provided by Illumina). The hg19 annotation is taken from: http://zwdzwd.github.io/InfiniumAnnotation/EPIC_hm450_hg19.html | ||
* creates a "pseudo-EPICv1" version of EPIC v2 data which only includes the probes found on the EPICv1 array. | ||
The probe names are replaced with the probe names from the EPICv1 array. | ||
|
||
Both of these steps are performed for filtered and unfiltered data. | ||
The pseudo-EPICv1 version is useful when using software designed for EPICv1, e.g. for clock calculation via methylclock or cell type estimation via EpiDISH. | ||
|
||
<br> | ||
**Instructions** | ||
Please only run this script if your data was generated using the EPICv2 array. | ||
|
||
```{r runtime start, include=FALSE} | ||
start_time <- Sys.time() | ||
``` | ||
|
||
```{r setup, include=FALSE} | ||
user_choices <- readRDS("../data/user_choices.rds") | ||
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/")) | ||
knitr::opts_chunk$set(echo = FALSE) | ||
if(user_choices$package_directory != "PLEASE FILL IN"){ | ||
.libPaths(c(user_choices$package_directory, .libPaths())) | ||
} | ||
needed_packages <- c("BiocManager", "dplyr", "knitr", "rmarkdown", "tibble", "minfi", "janitor") | ||
installed_packages <- needed_packages %in% rownames(installed.packages()) | ||
if (!("minfi" %in% rownames(installed.packages()))) { | ||
BiocManager::install("minfi") | ||
} | ||
if (any(installed_packages == FALSE)) { | ||
install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org") | ||
} | ||
lapply(needed_packages, library, character.only = TRUE) | ||
library(minfi) | ||
library(janitor) | ||
``` | ||
|
||
```{r load data, include=FALSE} | ||
summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds")) | ||
load(paste0(user_choices$project_name, "/processed_data/Betas_clean_unfiltered_quantile_bmiq_combated.Rdata")) | ||
load(paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered_quantile_bmiq_combated.Rdata")) | ||
load(paste0(user_choices$project_name, "/reports/annotations_clean_filtered.Rdata")) | ||
load(paste0(user_choices$project_name, "/reports/annotations_clean_unfiltered.Rdata")) | ||
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata")) | ||
annotation_hg19 <- read.table(paste0(user_choices$personal_path, "/epic_preprocessing_k2h/data/EPICv2.hg19.manifest.tsv")) | ||
annotation_hg19 <- row_to_names(annotation_hg19, row_number = 1, remove_row = TRUE, remove_rows_above = TRUE) | ||
``` | ||
|
||
```{r, check array type, include = TRUE} | ||
if (user_choices$array_type == "v1") { | ||
cat("Please do not run script 11 if your data was generated using an EPICv1 array") | ||
knit_exit() | ||
} | ||
``` | ||
|
||
```{r, hg19 annotation filtered data, include = FALSE} | ||
annations_clean_filtered_hg19 <- annotation_hg19 %>% | ||
dplyr::filter(Probe_ID %in% rownames(Betas_clean_quantile_bmiq_filtered_combated)) | ||
save(annations_clean_filtered_hg19, file = paste0(user_choices$project_name, "/reports/annations_clean_filtered_hg19.Rdata")) | ||
``` | ||
|
||
```{r, hg19 annotation unfiltered data, include = FALSE} | ||
annotions_clean_unfiltered_hg19 <- annotation_hg19 %>% | ||
dplyr::filter(Probe_ID %in% rownames(Betas_clean_unfiltered_quantile_bmiq_combated)) | ||
save(annotions_clean_unfiltered_hg19, file = paste0(user_choices$project_name, "/reports/annotions_clean_unfiltered_hg19.Rdata")) | ||
``` | ||
|
||
```{r, create pseudo-EPICv1 version for filtered data, include=FALSE} | ||
# create annotation only containing filtered CpGs that are also on v1 and exclude any duplicated loci | ||
annotations_clean_filtered_pseudo_v1 <- annotations_clean_filtered %>% | ||
filter(Name %in% rownames(Betas_clean_filtered_quantile_bmiq_combated)) %>% | ||
filter(EPICv1_Loci != "") %>% | ||
filter(duplicated(EPICv1_Loci) == FALSE) | ||
# only keep betas that are on v1 and exclude any duplicated loci (keep first CpG) | ||
keep_betas <- (rownames(Betas_clean_filtered_quantile_bmiq_combated) %in% annotations_clean_filtered_pseudo_v1$Name) | ||
Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1 <- Betas_clean_filtered_quantile_bmiq_combated[keep_betas,] | ||
# order Beta rownames after annotation | ||
Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1 <- Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1[match(annotations_clean_filtered_pseudo_v1$Name, rownames(Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1)),] | ||
# rename Beta rownames | ||
rownames(Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1) <- annotations_clean_filtered_pseudo_v1$EPICv1_Loci | ||
# save Betas | ||
save(Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1, file = paste0(user_choices$project_name, "/processed_data/final_data/Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1.Rdata")) | ||
``` | ||
|
||
```{r, info output for filtered pseudo-EPICv1 version, results='asis'} | ||
keep_betas_df <- as.data.frame(keep_betas) | ||
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), | ||
" filtered CpGs were not found on EPICv1 and therefore excluded"), sep = "<br>\n") | ||
dim_Betas_filtered <- dim(Betas_clean_quantile_bmiq_filtered_combated_pseudo_v1) | ||
step_number <- c("14") | ||
step <- c("Create pseudo-EPICv1 version (filtered data)") | ||
data_class <- c("Betas") | ||
samples <- c(dim_Betas_filtered[2]) | ||
probes <- c(dim_Betas_filtered[1]) | ||
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) | ||
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) | ||
``` | ||
|
||
```{r, create pseudo-EPICv1 version for unfiltered data, include=FALSE} | ||
# create annotation only containing unfiltered CpGs that are also on v1 and exclude any duplicated loci | ||
annotations_clean_unfiltered_pseudo_v1 <- annotations_clean_unfiltered %>% | ||
filter(Name %in% rownames(Betas_clean_unfiltered_quantile_bmiq_combated)) %>% | ||
filter(EPICv1_Loci != "") %>% | ||
filter(duplicated(EPICv1_Loci) == FALSE) | ||
# only keep betas that are on v1 and exclude any duplicated loci (keep first CpG) | ||
keep_betas <- (rownames(Betas_clean_unfiltered_quantile_bmiq_combated) %in% annotations_clean_unfiltered_pseudo_v1$Name) | ||
Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1 <- Betas_clean_unfiltered_quantile_bmiq_combated[keep_betas,] | ||
# order Beta rownames after annotation | ||
Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1 <- Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1[match(annotations_clean_unfiltered_pseudo_v1$Name, rownames(Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1)),] | ||
# rename Beta rownames | ||
rownames(Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1) <- annotations_clean_unfiltered_pseudo_v1$EPICv1_Loci | ||
# save Betas | ||
save(Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1, file = paste0(user_choices$project_name, "/processed_data/final_data/Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1.Rdata")) | ||
``` | ||
|
||
```{r, info output for unfiltered pseudo-EPICv1 version, results='asis'} | ||
keep_betas_df <- as.data.frame(keep_betas) | ||
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), | ||
" unfiltered CpGs were not found on EPICv1 and therefore excluded"), sep = "<br>\n") | ||
dim_Betas_unfiltered <- dim(Betas_clean_quantile_bmiq_unfiltered_combated_pseudo_v1) | ||
step_number <- c("15") | ||
step <- c("Create pseudo-EPICv1 version (unfiltered data)") | ||
data_class <- c("Betas") | ||
samples <- c(dim_Betas_unfiltered[2]) | ||
probes <- c(dim_Betas_unfiltered[1]) | ||
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) | ||
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) | ||
``` | ||
|
||
## User report | ||
|
||
```{r, info output for user, results='asis'} | ||
cat("Session info text file was updated", sep = "<br>\n") | ||
``` | ||
|
||
## Preprocessing summary table | ||
|
||
```{r, preprocessing summary table} | ||
summary_table_preprocessing %>% paged_table() | ||
``` | ||
|
||
```{r, document session info into a text file, include=FALSE} | ||
connection <- file(paste0(user_choices$personal_path, "/", user_choices$project_name, | ||
"/session_info_all.txt"), "a+") | ||
writeLines("######################################################################", connection) | ||
writeLines("############################ Script 6: #############################", connection) | ||
writeLines("######################################################################", connection) | ||
sessioninfo <- sessionInfo() | ||
writeLines("\nR Version:", connection) | ||
writeLines(paste0(" ", sessioninfo$R.version$version.string), connection) | ||
writeLines("\nAttached packages:", connection) | ||
# nicely format packages (with version) | ||
package_version <- unlist(lapply(sessioninfo$otherPkgs, function(x){paste0(" ",x$Package, " (", x$Version, ")")})) | ||
names(package_version) <- NULL | ||
for(i in 1:length(package_version)) { | ||
writeLines(package_version[i], connection) | ||
} | ||
writeLines("\n", connection) | ||
close(connection) | ||
``` | ||
|
||
```{r runtime end, include=FALSE} | ||
end_time <- Sys.time() | ||
run_time <- as.numeric(round((end_time - start_time), 1), units = "mins") | ||
``` | ||
|
||
## Completion time | ||
|
||
```{r, completion time, results = 'asis'} | ||
cat("The time for the script to run was:", run_time, "minutes", sep = " ") | ||
``` |