Skip to content

Commit

Permalink
Initial commit: hg19 annotation and pseudo-EPICv1 version for EPICv2 …
Browse files Browse the repository at this point in the history
…data only
  • Loading branch information
Vera N. Karlbauer committed Aug 13, 2024
1 parent 180ce6c commit 76ff990
Showing 1 changed file with 191 additions and 0 deletions.
191 changes: 191 additions & 0 deletions scripts/11_optional_create_pseudo_epicv1_version_for_v2.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
---
title: "Script 11 (optional): Pseudo-EPICv1 version of EPICv2 data"
date: '`r format(Sys.time(), "%d %B, %Y")`'
output: html_document
---

# Description

This script
* creates a hg19 annotation of filtered and unfiltered data from the EPICv2 array (instead of the hg38 annotation
provided by Illumina). The hg19 annotation is taken from: http://zwdzwd.github.io/InfiniumAnnotation/EPIC_hm450_hg19.html
* creates a "pseudo-EPICv1" version of EPIC v2 data which only includes the probes found on the EPICv1 array.
The probe names are replaced with the probe names from the EPICv1 array.

Both of these steps are performed for filtered and unfiltered data.
The pseudo-EPICv1 version is useful when using software designed for EPICv1, e.g. for clock calculation via methylclock or cell type estimation via EpiDISH.

<br>
**Instructions**
Please only run this script if your data was generated using the EPICv2 array.

```{r runtime start, include=FALSE}
start_time <- Sys.time()
```

```{r setup, include=FALSE}
user_choices <- readRDS("../data/user_choices.rds")
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
knitr::opts_chunk$set(echo = FALSE)
if(user_choices$package_directory != "PLEASE FILL IN"){
.libPaths(c(user_choices$package_directory, .libPaths()))
}
needed_packages <- c("BiocManager", "dplyr", "knitr", "rmarkdown", "tibble", "minfi", "janitor")
installed_packages <- needed_packages %in% rownames(installed.packages())
if (!("minfi" %in% rownames(installed.packages()))) {
BiocManager::install("minfi")
}
if (any(installed_packages == FALSE)) {
install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org")
}
lapply(needed_packages, library, character.only = TRUE)
library(minfi)
library(janitor)
```

```{r load data, include=FALSE}
summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean_unfiltered_quantile_bmiq_combated.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered_quantile_bmiq_combated.Rdata"))
load(paste0(user_choices$project_name, "/reports/annotations_clean_filtered.Rdata"))
load(paste0(user_choices$project_name, "/reports/annotations_clean_unfiltered.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
annotation_hg19 <- read.table(paste0(user_choices$personal_path, "/epic_preprocessing_k2h/data/EPICv2.hg19.manifest.tsv"))
annotation_hg19 <- row_to_names(annotation_hg19, row_number = 1, remove_row = TRUE, remove_rows_above = TRUE)
```

```{r, check array type, include = TRUE}
if (user_choices$array_type == "v1") {
cat("Please do not run script 11 if your data was generated using an EPICv1 array")
knit_exit()
}
```

```{r, hg19 annotation filtered data, include = FALSE}
annations_clean_filtered_hg19 <- annotation_hg19 %>%
dplyr::filter(Probe_ID %in% rownames(Betas_clean_quantile_bmiq_filtered_combated))
save(annations_clean_filtered_hg19, file = paste0(user_choices$project_name, "/reports/annations_clean_filtered_hg19.Rdata"))
```

```{r, hg19 annotation unfiltered data, include = FALSE}
annotions_clean_unfiltered_hg19 <- annotation_hg19 %>%
dplyr::filter(Probe_ID %in% rownames(Betas_clean_unfiltered_quantile_bmiq_combated))
save(annotions_clean_unfiltered_hg19, file = paste0(user_choices$project_name, "/reports/annotions_clean_unfiltered_hg19.Rdata"))
```

```{r, create pseudo-EPICv1 version for filtered data, include=FALSE}
# create annotation only containing filtered CpGs that are also on v1 and exclude any duplicated loci
annotations_clean_filtered_pseudo_v1 <- annotations_clean_filtered %>%
filter(Name %in% rownames(Betas_clean_filtered_quantile_bmiq_combated)) %>%
filter(EPICv1_Loci != "") %>%
filter(duplicated(EPICv1_Loci) == FALSE)
# only keep betas that are on v1 and exclude any duplicated loci (keep first CpG)
keep_betas <- (rownames(Betas_clean_filtered_quantile_bmiq_combated) %in% annotations_clean_filtered_pseudo_v1$Name)
Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1 <- Betas_clean_filtered_quantile_bmiq_combated[keep_betas,]
# order Beta rownames after annotation
Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1 <- Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1[match(annotations_clean_filtered_pseudo_v1$Name, rownames(Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1)),]
# rename Beta rownames
rownames(Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1) <- annotations_clean_filtered_pseudo_v1$EPICv1_Loci
# save Betas
save(Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1, file = paste0(user_choices$project_name, "/processed_data/final_data/Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1.Rdata"))
```

```{r, info output for filtered pseudo-EPICv1 version, results='asis'}
keep_betas_df <- as.data.frame(keep_betas)
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" filtered CpGs were not found on EPICv1 and therefore excluded"), sep = "<br>\n")
dim_Betas_filtered <- dim(Betas_clean_quantile_bmiq_filtered_combated_pseudo_v1)
step_number <- c("14")
step <- c("Create pseudo-EPICv1 version (filtered data)")
data_class <- c("Betas")
samples <- c(dim_Betas_filtered[2])
probes <- c(dim_Betas_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
```

```{r, create pseudo-EPICv1 version for unfiltered data, include=FALSE}
# create annotation only containing unfiltered CpGs that are also on v1 and exclude any duplicated loci
annotations_clean_unfiltered_pseudo_v1 <- annotations_clean_unfiltered %>%
filter(Name %in% rownames(Betas_clean_unfiltered_quantile_bmiq_combated)) %>%
filter(EPICv1_Loci != "") %>%
filter(duplicated(EPICv1_Loci) == FALSE)
# only keep betas that are on v1 and exclude any duplicated loci (keep first CpG)
keep_betas <- (rownames(Betas_clean_unfiltered_quantile_bmiq_combated) %in% annotations_clean_unfiltered_pseudo_v1$Name)
Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1 <- Betas_clean_unfiltered_quantile_bmiq_combated[keep_betas,]
# order Beta rownames after annotation
Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1 <- Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1[match(annotations_clean_unfiltered_pseudo_v1$Name, rownames(Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1)),]
# rename Beta rownames
rownames(Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1) <- annotations_clean_unfiltered_pseudo_v1$EPICv1_Loci
# save Betas
save(Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1, file = paste0(user_choices$project_name, "/processed_data/final_data/Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1.Rdata"))
```

```{r, info output for unfiltered pseudo-EPICv1 version, results='asis'}
keep_betas_df <- as.data.frame(keep_betas)
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" unfiltered CpGs were not found on EPICv1 and therefore excluded"), sep = "<br>\n")
dim_Betas_unfiltered <- dim(Betas_clean_quantile_bmiq_unfiltered_combated_pseudo_v1)
step_number <- c("15")
step <- c("Create pseudo-EPICv1 version (unfiltered data)")
data_class <- c("Betas")
samples <- c(dim_Betas_unfiltered[2])
probes <- c(dim_Betas_unfiltered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
```

## User report

```{r, info output for user, results='asis'}
cat("Session info text file was updated", sep = "<br>\n")
```

## Preprocessing summary table

```{r, preprocessing summary table}
summary_table_preprocessing %>% paged_table()
```

```{r, document session info into a text file, include=FALSE}
connection <- file(paste0(user_choices$personal_path, "/", user_choices$project_name,
"/session_info_all.txt"), "a+")
writeLines("######################################################################", connection)
writeLines("############################ Script 6: #############################", connection)
writeLines("######################################################################", connection)
sessioninfo <- sessionInfo()
writeLines("\nR Version:", connection)
writeLines(paste0(" ", sessioninfo$R.version$version.string), connection)
writeLines("\nAttached packages:", connection)
# nicely format packages (with version)
package_version <- unlist(lapply(sessioninfo$otherPkgs, function(x){paste0(" ",x$Package, " (", x$Version, ")")}))
names(package_version) <- NULL
for(i in 1:length(package_version)) {
writeLines(package_version[i], connection)
}
writeLines("\n", connection)
close(connection)
```

```{r runtime end, include=FALSE}
end_time <- Sys.time()
run_time <- as.numeric(round((end_time - start_time), 1), units = "mins")
```

## Completion time

```{r, completion time, results = 'asis'}
cat("The time for the script to run was:", run_time, "minutes", sep = " ")
```

0 comments on commit 76ff990

Please sign in to comment.