Skip to content

Bug fixes #2

Merged
merged 11 commits into from
Mar 19, 2025
Prev Previous commit
Fixed names of beta objects, added dplyr specification to filter functions
Vera N. Karlbauer committed Mar 7, 2025
commit f1bd3ab1aed78b25961e17d63516ca9b7b27bd73
26 changes: 13 additions & 13 deletions scripts/11_annotate_hg19_create_pseudo_epicv1_from_epicv2.Rmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: "Script 11: Create hg19 annotation for EPICv2 and pseudo-EPICv1 version from EPICv2 data"
date: '`r format(Sys.time(), %d %B, %Y")`'
date: '`r format(Sys.time(), "%d %B, %Y")`'
output: html_document
---

@@ -27,11 +27,10 @@ start_time <- Sys.time()

```{r setup, include=FALSE}
user_choices <- readRDS("../data/user_choices.rds")
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
knitr::opts_chunk$set(echo = FALSE)

if(user_choices$package_directory != "PLEASE FILL IN"){
.libPaths(c(user_choices$package_directory, .libPaths()))
if(user_choices$package_path != "PLEASE FILL IN"){
.libPaths(c(user_choices$package_path, .libPaths()))
}

needed_packages <- c("BiocManager", "dplyr", "knitr", "rmarkdown", "tibble", "minfi", "janitor")
@@ -57,6 +56,7 @@ load(paste0(user_choices$project_name, "/reports/annotations_clean_unfiltered.Rd
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
annotation_hg19 <- read.table(paste0(user_choices$personal_path, "/epic_preprocessing_k2h/data/EPICv2.hg19.manifest.tsv"))
annotation_hg19 <- row_to_names(annotation_hg19, row_number = 1, remove_row = TRUE, remove_rows_above = TRUE)
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
```

```{r, check array type, include = TRUE}
@@ -68,7 +68,7 @@ if (user_choices$array_type == "v1") {

```{r, hg19 annotation filtered data, include = FALSE}
annations_clean_filtered_hg19 <- annotation_hg19 %>%
dplyr::filter(Probe_ID %in% rownames(Betas_clean_quantile_bmiq_filtered_combated))
dplyr::filter(Probe_ID %in% rownames(Betas_clean_filtered_quantile_bmiq_combated))
save(annations_clean_filtered_hg19, file = paste0(user_choices$project_name, "/reports/annations_clean_filtered_hg19.Rdata"))
```

@@ -81,10 +81,10 @@ save(annotions_clean_unfiltered_hg19, file = paste0(user_choices$project_name, "
```{r, create pseudo-EPICv1 version for filtered data, include=FALSE}
# create annotation only containing filtered CpGs that are also on v1 and exclude any duplicated loci
annotations_clean_filtered_pseudo_v1 <- annotations_clean_filtered %>%
filter(Name %in% rownames(Betas_clean_filtered_quantile_bmiq_combated)) %>%
dplyr::filter(Name %in% rownames(Betas_clean_filtered_quantile_bmiq_combated)) %>%
# only keep loci that are on EPICv1; drop rows whose EPICv1_Loci is wrongly annotated as an empty string
filter(EPICv1_Loci != "") %>%
filter(duplicated(EPICv1_Loci) == FALSE)
dplyr::filter(EPICv1_Loci != "") %>%
dplyr::filter(duplicated(EPICv1_Loci) == FALSE)

# only keep betas that are on v1 and exclude any duplicated loci (keep first CpG)
keep_betas <- (rownames(Betas_clean_filtered_quantile_bmiq_combated) %in% annotations_clean_filtered_pseudo_v1$Name)
@@ -103,7 +103,7 @@ save(Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1, file = paste0(user_c
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" filtered CpGs were not found on EPICv1 and therefore excluded"), sep = "<br>\n")

dim_Betas_filtered <- dim(Betas_clean_quantile_bmiq_filtered_combated_pseudo_v1)
dim_Betas_filtered <- dim(Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1)
step_number <- c("14")
step <- c("Create pseudo-EPICv1 version (filtered data)")
data_class <- c("Betas")
@@ -117,9 +117,9 @@ save(Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1, file = paste0(user_c
```{r, create pseudo-EPICv1 version for unfiltered data, include=FALSE}
# create annotation only containing unfiltered CpGs that are also on v1 and exclude any duplicated loci
annotations_clean_unfiltered_pseudo_v1 <- annotations_clean_unfiltered %>%
filter(Name %in% rownames(Betas_clean_unfiltered_quantile_bmiq_combated)) %>%
filter(EPICv1_Loci != "") %>%
filter(duplicated(EPICv1_Loci) == FALSE)
dplyr::filter(Name %in% rownames(Betas_clean_unfiltered_quantile_bmiq_combated)) %>%
dplyr::filter(EPICv1_Loci != "") %>%
dplyr::filter(duplicated(EPICv1_Loci) == FALSE)

# only keep betas that are on v1 and exclude any duplicated loci (keep first CpG)
keep_betas <- (rownames(Betas_clean_unfiltered_quantile_bmiq_combated) %in% annotations_clean_unfiltered_pseudo_v1$Name)
@@ -138,7 +138,7 @@ save(Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1, file = paste0(user
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" unfiltered CpGs were not found on EPICv1 and therefore excluded"), sep = "<br>\n")

dim_Betas_unfiltered <- dim(Betas_clean_quantile_bmiq_unfiltered_combated_pseudo_v1)
dim_Betas_unfiltered <- dim(Betas_clean_unfiltered_quantile_bmiq_combated_pseudo_v1)
step_number <- c("15")
step <- c("Create pseudo-EPICv1 version (unfiltered data)")
data_class <- c("Betas")