diff --git a/scripts/11_optional_create_pseudo_epicv1_version_for_v2.Rmd b/scripts/11_annotate_hg19_create_pseudo_epicv1_from_epicv2.Rmd
similarity index 90%
rename from scripts/11_optional_create_pseudo_epicv1_version_for_v2.Rmd
rename to scripts/11_annotate_hg19_create_pseudo_epicv1_from_epicv2.Rmd
index f98128e..2c57f79 100644
--- a/scripts/11_optional_create_pseudo_epicv1_version_for_v2.Rmd
+++ b/scripts/11_annotate_hg19_create_pseudo_epicv1_from_epicv2.Rmd
@@ -1,23 +1,25 @@
---
-title: "Script 11 (optional): Pseudo-EPICv1 version of EPICv2 data"
-date: '`r format(Sys.time(), "%d %B, %Y")`'
+title: "Script 11: Create hg19 annotation for EPICv2 and pseudo-EPICv1 version from EPICv2 data"
+date: '`r format(Sys.time(), "%d %B, %Y")`'
output: html_document
---
# Description
This script
-* creates a hg19 annotation of filtered and unfiltered data from the EPICv2 array (instead of the hg38 annotation
-provided by Illumina). The hg19 annotation is taken from: http://zwdzwd.github.io/InfiniumAnnotation/EPIC_hm450_hg19.html
-* creates a "pseudo-EPICv1" version of EPIC v2 data which only includes the probes found on the EPICv1 array.
-The probe names are replaced with the probe names from the EPICv1 array.
-Both of these steps are performed for filtered and unfiltered data.
-The pseudo-EPICv1 version is useful when using software designed for EPICv1, e.g. for clock calculation via methylclock or cell type estimation via EpiDISH.
+* creates an hg19 annotation for filtered and unfiltered data of the EPICv2 array (instead of the original hg38 annotation provided by Illumina). The hg19 annotation is taken from: http://zwdzwd.github.io/InfiniumAnnotation/EPIC_hm450_hg19.html
+* creates a "pseudo-EPICv1" version of EPICv2 data which only includes the probes existing on the EPICv1 array.
+The probe names from EPICv2 are replaced with the probe names of the EPICv1 array.
+
+**Notes:**
+Both of these steps are performed for filtered and unfiltered data.
+The pseudo-EPICv1 version is useful when using software designed for EPICv1, e.g. for clock calculation via methylclock or cell type estimation via EpiDISH.
+
**Instructions**
-Please only run this script if your data was generated using the EPICv2 array.
+This script is only intended for EPICv2 data. Do not run it on EPICv1 data.
```{r runtime start, include=FALSE}
start_time <- Sys.time()
@@ -59,7 +61,7 @@ annotation_hg19 <- row_to_names(annotation_hg19, row_number = 1, remove_row = TR
```{r, check array type, include = TRUE}
if (user_choices$array_type == "v1") {
- cat("Please do not run script 11 if your data was generated using an EPICv1 array")
+ cat("Error: You are running this script on EPICv1 data. The script is therefore terminated.") # message printed for the user; knit_exit() on the next line then ends knitting early
knit_exit()
}
```
@@ -80,6 +82,7 @@ save(annotions_clean_unfiltered_hg19, file = paste0(user_choices$project_name, "
# create annotation only containing filtered CpGs that are also on v1 and exclude any duplicated loci
annotations_clean_filtered_pseudo_v1 <- annotations_clean_filtered %>%
filter(Name %in% rownames(Betas_clean_filtered_quantile_bmiq_combated)) %>%
+ # only keep loci on EPICv1, filter wrongly annotated empty value
filter(EPICv1_Loci != "") %>%
filter(duplicated(EPICv1_Loci) == FALSE)
@@ -109,7 +112,6 @@ save(Betas_clean_filtered_quantile_bmiq_combated_pseudo_v1, file = paste0(user_c
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
-
```
```{r, create pseudo-EPICv1 version for unfiltered data, include=FALSE}
@@ -163,7 +165,7 @@ summary_table_preprocessing %>% paged_table()
connection <- file(paste0(user_choices$personal_path, "/", user_choices$project_name,
"/session_info_all.txt"), "a+")
writeLines("######################################################################", connection)
-writeLines("############################ Script 6: #############################", connection)
+writeLines("############################ Script 11: ##############################", connection)
writeLines("######################################################################", connection)
sessioninfo <- sessionInfo()
writeLines("\nR Version:", connection)