From 1099e4f8ebeb30b65e17d97b94270f7974f90265 Mon Sep 17 00:00:00 2001
From: Natan Yusupov <natan_yusupov@psych.mpg.de>
Date: Fri, 10 May 2024 10:53:42 +0200
Subject: [PATCH] package path added to all, personid added to title desity
 plot script 3, sex diagnostic plot added to script 4

---
 scripts/2_import_raw_DNAm_data.Rmd            |  6 ++--
 scripts/3_quality_control.Rmd                 | 30 +++++++++++++------
 .../4_check_matching_of_epigenetic_sex.Rmd    | 23 ++++++++++++--
 3 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/scripts/2_import_raw_DNAm_data.Rmd b/scripts/2_import_raw_DNAm_data.Rmd
index b4abfc6..3c84dd1 100644
--- a/scripts/2_import_raw_DNAm_data.Rmd
+++ b/scripts/2_import_raw_DNAm_data.Rmd
@@ -10,6 +10,10 @@ start_time <- Sys.time()
 
 ```{r setup, include=FALSE}
 needed_packages <- c("BiocManager", "knitr", "dplyr", "rmarkdown")
+user_choices <- readRDS("../data/user_choices.rds")
+if(user_choices$package_path != "PLEASE FILL IN"){
+  .libPaths(c(user_choices$package_path, .libPaths()))
+}
 installed_packages <- needed_packages %in% rownames(installed.packages())
 if (any(installed_packages == FALSE)) {
   install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org")
@@ -24,8 +28,6 @@ if (any(installed_biocmanager_packages == FALSE)) {
 }
 lapply(needed_biocmanager_packages, library, character.only = TRUE)
 
-user_choices <- readRDS("../data/user_choices.rds")
-
 if (user_choices$array_type == "v2") {
   if (!("jokergoo/IlluminaHumanMethylationEPICv2manifest" %in% rownames(installed.packages()))) {
     BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2manifest")
diff --git a/scripts/3_quality_control.Rmd b/scripts/3_quality_control.Rmd
index 6145248..2480c74 100644
--- a/scripts/3_quality_control.Rmd
+++ b/scripts/3_quality_control.Rmd
@@ -10,6 +10,10 @@ start_time <- Sys.time()
 
 ```{r setup, include=FALSE}
 needed_packages <- c("BiocManager", "dplyr", "knitr", "tidyr", "ggplot2", "ggrepel", "rmarkdown")
+user_choices <- readRDS("../data/user_choices.rds")
+if(user_choices$package_path != "PLEASE FILL IN"){
+  .libPaths(c(user_choices$package_path, .libPaths()))
+}
 installed_packages <- needed_packages %in% rownames(installed.packages())
 if (any(installed_packages == FALSE)) {
   install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org")
@@ -21,9 +25,9 @@ if (!("minfi" %in% rownames(installed.packages()))) {
 }
 library(minfi)
 
-user_choices <- readRDS("../data/user_choices.rds")
 knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
 knitr::opts_chunk$set(echo = FALSE)
+phenotype_data <- readRDS(paste0(user_choices$project_name, "/processed_data/phenotype_data.rds"))
 ```
 
 ```{r load data, include=FALSE}
@@ -80,8 +84,8 @@ detP_mean_df <- detP_original %>%
 detP_plot <- detP_mean_df %>%
   ggplot(aes(x = personid, y = `Mean Detection p value`)) +
   geom_col() +
-  geom_hline(yintercept = user_choices$detP_threshold, color = "darkred") +
-  geom_label_repel(data = subset(detP_mean_df, `Mean Detection p value` > user_choices$detP_threshold), 
+  geom_hline(yintercept = user_choices$detP_sample_threshold, color = "darkred") +
+  geom_label_repel(data = subset(detP_mean_df, `Mean Detection p value` > user_choices$detP_sample_threshold), 
                    aes(label = personid), size = 3, box.padding = unit(0.35, "lines"), 
                    point.padding = unit(0.3, "lines")) +
   theme_bw() +
@@ -94,7 +98,7 @@ detP_plot
 The following samples were removed:
 
 ```{r, remove poor quality samples with mean detection p value mean > threshold}
-keep <- colMeans(detP_original) < user_choices$detP_threshold
+keep <- colMeans(detP_original) < user_choices$detP_sample_threshold
 
 RGSet_quality <- RGSet_original[,keep]
 save(RGSet_quality, file = paste0(user_choices$project_name, "/processed_data/RGSet_quality.Rdata"))
@@ -106,7 +110,7 @@ detP_quality <- detP_original[,keep]
 save(detP_quality, file = paste0(user_choices$project_name, "/reports/detP_quality.Rdata"))
 
 # display removed samples and corresponding detection p values and save for script 5
-detP_mean_fails_df <- detP_mean_df %>% filter(`Mean Detection p value` > user_choices$detP_threshold)
+detP_mean_fails_df <- detP_mean_df %>% filter(`Mean Detection p value` > user_choices$detP_sample_threshold)
 saveRDS(detP_mean_fails_df, paste0(user_choices$project_name, "/reports/detP_mean_fails_df.rds"))
 detP_mean_fails_df %>% paged_table()
 ```
@@ -134,10 +138,18 @@ saveRDS(summary_table_preprocessing, paste0(user_choices$project_name, "/reports
 ## Individual Density report
 
 ```{r, individual density report}
-for (i in 1:ncol(RGSet_quality))
-{titel <- paste(rownames(pData(RGSet_quality))[i])
-densityPlot(as.matrix(Betas_quality[,i]), main = titel)
-print(i)}
+title_df <- data.frame(rownames(pData(RGSet_quality)))
+colnames(title_df) <- "arrayid"
+title_df <- title_df %>%
+  left_join(phenotype_data, by = "arrayid") %>%
+  dplyr::select(arrayid, personid)
+
+for (i in 1:ncol(RGSet_quality)){ 
+  title_df_current <- title_df %>% filter(arrayid == rownames(pData(RGSet_quality))[i] %>% pull(personid))
+  titel <- paste0(rownames(pData(RGSet_quality))[i], " / ", title_df_current)
+  densityPlot(as.matrix(Betas_quality[,i]), main = titel)
+  print(i)
+  }
 ```
 
 ```{r, save reports as pdf, include=FALSE}
diff --git a/scripts/4_check_matching_of_epigenetic_sex.Rmd b/scripts/4_check_matching_of_epigenetic_sex.Rmd
index 8407c04..736dafd 100644
--- a/scripts/4_check_matching_of_epigenetic_sex.Rmd
+++ b/scripts/4_check_matching_of_epigenetic_sex.Rmd
@@ -10,6 +10,10 @@ start_time <- Sys.time()
 
 ```{r setup, include=FALSE}
 needed_packages <- c("BiocManager", "dplyr", "knitr", "ggplot2", "rmarkdown")
+user_choices <- readRDS("../data/user_choices.rds")
+if(user_choices$package_path != "PLEASE FILL IN"){
+  .libPaths(c(user_choices$package_path, .libPaths()))
+}
 installed_packages <- needed_packages %in% rownames(installed.packages())
 if (any(installed_packages == FALSE)) {
   install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org")
@@ -21,7 +25,6 @@ if (!("minfi" %in% rownames(installed.packages()))) {
 }
 library(minfi)
 
-user_choices <- readRDS("../data/user_choices.rds")
 knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
 knitr::opts_chunk$set(echo = FALSE)
 ```
@@ -63,8 +66,10 @@ sex_df <- phenotype_data %>%
 
 ## Matching of predicted and reported sex
 
-If you have any sex mismatches, they will be displayed in the table above as red and matches will be blue
-    
+If you have any sex mismatches, they will be displayed in the table below as red and matches will be blue  
+Additionally, a comparison between median total intensities of sex chromosomes is displayed 
+(samples should be further examined if not clearly clustering with male or female clusters)  
+
 ```{r, test sex matches}
 mismatch_check_df <- sex_df %>%
   select(reported_sex, predicted_sex) %>%
@@ -86,6 +91,18 @@ sex_df %>% dplyr::count(match) %>%
 kable(table(sex_df$reported_sex, sex_df$predicted_sex), 
       col.names = c("Predicted Female", "Predicted Male"), type = "html",
       caption = "Comparison of Reported to Predicted Sex")
+
+plotSex_custom <- function (object, id = NULL) 
+{
+  plot(x = object$predicted_sex_xmedian, y = object$predicted_sex_ymedian, type = "n", xlab = "X chr, median total intensity (log2)", 
+    ylab = "Y chr, median total intensity (log2)")
+  text(x = object$predicted_sex_xmedian, y = object$predicted_sex_ymedian, labels = id, col = ifelse(object$predicted_sex == 
+    "M", "deepskyblue", "deeppink3"))
+  legend("bottomleft", c("M", "F"), col = c("deepskyblue", 
+    "deeppink3"), pch = 16)
+}
+
+plotSex_custom(sex_df, id = sex_df$personid)
 ```
 
 ## Mismatches of predicted and reported sex