diff --git a/scripts/2_import_raw_DNAm_data.Rmd b/scripts/2_import_raw_DNAm_data.Rmd index b4abfc6..3c84dd1 100644 --- a/scripts/2_import_raw_DNAm_data.Rmd +++ b/scripts/2_import_raw_DNAm_data.Rmd @@ -10,6 +10,10 @@ start_time <- Sys.time() ```{r setup, include=FALSE} needed_packages <- c("BiocManager", "knitr", "dplyr", "rmarkdown") +user_choices <- readRDS("../data/user_choices.rds") +if(user_choices$package_path != "PLEASE FILL IN"){ + .libPaths(c(user_choices$package_path, .libPaths())) +} installed_packages <- needed_packages %in% rownames(installed.packages()) if (any(installed_packages == FALSE)) { install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org") @@ -24,8 +28,6 @@ if (any(installed_biocmanager_packages == FALSE)) { } lapply(needed_biocmanager_packages, library, character.only = TRUE) -user_choices <- readRDS("../data/user_choices.rds") - if (user_choices$array_type == "v2") { if (!("jokergoo/IlluminaHumanMethylationEPICv2manifest" %in% rownames(installed.packages()))) { BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2manifest") diff --git a/scripts/3_quality_control.Rmd b/scripts/3_quality_control.Rmd index 6145248..2480c74 100644 --- a/scripts/3_quality_control.Rmd +++ b/scripts/3_quality_control.Rmd @@ -10,6 +10,10 @@ start_time <- Sys.time() ```{r setup, include=FALSE} needed_packages <- c("BiocManager", "dplyr", "knitr", "tidyr", "ggplot2", "ggrepel", "rmarkdown") +user_choices <- readRDS("../data/user_choices.rds") +if(user_choices$package_path != "PLEASE FILL IN"){ + .libPaths(c(user_choices$package_path, .libPaths())) +} installed_packages <- needed_packages %in% rownames(installed.packages()) if (any(installed_packages == FALSE)) { install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org") @@ -21,9 +25,9 @@ if (!("minfi" %in% rownames(installed.packages()))) { } library(minfi) -user_choices <- readRDS("../data/user_choices.rds") knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/")) knitr::opts_chunk$set(echo = FALSE) +phenotype_data <- readRDS(paste0(user_choices$project_name, "/processed_data/phenotype_data.rds")) ``` ```{r load data, include=FALSE} @@ -80,8 +84,8 @@ detP_mean_df <- detP_original %>% detP_plot <- detP_mean_df %>% ggplot(aes(x = personid, y = `Mean Detection p value`)) + geom_col() + - geom_hline(yintercept = user_choices$detP_threshold, color = "darkred") + - geom_label_repel(data = subset(detP_mean_df, `Mean Detection p value` > user_choices$detP_threshold), + geom_hline(yintercept = user_choices$detP_sample_threshold, color = "darkred") + + geom_label_repel(data = subset(detP_mean_df, `Mean Detection p value` > user_choices$detP_sample_threshold), aes(label = personid), size = 3, box.padding = unit(0.35, "lines"), point.padding = unit(0.3, "lines")) + theme_bw() + @@ -94,7 +98,7 @@ detP_plot The following samples were removed: ```{r, remove poor quality samples with mean detection p value mean > threshold} -keep <- colMeans(detP_original) < user_choices$detP_threshold +keep <- colMeans(detP_original) < user_choices$detP_sample_threshold RGSet_quality <- RGSet_original[,keep] save(RGSet_quality, file = paste0(user_choices$project_name, "/processed_data/RGSet_quality.Rdata")) @@ -106,7 +110,7 @@ detP_quality <- detP_original[,keep] save(detP_quality, file = paste0(user_choices$project_name, "/reports/detP_quality.Rdata")) # display removed samples and corresponding detection p values and save for script 5 -detP_mean_fails_df <- detP_mean_df %>% filter(`Mean Detection p value` > user_choices$detP_threshold) +detP_mean_fails_df <- detP_mean_df %>% filter(`Mean Detection p value` > user_choices$detP_sample_threshold) saveRDS(detP_mean_fails_df, paste0(user_choices$project_name, "/reports/detP_mean_fails_df.rds")) detP_mean_fails_df %>% paged_table() ``` @@ -134,10 +138,18 @@ saveRDS(summary_table_preprocessing, paste0(user_choices$project_name, "/reports ## Individual Density report ```{r, individual density report} -for (i in 1:ncol(RGSet_quality)) -{titel <- paste(rownames(pData(RGSet_quality))[i]) -densityPlot(as.matrix(Betas_quality[,i]), main = titel) -print(i)} +title_df <- data.frame(rownames(pData(RGSet_quality))) +colnames(title_df) <- "arrayid" +title_df <- title_df %>% + left_join(phenotype_data, by = "arrayid") %>% + dplyr::select(arrayid, personid) + +for (i in 1:ncol(RGSet_quality)){ + title_df_current <- title_df %>% filter(arrayid == rownames(pData(RGSet_quality))[i] %>% pull(personid)) + titel <- paste0(rownames(pData(RGSet_quality))[i], " / ", title_df_current) + densityPlot(as.matrix(Betas_quality[,i]), main = titel) + print(i) + } ``` ```{r, save reports as pdf, include=FALSE} diff --git a/scripts/4_check_matching_of_epigenetic_sex.Rmd b/scripts/4_check_matching_of_epigenetic_sex.Rmd index 8407c04..736dafd 100644 --- a/scripts/4_check_matching_of_epigenetic_sex.Rmd +++ b/scripts/4_check_matching_of_epigenetic_sex.Rmd @@ -10,6 +10,10 @@ start_time <- Sys.time() ```{r setup, include=FALSE} needed_packages <- c("BiocManager", "dplyr", "knitr", "ggplot2", "rmarkdown") +user_choices <- readRDS("../data/user_choices.rds") +if(user_choices$package_path != "PLEASE FILL IN"){ + .libPaths(c(user_choices$package_path, .libPaths())) +} installed_packages <- needed_packages %in% rownames(installed.packages()) if (any(installed_packages == FALSE)) { install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org") @@ -21,7 +25,6 @@ if (!("minfi" %in% rownames(installed.packages()))) { } library(minfi) -user_choices <- readRDS("../data/user_choices.rds") knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/")) knitr::opts_chunk$set(echo = FALSE) ``` @@ -63,8 +66,10 @@ sex_df <- phenotype_data %>% ## Matching of predicted and reported sex -If you have any sex mismatches, they will be displayed in the table above as red and matches will be blue - +If you have any sex mismatches, they will be displayed in the table below as red and matches will be blue +Additionally, a comparison between median total intensities of sex chromosomes is displayed +(samples should be further examined if not clearly clustering with male or female clusters) + ```{r, test sex matches} mismatch_check_df <- sex_df %>% select(reported_sex, predicted_sex) %>% @@ -86,6 +91,18 @@ sex_df %>% dplyr::count(match) %>% kable(table(sex_df$reported_sex, sex_df$predicted_sex), col.names = c("Predicted Female", "Predicted Male"), type = "html", caption = "Comparison of Reported to Predicted Sex") + +plotSex_custom <- function (object, id = NULL) +{ + plot(x = object$predicted_sex_xmedian, y = object$predicted_sex_ymedian, type = "n", xlab = "X chr, median total intensity (log2)", + ylab = "Y chr, median total intensity (log2)") + text(x = object$predicted_sex_xmedian, y = object$predicted_sex_ymedian, labels = id, col = ifelse(object$predicted_sex == + "M", "deepskyblue", "deeppink3")) + legend("bottomleft", c("M", "F"), col = c("deepskyblue", + "deeppink3"), pch = 16) +} + +plotSex_custom(sex_df, id = sex_df$personid) ``` ## Mismatches of predicted and reported sex