Skip to content

Commit

Permalink
package path added to all, personid added to title desity plot script…
Browse files Browse the repository at this point in the history
… 3, sex diagnostic plot added to script 4
  • Loading branch information
NatanYusupov committed May 10, 2024
1 parent cd63070 commit 1099e4f
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 14 deletions.
6 changes: 4 additions & 2 deletions scripts/2_import_raw_DNAm_data.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ start_time <- Sys.time()

```{r setup, include=FALSE}
needed_packages <- c("BiocManager", "knitr", "dplyr", "rmarkdown")
user_choices <- readRDS("../data/user_choices.rds")
if(user_choices$package_path != "PLEASE FILL IN"){
.libPaths(c(user_choices$package_path, .libPaths()))
}
installed_packages <- needed_packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org")
Expand All @@ -24,8 +28,6 @@ if (any(installed_biocmanager_packages == FALSE)) {
}
lapply(needed_biocmanager_packages, library, character.only = TRUE)
user_choices <- readRDS("../data/user_choices.rds")
if (user_choices$array_type == "v2") {
if (!("jokergoo/IlluminaHumanMethylationEPICv2manifest" %in% rownames(installed.packages()))) {
BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2manifest")
Expand Down
30 changes: 21 additions & 9 deletions scripts/3_quality_control.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ start_time <- Sys.time()

```{r setup, include=FALSE}
needed_packages <- c("BiocManager", "dplyr", "knitr", "tidyr", "ggplot2", "ggrepel", "rmarkdown")
user_choices <- readRDS("../data/user_choices.rds")
if(user_choices$package_path != "PLEASE FILL IN"){
.libPaths(c(user_choices$package_path, .libPaths()))
}
installed_packages <- needed_packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org")
Expand All @@ -21,9 +25,9 @@ if (!("minfi" %in% rownames(installed.packages()))) {
}
library(minfi)
user_choices <- readRDS("../data/user_choices.rds")
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
knitr::opts_chunk$set(echo = FALSE)
phenotype_data <- readRDS(paste0(user_choices$project_name, "/processed_data/phenotype_data.rds"))
```

```{r load data, include=FALSE}
Expand Down Expand Up @@ -80,8 +84,8 @@ detP_mean_df <- detP_original %>%
detP_plot <- detP_mean_df %>%
ggplot(aes(x = personid, y = `Mean Detection p value`)) +
geom_col() +
geom_hline(yintercept = user_choices$detP_threshold, color = "darkred") +
geom_label_repel(data = subset(detP_mean_df, `Mean Detection p value` > user_choices$detP_threshold),
geom_hline(yintercept = user_choices$detP_sample_threshold, color = "darkred") +
geom_label_repel(data = subset(detP_mean_df, `Mean Detection p value` > user_choices$detP_sample_threshold),
aes(label = personid), size = 3, box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines")) +
theme_bw() +
Expand All @@ -94,7 +98,7 @@ detP_plot
The following samples were removed:

```{r, remove poor quality samples with mean detection p value mean > threshold}
keep <- colMeans(detP_original) < user_choices$detP_threshold
keep <- colMeans(detP_original) < user_choices$detP_sample_threshold
RGSet_quality <- RGSet_original[,keep]
save(RGSet_quality, file = paste0(user_choices$project_name, "/processed_data/RGSet_quality.Rdata"))
Expand All @@ -106,7 +110,7 @@ detP_quality <- detP_original[,keep]
save(detP_quality, file = paste0(user_choices$project_name, "/reports/detP_quality.Rdata"))
# display removed samples and corresponding detection p values and save for script 5
detP_mean_fails_df <- detP_mean_df %>% filter(`Mean Detection p value` > user_choices$detP_threshold)
detP_mean_fails_df <- detP_mean_df %>% filter(`Mean Detection p value` > user_choices$detP_sample_threshold)
saveRDS(detP_mean_fails_df, paste0(user_choices$project_name, "/reports/detP_mean_fails_df.rds"))
detP_mean_fails_df %>% paged_table()
```
Expand Down Expand Up @@ -134,10 +138,18 @@ saveRDS(summary_table_preprocessing, paste0(user_choices$project_name, "/reports
## Individual Density report

```{r, individual density report}
for (i in 1:ncol(RGSet_quality))
{titel <- paste(rownames(pData(RGSet_quality))[i])
densityPlot(as.matrix(Betas_quality[,i]), main = titel)
print(i)}
title_df <- data.frame(rownames(pData(RGSet_quality)))
colnames(title_df) <- "arrayid"
title_df <- title_df %>%
left_join(phenotype_data, by = "arrayid") %>%
dplyr::select(arrayid, personid)
for (i in 1:ncol(RGSet_quality)){
title_df_current <- title_df %>% filter(arrayid == rownames(pData(RGSet_quality))[i] %>% pull(personid))
titel <- paste0(rownames(pData(RGSet_quality))[i], " / ", title_df_current)
densityPlot(as.matrix(Betas_quality[,i]), main = titel)
print(i)
}
```

```{r, save reports as pdf, include=FALSE}
Expand Down
23 changes: 20 additions & 3 deletions scripts/4_check_matching_of_epigenetic_sex.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ start_time <- Sys.time()

```{r setup, include=FALSE}
needed_packages <- c("BiocManager", "dplyr", "knitr", "ggplot2", "rmarkdown")
user_choices <- readRDS("../data/user_choices.rds")
if(user_choices$package_path != "PLEASE FILL IN"){
.libPaths(c(user_choices$package_path, .libPaths()))
}
installed_packages <- needed_packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(needed_packages[!installed_packages], repos = "http://cran.us.r-project.org")
Expand All @@ -21,7 +25,6 @@ if (!("minfi" %in% rownames(installed.packages()))) {
}
library(minfi)
user_choices <- readRDS("../data/user_choices.rds")
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
knitr::opts_chunk$set(echo = FALSE)
```
Expand Down Expand Up @@ -63,8 +66,10 @@ sex_df <- phenotype_data %>%

## Matching of predicted and reported sex

If you have any sex mismatches, they will be displayed in the table above as red and matches will be blue

If you have any sex mismatches, they will be displayed in the table below as red and matches will be blue
Additionally, a comparison between median total intensities of sex chromosomes is displayed
(samples should be further examined if not clearly clustering with male or female clusters)

```{r, test sex matches}
mismatch_check_df <- sex_df %>%
select(reported_sex, predicted_sex) %>%
Expand All @@ -86,6 +91,18 @@ sex_df %>% dplyr::count(match) %>%
kable(table(sex_df$reported_sex, sex_df$predicted_sex),
col.names = c("Predicted Female", "Predicted Male"), type = "html",
caption = "Comparison of Reported to Predicted Sex")
plotSex_custom <- function (object, id = NULL)
{
plot(x = object$predicted_sex_xmedian, y = object$predicted_sex_ymedian, type = "n", xlab = "X chr, median total intensity (log2)",
ylab = "Y chr, median total intensity (log2)")
text(x = object$predicted_sex_xmedian, y = object$predicted_sex_ymedian, labels = id, col = ifelse(object$predicted_sex ==
"M", "deepskyblue", "deeppink3"))
legend("bottomleft", c("M", "F"), col = c("deepskyblue",
"deeppink3"), pch = 16)
}
plotSex_custom(sex_df, id = sex_df$personid)
```

## Mismatches of predicted and reported sex
Expand Down

0 comments on commit 1099e4f

Please sign in to comment.