From 5f72c19071de7583957b6eac47d825ad99815d77 Mon Sep 17 00:00:00 2001 From: "Vera N. Karlbauer" Date: Fri, 7 Mar 2025 12:06:18 +0100 Subject: [PATCH] Fixed naming of gRatioSet and betas objects --- scripts/6_filtering_cpgs.Rmd | 114 ++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 55 deletions(-) diff --git a/scripts/6_filtering_cpgs.Rmd b/scripts/6_filtering_cpgs.Rmd index 5319162..865c403 100644 --- a/scripts/6_filtering_cpgs.Rmd +++ b/scripts/6_filtering_cpgs.Rmd @@ -25,25 +25,29 @@ if (!("minfi" %in% rownames(installed.packages()))) { } library(minfi) -knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/")) knitr::opts_chunk$set(echo = FALSE) ``` -```{r load data, include=FALSE} -summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds")) -load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata")) -load(paste0(user_choices$project_name, "/processed_data/gRatioSet_clean.Rdata")) -load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata")) -load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata")) -load(paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata")) +```{r load data 1, include=FALSE} # Info about cross-hybridizing probes comes from Chen et al. 2013, DOI: 10.4161/epi.23470: -load("data/ChenProbeIDs.rdata") +load("../data/ChenProbeIDs.rdata") # Info about mapping inaccuracies for EPIC v2.0 from Illumina product information: if (user_choices$array_type == "v2") { v2_mapping_inaccuracy <- read.csv("../data/EPIC-8v2-0_A1-190MappingInaccuracies.csv")} # Info about flagged probes for EPIC v2.0 from Illumina product information: if (user_choices$array_type == "v2") { v2_flagged_probes <- read.csv("../data/EPIC-8v2-0_A1-FlaggedProbes.csv")} + +knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/")) +``` + +```{r load data 2, include=FALSE} +summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds")) +load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata")) +load(paste0(user_choices$project_name, "/processed_data/gRatioSet_clean.Rdata")) +load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata")) +load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata")) +load(paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata")) ``` # Description @@ -127,13 +131,13 @@ if (user_choices$array_type == "v2") { keep_betas_df <- as.data.frame(keep_betas) cat(paste0(nrow(exclude_replicates), " replicate probes were removed")) - dim_gRatioSet_filtered <- dim(gRatioSet_clean) - dim_Betas_filtered <- dim(Betas_clean) + dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean) + dim_Betas_clean_filtered <- dim(Betas_clean) step_number <- c("4", "4") step <- c("Filter replicates", "Filter replicates") data_class <- c("gRatioSet", "Betas") - samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) - probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) + samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2]) + probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -150,7 +154,7 @@ Betas_clean_filtered <- Betas_clean[keep_betas,] # ensure probes are in the same order in cleaned gRatioSet (RGSet with annotations) and detP objects, then subset detP_clean_filtered2 <- detP_clean[match(featureNames(gRatioSet_clean),rownames(detP_clean)),] -keep_gratioset <- rowSums(detP_clean_filtered < 0.01) == ncol(gRatioSet_clean) +keep_gratioset <- rowSums(detP_clean_filtered2 < 0.01) == ncol(gRatioSet_clean) gRatioSet_clean_filtered <- gRatioSet_clean[keep_gratioset,] ``` @@ -159,13 +163,13 @@ keep_betas_df <- as.data.frame(keep_betas) cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), " probes failed in one or more samples"), sep = "
\n") -dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) -dim_Betas_filtered <- dim(Betas_clean_filtered) +dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered) +dim_Betas_clean_filtered <- dim(Betas_clean_filtered) step_number <- c("5", "5") step <- c("Filter technical", "Filter technical") data_class <- c("gRatioSet", "Betas") -samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) -probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) +samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2]) +probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -188,13 +192,13 @@ keep_betas_df <- as.data.frame(keep_betas) cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), " probes were on the X chromosome"), sep = "
\n") -dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) -dim_Betas_filtered <- dim(Betas_clean_filtered) +dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered) +dim_Betas_clean_filtered <- dim(Betas_clean_filtered) step_number <- c("6", "6") step <- c("Filter X Chromosome", "Filter X Chromosome") data_class <- c("gRatioSet", "Betas") -samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) -probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) +samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2]) +probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -215,13 +219,13 @@ keep_betas_df <- as.data.frame(keep_betas) cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), " probes were on the Y chromosome"), sep = "
\n") -dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) -dim_Betas_filtered <- dim(Betas_clean_filtered) +dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered) +dim_Betas_clean_filtered <- dim(Betas_clean_filtered) step_number <- c("7", "7") step <- c("Filter Y Chromosome", "Filter Y Chromosome") data_class <- c("gRatioSet", "Betas") -samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) -probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) +samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2]) +probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -235,13 +239,13 @@ Betas_clean_filtered <- Betas_clean_filtered[rownames(Betas_clean_filtered) %in% ``` ```{r, info output for user probes affected by snps, results='asis'} -dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) -dim_Betas_filtered <- dim(gRatioSet_clean_filtered) +dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered) +dim_Betas_clean_filtered <- dim(gRatioSet_clean_filtered) step_number <- c("8", "8") step <- c("Filter SNP Affected", "Filter SNP Affected") data_class <- c("gRatioSet", "Betas") -samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) -probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) +samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2]) +probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -283,13 +287,13 @@ if (user_choices$array_type == "v1") { cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), " probes were cross-hybridizing according to Chen et al."), sep = "
\n") - dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) - dim_Betas_filtered <- dim(Betas_clean_filtered) + dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered) + dim_Betas_clean_filtered <- dim(Betas_clean_filtered) step_number <- c("9", "9") step <- c("Filter Cross-Hybridizing Chen", "Filter Cross-Hybridizing Chen") data_class <- c("gRatioSet", "Betas") - samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) - probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) + samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2]) + probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -305,8 +309,8 @@ if (user_choices$array_type == "v1") { keep_betas <- !(rownames(Betas_clean_filtered) %in% exclude_McCartney$V1) Betas_clean_filtered <- Betas_clean_filtered[keep_betas,] - keep_gratioset <- !(featureNames(dim_gRatioSet_filtered) %in% exclude_McCartney$V1) - dim_gRatioSet_filtered <- dim_gRatioSet_filtered[keep_gratioset,] + keep_gratioset <- !(featureNames(dim_gRatioSet_clean_filtered) %in% exclude_McCartney$V1) + gRatioSet_clean_filtered <- gRatioSet_clean_filtered[keep_gratioset,] } ``` @@ -316,13 +320,13 @@ if (user_choices$array_type == "v1") { cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), " probes may be cross-hybridizing according to McCartney et al."), sep = "
\n") - dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) - dim_Betas_filtered <- dim(Betas_clean_filtered) + dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered) + dim_Betas_clean_filtered <- dim(Betas_clean_filtered) step_number <- c("10", "10") step <- c("Filter Cross-Hybridizing McCartney", "Filter Cross-Hybridizing McCartney") data_class <- c("gRatioSet", "Betas") - samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) - probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) + samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2]) + probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -351,13 +355,13 @@ if (user_choices$array_type == "v1") { cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), " probes were polymorphic"), sep = "
\n") - dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) - dim_Betas_filtered <- dim(Betas_clean_filtered) + dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered) + dim_Betas_clean_filtered <- dim(Betas_clean_filtered) step_number <- c("11", "11") step <- c("Filter Polymorphic", "Filter Polymorphic") data_class <- c("gRatioSet", "Betas") - samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) - probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) + samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2]) + probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -369,10 +373,10 @@ saveRDS(summary_table_preprocessing, paste0(user_choices$project_name, "/reports ```{r, filter out mapping inaccuracies, include=FALSE} if (user_choices$array_type == "v2") { - keep_betas <- !(rownames(Betas_clean_filtered) %in% v2_mapping_inacc$IlmnID) + keep_betas <- !(rownames(Betas_clean_filtered) %in% v2_mapping_inaccuracy$IlmnID) Betas_clean_filtered <- Betas_clean_filtered[keep_betas,] - keep_gratioset <- !(featureNames(gRatioSet_clean_filtered) %in% v2_mapping_inacc$IlmnID) + keep_gratioset <- !(featureNames(gRatioSet_clean_filtered) %in% v2_mapping_inaccuracy$IlmnID) gRatioSet_clean_filtered <- gRatioSet_clean_filtered[keep_gratioset,] } ``` @@ -383,13 +387,13 @@ if (user_choices$array_type == "v2") { cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), " CpGs show known mapping inaccuracies"), sep = "
\n") - dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) - dim_Betas_filtered <- dim(Betas_clean_filtered) + dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered) + dim_Betas_clean_filtered <- dim(Betas_clean_filtered) step_number <- c("12", "12") step <- c("Filter Mapping Inaccuracies", "Filter Mapping Inaccuracies") data_class <- c("gRatioSet", "Betas") - samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) - probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) + samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2]) + probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -414,13 +418,13 @@ if (user_choices$array_type == "v2") { cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), " CpGs were flagged by Illumina"), sep = "
\n") - dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) - dim_Betas_filtered <- dim(Betas_clean_filtered) + dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered) + dim_Betas_clean_filtered <- dim(Betas_clean_filtered) step_number <- c("13", "13") step <- c("Filter Flagged Probes", "Filter Flagged Probes") data_class <- c("gRatioSet", "Betas") - samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) - probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) + samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2]) + probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -428,7 +432,7 @@ if (user_choices$array_type == "v2") { ``` ```{r, save data, include=FALSE} -save(gRatioSet_filtered, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean_filtered.Rdata")) +save(gRatioSet_clean_filtered, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean_filtered.Rdata")) Betas_clean_filtered <- getBeta(gRatioSet_clean_filtered) # beta values save(Betas_clean_filtered, file = paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered.Rdata"))