Skip to content

Bug fixes #2

Merged
merged 11 commits into from
Mar 19, 2025
Prev Previous commit
Next Next commit
Fixed naming of gRatioSet and betas objects
Vera N. Karlbauer committed Mar 7, 2025
commit 5f72c19071de7583957b6eac47d825ad99815d77
114 changes: 59 additions & 55 deletions scripts/6_filtering_cpgs.Rmd
Original file line number Diff line number Diff line change
@@ -25,25 +25,29 @@ if (!("minfi" %in% rownames(installed.packages()))) {
}
library(minfi)

knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
knitr::opts_chunk$set(echo = FALSE)
```

```{r load data, include=FALSE}
summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/gRatioSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
```{r load data 1, include=FALSE}
# Info about cross-hybridizing probes comes from Chen et al. 2013, DOI: 10.4161/epi.23470:
load("data/ChenProbeIDs.rdata")
load("../data/ChenProbeIDs.rdata")
# Info about mapping inaccuracies for EPIC v2.0 from Illumina product information:
if (user_choices$array_type == "v2") {
v2_mapping_inaccuracy <- read.csv("../data/EPIC-8v2-0_A1-190MappingInaccuracies.csv")}
# Info about flagged probes for EPIC v2.0 from Illumina product information:
if (user_choices$array_type == "v2") {
v2_flagged_probes <- read.csv("../data/EPIC-8v2-0_A1-FlaggedProbes.csv")}

knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
```

```{r load data 2, include=FALSE}
summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/gRatioSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
```

# Description
@@ -127,13 +131,13 @@ if (user_choices$array_type == "v2") {
keep_betas_df <- as.data.frame(keep_betas)
cat(paste0(nrow(exclude_replicates), " replicate probes were removed"))

dim_gRatioSet_filtered <- dim(gRatioSet_clean)
dim_Betas_filtered <- dim(Betas_clean)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean)
dim_Betas_clean_filtered <- dim(Betas_clean)
step_number <- c("4", "4")
step <- c("Filter replicates", "Filter replicates")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])

table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
@@ -150,7 +154,7 @@ Betas_clean_filtered <- Betas_clean[keep_betas,]

# ensure probes are in the same order in cleaned gRatioSet (RGSet with annotations) and detP objects, then subset
detP_clean_filtered2 <- detP_clean[match(featureNames(gRatioSet_clean),rownames(detP_clean)),]
keep_gratioset <- rowSums(detP_clean_filtered < 0.01) == ncol(gRatioSet_clean)
keep_gratioset <- rowSums(detP_clean_filtered2 < 0.01) == ncol(gRatioSet_clean)
gRatioSet_clean_filtered <- gRatioSet_clean[keep_gratioset,]
```

@@ -159,13 +163,13 @@ keep_betas_df <- as.data.frame(keep_betas)
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes failed in one or more samples"), sep = "<br>\n")

dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("5", "5")
step <- c("Filter technical", "Filter technical")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])

table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
@@ -188,13 +192,13 @@ keep_betas_df <- as.data.frame(keep_betas)
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes were on the X chromosome"), sep = "<br>\n")

dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("6", "6")
step <- c("Filter X Chromosome", "Filter X Chromosome")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])

table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
@@ -215,13 +219,13 @@ keep_betas_df <- as.data.frame(keep_betas)
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes were on the Y chromosome"), sep = "<br>\n")

dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("7", "7")
step <- c("Filter Y Chromosome", "Filter Y Chromosome")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])

table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
@@ -235,13 +239,13 @@ Betas_clean_filtered <- Betas_clean_filtered[rownames(Betas_clean_filtered) %in%
```

```{r, info output for user probes affected by snps, results='asis'}
dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(gRatioSet_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(gRatioSet_clean_filtered)
step_number <- c("8", "8")
step <- c("Filter SNP Affected", "Filter SNP Affected")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])

table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
@@ -283,13 +287,13 @@ if (user_choices$array_type == "v1") {
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes were cross-hybridizing according to Chen et al."), sep = "<br>\n")

dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("9", "9")
step <- c("Filter Cross-Hybridizing Chen", "Filter Cross-Hybridizing Chen")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])

table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
@@ -305,8 +309,8 @@ if (user_choices$array_type == "v1") {
keep_betas <- !(rownames(Betas_clean_filtered) %in% exclude_McCartney$V1)
Betas_clean_filtered <- Betas_clean_filtered[keep_betas,]

keep_gratioset <- !(featureNames(dim_gRatioSet_filtered) %in% exclude_McCartney$V1)
dim_gRatioSet_filtered <- dim_gRatioSet_filtered[keep_gratioset,]
keep_gratioset <- !(featureNames(dim_gRatioSet_clean_filtered) %in% exclude_McCartney$V1)
gRatioSet_clean_filtered <- gRatioSet_clean_filtered[keep_gratioset,]
}
```

@@ -316,13 +320,13 @@ if (user_choices$array_type == "v1") {
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes may be cross-hybridizing according to McCartney et al."), sep = "<br>\n")

dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("10", "10")
step <- c("Filter Cross-Hybridizing McCartney", "Filter Cross-Hybridizing McCartney")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])

table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
@@ -351,13 +355,13 @@ if (user_choices$array_type == "v1") {
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes were polymorphic"), sep = "<br>\n")

dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("11", "11")
step <- c("Filter Polymorphic", "Filter Polymorphic")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])

table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
@@ -369,10 +373,10 @@ saveRDS(summary_table_preprocessing, paste0(user_choices$project_name, "/reports

```{r, filter out mapping inaccuracies, include=FALSE}
if (user_choices$array_type == "v2") {
keep_betas <- !(rownames(Betas_clean_filtered) %in% v2_mapping_inacc$IlmnID)
keep_betas <- !(rownames(Betas_clean_filtered) %in% v2_mapping_inaccuracy$IlmnID)
Betas_clean_filtered <- Betas_clean_filtered[keep_betas,]

keep_gratioset <- !(featureNames(gRatioSet_clean_filtered) %in% v2_mapping_inacc$IlmnID)
keep_gratioset <- !(featureNames(gRatioSet_clean_filtered) %in% v2_mapping_inaccuracy$IlmnID)
gRatioSet_clean_filtered <- gRatioSet_clean_filtered[keep_gratioset,]
}
```
@@ -383,13 +387,13 @@ if (user_choices$array_type == "v2") {
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" CpGs show known mapping inaccuracies"), sep = "<br>\n")

dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("12", "12")
step <- c("Filter Mapping Inaccuracies", "Filter Mapping Inaccuracies")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])

table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
@@ -414,21 +418,21 @@ if (user_choices$array_type == "v2") {
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" CpGs were flagged by Illumina"), sep = "<br>\n")

dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("13", "13")
step <- c("Filter Flagged Probes", "Filter Flagged Probes")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])

table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
}
```

```{r, save data, include=FALSE}
save(gRatioSet_filtered, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean_filtered.Rdata"))
save(gRatioSet_clean_filtered, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean_filtered.Rdata"))

Betas_clean_filtered <- getBeta(gRatioSet_clean_filtered) # beta values
save(Betas_clean_filtered, file = paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered.Rdata"))