Skip to content

Commit

Permalink
Fixed naming of gRatioSet and betas objects
Browse files Browse the repository at this point in the history
  • Loading branch information
Vera N. Karlbauer committed Mar 7, 2025
1 parent 83cdc3c commit 5f72c19
Showing 1 changed file with 59 additions and 55 deletions.
114 changes: 59 additions & 55 deletions scripts/6_filtering_cpgs.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,29 @@ if (!("minfi" %in% rownames(installed.packages()))) {
}
library(minfi)
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
knitr::opts_chunk$set(echo = FALSE)
```

```{r load data, include=FALSE}
summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/gRatioSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
```{r load data 1, include=FALSE}
# Info about cross-hybridizing probes comes from Chen et al. 2013, DOI: 10.4161/epi.23470:
load("data/ChenProbeIDs.rdata")
load("../data/ChenProbeIDs.rdata")
# Info about mapping inaccuracies for EPIC v2.0 from Illumina product information:
if (user_choices$array_type == "v2") {
v2_mapping_inaccuracy <- read.csv("../data/EPIC-8v2-0_A1-190MappingInaccuracies.csv")}
# Info about flagged probes for EPIC v2.0 from Illumina product information:
if (user_choices$array_type == "v2") {
v2_flagged_probes <- read.csv("../data/EPIC-8v2-0_A1-FlaggedProbes.csv")}
knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
```

```{r load data 2, include=FALSE}
summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/gRatioSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
```

# Description
Expand Down Expand Up @@ -127,13 +131,13 @@ if (user_choices$array_type == "v2") {
keep_betas_df <- as.data.frame(keep_betas)
cat(paste0(nrow(exclude_replicates), " replicate probes were removed"))
dim_gRatioSet_filtered <- dim(gRatioSet_clean)
dim_Betas_filtered <- dim(Betas_clean)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean)
dim_Betas_clean_filtered <- dim(Betas_clean)
step_number <- c("4", "4")
step <- c("Filter replicates", "Filter replicates")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
Expand All @@ -150,7 +154,7 @@ Betas_clean_filtered <- Betas_clean[keep_betas,]
# ensure probes are in the same order in cleaned gRatioSet (RGSet with annotations) and detP objects, then subset
detP_clean_filtered2 <- detP_clean[match(featureNames(gRatioSet_clean),rownames(detP_clean)),]
keep_gratioset <- rowSums(detP_clean_filtered < 0.01) == ncol(gRatioSet_clean)
keep_gratioset <- rowSums(detP_clean_filtered2 < 0.01) == ncol(gRatioSet_clean)
gRatioSet_clean_filtered <- gRatioSet_clean[keep_gratioset,]
```

Expand All @@ -159,13 +163,13 @@ keep_betas_df <- as.data.frame(keep_betas)
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes failed in one or more samples"), sep = "<br>\n")
dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("5", "5")
step <- c("Filter technical", "Filter technical")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
Expand All @@ -188,13 +192,13 @@ keep_betas_df <- as.data.frame(keep_betas)
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes were on the X chromosome"), sep = "<br>\n")
dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("6", "6")
step <- c("Filter X Chromosome", "Filter X Chromosome")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
Expand All @@ -215,13 +219,13 @@ keep_betas_df <- as.data.frame(keep_betas)
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes were on the Y chromosome"), sep = "<br>\n")
dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("7", "7")
step <- c("Filter Y Chromosome", "Filter Y Chromosome")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
Expand All @@ -235,13 +239,13 @@ Betas_clean_filtered <- Betas_clean_filtered[rownames(Betas_clean_filtered) %in%
```

```{r, info output for user probes affected by snps, results='asis'}
dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(gRatioSet_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(gRatioSet_clean_filtered)
step_number <- c("8", "8")
step <- c("Filter SNP Affected", "Filter SNP Affected")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
Expand Down Expand Up @@ -283,13 +287,13 @@ if (user_choices$array_type == "v1") {
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes were cross-hybridizing according to Chen et al."), sep = "<br>\n")
dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("9", "9")
step <- c("Filter Cross-Hybridizing Chen", "Filter Cross-Hybridizing Chen")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
Expand All @@ -305,8 +309,8 @@ if (user_choices$array_type == "v1") {
keep_betas <- !(rownames(Betas_clean_filtered) %in% exclude_McCartney$V1)
Betas_clean_filtered <- Betas_clean_filtered[keep_betas,]
keep_gratioset <- !(featureNames(dim_gRatioSet_filtered) %in% exclude_McCartney$V1)
dim_gRatioSet_filtered <- dim_gRatioSet_filtered[keep_gratioset,]
keep_gratioset <- !(featureNames(dim_gRatioSet_clean_filtered) %in% exclude_McCartney$V1)
gRatioSet_clean_filtered <- gRatioSet_clean_filtered[keep_gratioset,]
}
```

Expand All @@ -316,13 +320,13 @@ if (user_choices$array_type == "v1") {
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes may be cross-hybridizing according to McCartney et al."), sep = "<br>\n")
dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("10", "10")
step <- c("Filter Cross-Hybridizing McCartney", "Filter Cross-Hybridizing McCartney")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
Expand Down Expand Up @@ -351,13 +355,13 @@ if (user_choices$array_type == "v1") {
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" probes were polymorphic"), sep = "<br>\n")
dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("11", "11")
step <- c("Filter Polymorphic", "Filter Polymorphic")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
Expand All @@ -369,10 +373,10 @@ saveRDS(summary_table_preprocessing, paste0(user_choices$project_name, "/reports

```{r, filter out mapping inaccuracies, include=FALSE}
if (user_choices$array_type == "v2") {
keep_betas <- !(rownames(Betas_clean_filtered) %in% v2_mapping_inacc$IlmnID)
keep_betas <- !(rownames(Betas_clean_filtered) %in% v2_mapping_inaccuracy$IlmnID)
Betas_clean_filtered <- Betas_clean_filtered[keep_betas,]
keep_gratioset <- !(featureNames(gRatioSet_clean_filtered) %in% v2_mapping_inacc$IlmnID)
keep_gratioset <- !(featureNames(gRatioSet_clean_filtered) %in% v2_mapping_inaccuracy$IlmnID)
gRatioSet_clean_filtered <- gRatioSet_clean_filtered[keep_gratioset,]
}
```
Expand All @@ -383,13 +387,13 @@ if (user_choices$array_type == "v2") {
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" CpGs show known mapping inaccuracies"), sep = "<br>\n")
dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("12", "12")
step <- c("Filter Mapping Inaccuracies", "Filter Mapping Inaccuracies")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
Expand All @@ -414,21 +418,21 @@ if (user_choices$array_type == "v2") {
cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
" CpGs were flagged by Illumina"), sep = "<br>\n")
dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_filtered <- dim(Betas_clean_filtered)
dim_gRatioSet_clean_filtered <- dim(gRatioSet_clean_filtered)
dim_Betas_clean_filtered <- dim(Betas_clean_filtered)
step_number <- c("13", "13")
step <- c("Filter Flagged Probes", "Filter Flagged Probes")
data_class <- c("gRatioSet", "Betas")
samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
samples <- c(dim_gRatioSet_clean_filtered[2], dim_Betas_clean_filtered[2])
probes <- c(dim_gRatioSet_clean_filtered[1], dim_Betas_clean_filtered[1])
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
}
```

```{r, save data, include=FALSE}
save(gRatioSet_filtered, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean_filtered.Rdata"))
save(gRatioSet_clean_filtered, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean_filtered.Rdata"))
Betas_clean_filtered <- getBeta(gRatioSet_clean_filtered) # beta values
save(Betas_clean_filtered, file = paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered.Rdata"))
Expand Down

0 comments on commit 5f72c19

Please sign in to comment.