Skip to content

Commit

Permalink
Only perform replicate removal for EPICv2 array
Browse files Browse the repository at this point in the history
  • Loading branch information
Vera N. Karlbauer committed Aug 13, 2024
1 parent d2f54e2 commit 41bbc5f
Showing 1 changed file with 58 additions and 51 deletions.
109 changes: 58 additions & 51 deletions scripts/6_filtering_cpgs.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -69,67 +69,74 @@ annotations_clean = getAnnotation(RGSet_clean)
### Detect replicates (v2 only)

```{r, detect replicates}
# NOTE(review): these unguarded statements are repeated inside the
# `if (user_choices$array_type == "v2")` block that follows; they look like the
# pre-change side of the diff (hunk header -69,67 +69,74) -- confirm before reuse.
# add non-unique CpG name: first 10 characters of each probe ID (row names of Betas_clean)
CpG_names <- as.data.frame(rownames(Betas_clean))
colnames(CpG_names) <- c("IlmnID")
CpG_names$CpG_name <- str_sub(CpG_names$IlmnID, 1, 10)
# isolate replicate IDs: CpG names that occur for more than one probe ID
replicates <- CpG_names %>%
group_by(CpG_name) %>%
filter(n()>1) %>%
distinct(CpG_name)
# link to detP values: row means of detP_clean, joined on the probe ID
# NOTE(review): cbind() of the character row names with rowMeans() produces a
# character matrix, so detP_value ends up stored as text -- confirm that the
# later minimum selection is meant to operate on strings.
CpG_names_detP <- left_join(CpG_names, as.data.frame(cbind(rownames(detP_clean), rowMeans(detP_clean))), by = join_by("IlmnID" == "V1"))
colnames(CpG_names_detP) <- c("IlmnID", "CpG_name", "detP_value")
# print the number of CpG names that have replicate probes
cat(paste0(nrow(replicates), " replicate CpGs were detected"))
# Detect replicate probes; runs only when the user selected the "v2" array type.
# Produces:
#   CpG_names      - probe ID (IlmnID) plus its non-unique CpG name
#   replicates     - CpG names that occur for more than one probe ID
#   CpG_names_detP - CpG_names with the mean detection p-value of each probe
if (user_choices$array_type == "v2") {
  # add non-unique CpG name
  # NOTE(review): assumes the CpG name is always the first 10 characters of the
  # probe ID -- confirm this also holds for non-"cg" probe IDs.
  CpG_names <- as.data.frame(rownames(Betas_clean))
  colnames(CpG_names) <- c("IlmnID")
  CpG_names$CpG_name <- str_sub(CpG_names$IlmnID, 1, 10)
  # isolate replicate IDs
  replicates <- CpG_names %>%
    group_by(CpG_name) %>%
    filter(n() > 1) %>%
    distinct(CpG_name)
  # link to detP values
  # Build the lookup with data.frame() rather than cbind(): cbind() of the
  # character row names with the numeric row means yields a character matrix,
  # which would turn detP_value into text and make any later minimum selection
  # compare strings (e.g. "0.001" sorts before "1e-05") instead of numbers.
  mean_detP <- data.frame(
    IlmnID = rownames(detP_clean),
    detP_value = unname(rowMeans(detP_clean))
  )
  # resulting columns: IlmnID, CpG_name, detP_value (numeric)
  CpG_names_detP <- left_join(CpG_names, mean_detP, by = "IlmnID")
  cat(paste0(nrow(replicates), " replicate CpGs were detected"))
}
```

```{r, exclude replicates and save data, include = FALSE}
# NOTE(review): this span interleaves unguarded statements with the same
# statements wrapped in `if (user_choices$array_type == "v2")`; the unguarded
# ones look like the pre-change side of the diff -- confirm before reuse.
## create list of replicates with lowest detP values, if detPs are equal, take first replicate
keep_replicates <- CpG_names_detP %>%
filter(CpG_name %in% replicates$CpG_name) %>%
group_by(CpG_name) %>%
slice_min(detP_value) %>%
ungroup()
# slice_min() can return several rows per CpG name; keep only the first one
keep_replicates <- keep_replicates %>%
filter(duplicated(CpG_name) == FALSE)
# create list of replicates to exclude (CpGs in replicate list but probe not in keep list)
exclude_replicates <- CpG_names_detP %>%
filter(CpG_name %in% replicates$CpG_name) %>%
filter(!(IlmnID %in% keep_replicates$IlmnID))
# guarded version: the same steps, run only when the array type is "v2"
if (user_choices$array_type == "v2") {
# create list of replicates with lowest detP values, if detPs are equal, take first replicate
keep_replicates <- CpG_names_detP %>%
filter(CpG_name %in% replicates$CpG_name) %>%
group_by(CpG_name) %>%
slice_min(detP_value) %>%
ungroup()
# slice_min() can return several rows per CpG name; keep only the first one
keep_replicates <- keep_replicates %>%
filter(duplicated(CpG_name) == FALSE)
# exclude replicates from betas, RGSet, detP and save data
# each filtered object is written to <project_name>/processed_data/<object>.Rdata
RGSet_clean <- subsetByLoci(RGSet_clean, excludeLoci = exclude_replicates$IlmnID)
save(RGSet_clean, file = paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
# logical mask over the rows of Betas_clean: TRUE for probes that are kept
keep_betas <- !(rownames(Betas_clean) %in% exclude_replicates$IlmnID)
Betas_clean <- Betas_clean[keep_betas,]
save(Betas_clean, file = paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
# logical mask over the rows of detP_clean: TRUE for probes that are kept
keep_detP <- !(rownames(detP_clean) %in% exclude_replicates$IlmnID)
detP_clean <- detP_clean[keep_detP,]
save(detP_clean, file = paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
# create list of replicates to exclude (CpGs in replicate list but probe not in keep list)
exclude_replicates <- CpG_names_detP %>%
filter(CpG_name %in% replicates$CpG_name) %>%
filter(!(IlmnID %in% keep_replicates$IlmnID))
# exclude replicates from betas, RGSet, detP and save data
# each filtered object is written to <project_name>/processed_data/<object>.Rdata
RGSet_clean <- subsetByLoci(RGSet_clean, excludeLoci = exclude_replicates$IlmnID)
save(RGSet_clean, file = paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
# logical mask over the rows of Betas_clean: TRUE for probes that are kept
keep_betas <- !(rownames(Betas_clean) %in% exclude_replicates$IlmnID)
Betas_clean <- Betas_clean[keep_betas,]
save(Betas_clean, file = paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
# logical mask over the rows of detP_clean: TRUE for probes that are kept
keep_detP <- !(rownames(detP_clean) %in% exclude_replicates$IlmnID)
detP_clean <- detP_clean[keep_detP,]
save(detP_clean, file = paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
}
```

### Exclude replicates (v2 only)

```{r, info output for replicate removal, results='asis'}
# NOTE(review): this span interleaves unguarded statements with the same
# statements wrapped in `if (user_choices$array_type == "v2")`; the unguarded
# ones look like the pre-change side of the diff -- confirm before reuse.
# keep_betas_df is not used again in the visible lines
keep_betas_df <- as.data.frame(keep_betas)
# print the number of replicate probes that were removed
cat(paste0(nrow(exclude_replicates), " replicate probes were removed"))
# dimensions after filtering: element 1 is used as the probe count, element 2 as the sample count
dim_RGSet_filtered <- dim(RGSet_clean)
dim_Betas_filtered <- dim(Betas_clean)
# two summary rows for step "4" ("Filter replicates"): one for the RGSet, one for the Betas
step_number <- c("4", "4")
step <- c("Filter replicates", "Filter replicates")
data_class <- c("RGSet", "Betas")
samples <- c(dim_RGSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_RGSet_filtered[1], dim_Betas_filtered[1])
# guarded version: report and record the step only when the array type is "v2"
if (user_choices$array_type == "v2") {
# keep_betas_df is not used again in the visible lines
keep_betas_df <- as.data.frame(keep_betas)
# print the number of replicate probes that were removed
cat(paste0(nrow(exclude_replicates), " replicate probes were removed"))
# dimensions after filtering: element 1 is used as the probe count, element 2 as the sample count
dim_RGSet_filtered <- dim(RGSet_clean)
dim_Betas_filtered <- dim(Betas_clean)
# two summary rows for step "4" ("Filter replicates"): one for the RGSet, one for the Betas
step_number <- c("4", "4")
step <- c("Filter replicates", "Filter replicates")
data_class <- c("RGSet", "Betas")
samples <- c(dim_RGSet_filtered[2], dim_Betas_filtered[2])
probes <- c(dim_RGSet_filtered[1], dim_Betas_filtered[1])
# append the two rows to the running preprocessing summary table
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
# append the two rows to the running preprocessing summary table
table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
}
```

## Removal of failed probes in one or more samples
Expand Down

0 comments on commit 41bbc5f

Please sign in to comment.