diff --git a/scripts/6_filtering_cpgs.Rmd b/scripts/6_filtering_cpgs.Rmd
index 2cd42c2..14b0004 100644
--- a/scripts/6_filtering_cpgs.Rmd
+++ b/scripts/6_filtering_cpgs.Rmd
@@ -69,67 +69,79 @@ annotations_clean = getAnnotation(RGSet_clean)
 ### Detect replicates (v2 only)
 
 ```{r, detect replicates}
-# add non-unique CpG name
-CpG_names <- as.data.frame(rownames(Betas_clean))
-colnames(CpG_names) <- c("IlmnID")
-CpG_names$CpG_name <- str_sub(CpG_names$IlmnID, 1, 10)
-# isolate replicate IDs
-replicates <- CpG_names %>%
-  group_by(CpG_name) %>%
-  filter(n()>1) %>%
-  distinct(CpG_name)
-
-# link to detP values
-CpG_names_detP <- left_join(CpG_names, as.data.frame(cbind(rownames(detP_clean), rowMeans(detP_clean))), by = join_by("IlmnID" == "V1"))
-colnames(CpG_names_detP) <- c("IlmnID", "CpG_name", "detP_value")
-
-cat(paste0(nrow(replicates), " replicate CpGs were detected"))
+if (user_choices$array_type == "v2") {
+  # add non-unique CpG name: the first 10 characters of the probe ID (IlmnID)
+  CpG_names <- as.data.frame(rownames(Betas_clean))
+  colnames(CpG_names) <- c("IlmnID")
+  CpG_names$CpG_name <- str_sub(CpG_names$IlmnID, 1, 10)
+  # isolate replicate IDs (CpG names carried by more than one probe)
+  replicates <- CpG_names %>%
+    group_by(CpG_name) %>%
+    filter(n() > 1) %>%
+    distinct(CpG_name)
+
+  # link to detP values; build the lookup with data.frame() rather than
+  # cbind(), which would coerce the row means to character and make the
+  # later slice_min() compare them as strings instead of numbers
+  detP_means <- data.frame(
+    IlmnID = rownames(detP_clean),
+    detP_value = rowMeans(detP_clean)
+  )
+  CpG_names_detP <- left_join(CpG_names, detP_means, by = "IlmnID")
+
+  cat(paste0(nrow(replicates), " replicate CpGs were detected"))
+}
 ```
 
 ```{r, exclude replicates and save data, include = FALSE}
-## create list of replicates with lowest detP values, if detPs are equal, take first replicate
-keep_replicates <- CpG_names_detP %>%
-  filter(CpG_name %in% replicates$CpG_name) %>%
-  group_by(CpG_name) %>%
-  slice_min(detP_value) %>%
-  ungroup()
-keep_replicates <- keep_replicates %>%
-  filter(duplicated(CpG_name) == FALSE)
-
-# create list of replicates to exclude (CpGs in replicate list but probe not in keep list)
-exclude_replicates <- CpG_names_detP %>%
-  filter(CpG_name %in% replicates$CpG_name) %>%
-  filter(!(IlmnID %in% keep_replicates$IlmnID))
+if (user_choices$array_type == "v2") {
+  # per replicated CpG, keep the probe with the lowest mean detP value;
+  # if detPs are equal, take the first replicate
+  keep_replicates <- CpG_names_detP %>%
+    filter(CpG_name %in% replicates$CpG_name) %>%
+    group_by(CpG_name) %>%
+    slice_min(detP_value) %>%
+    ungroup() %>%
+    filter(!duplicated(CpG_name))
 
-# exclude replicates from betas, RGSet, detP and save data
-RGSet_clean <- subsetByLoci(RGSet_clean, excludeLoci = exclude_replicates$IlmnID)
-save(RGSet_clean, file = paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
-
-keep_betas <- !(rownames(Betas_clean) %in% exclude_replicates$IlmnID)
-Betas_clean <- Betas_clean[keep_betas,]
-save(Betas_clean, file = paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
-
-keep_detP <- !(rownames(detP_clean) %in% exclude_replicates$IlmnID)
-detP_clean <- detP_clean[keep_detP,]
-save(detP_clean, file = paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
+  # create list of replicates to exclude (CpGs in replicate list but probe not in keep list)
+  exclude_replicates <- CpG_names_detP %>%
+    filter(CpG_name %in% replicates$CpG_name) %>%
+    filter(!(IlmnID %in% keep_replicates$IlmnID))
+
+  # exclude replicates from betas, RGSet, detP and save data
+  RGSet_clean <- subsetByLoci(RGSet_clean, excludeLoci = exclude_replicates$IlmnID)
+  save(RGSet_clean, file = paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
+
+  # drop = FALSE keeps the two-dimensional shape even if one row/column is left
+  keep_betas <- !(rownames(Betas_clean) %in% exclude_replicates$IlmnID)
+  Betas_clean <- Betas_clean[keep_betas, , drop = FALSE]
+  save(Betas_clean, file = paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
+
+  keep_detP <- !(rownames(detP_clean) %in% exclude_replicates$IlmnID)
+  detP_clean <- detP_clean[keep_detP, , drop = FALSE]
+  save(detP_clean, file = paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
+}
 ```
 
 ### Exclude replicates
 
 ```{r, info output for replicate removal, results='asis'}
-keep_betas_df <- as.data.frame(keep_betas)
-cat(paste0(nrow(exclude_replicates), " replicate probes were removed"))
-
-dim_RGSet_filtered <- dim(RGSet_clean)
-dim_Betas_filtered <- dim(Betas_clean)
-step_number <- c("4", "4")
-step <- c("Filter replicates", "Filter replicates")
-data_class <- c("RGSet", "Betas")
-samples <- c(dim_RGSet_filtered[2], dim_Betas_filtered[2])
-probes <- c(dim_RGSet_filtered[1], dim_Betas_filtered[1])
+if (user_choices$array_type == "v2") {
+  # report the removal and record the post-filter dimensions as step 4
+  keep_betas_df <- as.data.frame(keep_betas)
+  cat(paste0(nrow(exclude_replicates), " replicate probes were removed"))
+
+  dim_RGSet_filtered <- dim(RGSet_clean)
+  dim_Betas_filtered <- dim(Betas_clean)
+  step_number <- c("4", "4")
+  step <- c("Filter replicates", "Filter replicates")
+  data_class <- c("RGSet", "Betas")
+  samples <- c(dim_RGSet_filtered[2], dim_Betas_filtered[2])
+  probes <- c(dim_RGSet_filtered[1], dim_Betas_filtered[1])
 
-table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
-summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
+  table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
+  summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
+}
 ```
 
 ## Removal of failed probes in one or more samples