diff --git a/scripts/6_filtering_cpgs.Rmd b/scripts/6_filtering_cpgs.Rmd
index db4c677..5319162 100644
--- a/scripts/6_filtering_cpgs.Rmd
+++ b/scripts/6_filtering_cpgs.Rmd
@@ -59,7 +59,7 @@ We will remove probes that:
 * cross-hybridize as described by McCartney et al. (v1.0 only)
 * are polymorphic (v1.0 only)
 * show mapping inaccuracies or were flagged by Illumina (v2 only)
-Please note: replicate removal is performed for the filtered AND unfiltered data (v2 only.
+Please note: replicate removal is performed for the filtered AND unfiltered data (v2 only).
 
 ```{r get annotation of probes, include=FALSE}
 annotations_clean = getAnnotation(RGSet_clean)
@@ -73,11 +73,11 @@ if (user_choices$array_type == "v2") {
   # add non-unique CpG name
   CpG_names <- as.data.frame(rownames(Betas_clean))
   colnames(CpG_names) <- c("IlmnID")
-  CpG_names$CpG_name <- str_sub(CpG_names$IlmnID, 1, 10)
+  CpG_names$CpG_name <- sub("_.*", "", CpG_names$IlmnID)
+  # alternative (drops the last 5 characters): str_sub(CpG_names$IlmnID, end = -6)
   # isolate replicate IDs
   replicates <- CpG_names %>%
-    group_by(CpG_name) %>%
-    filter(n()>1) %>%
+    filter(duplicated(CpG_name)) %>%
     distinct(CpG_name)
 
   # link to detP values
@@ -85,7 +85,7 @@ if (user_choices$array_type == "v2") {
                                           rowMeans(detP_clean))),
                               by = join_by("IlmnID" == "V1"))
   colnames(CpG_names_detP) <- c("IlmnID", "CpG_name", "detP_value")
-  cat(paste0(nrow(replicates), " replicate CpGs were detected"))
+  cat(paste0(nrow(replicates), " CpGs with one or more replicates were detected"))
 }
 ```
 
@@ -98,12 +98,12 @@ if (user_choices$array_type == "v2") {
     slice_min(detP_value) %>%
     ungroup()
   keep_replicates <- keep_replicates %>%
-    filter(duplicated(CpG_name) == FALSE)
+    filter(!duplicated(CpG_name))
 
   # create list of replicates to exclude (CpGs in replicate list but probe not in keep list)
   exclude_replicates <- CpG_names_detP %>%
     filter(CpG_name %in% replicates$CpG_name) %>%
-    filter(!(IlmnID %in% keep_replicates$IlmnID))
+    filter(!IlmnID %in% keep_replicates$IlmnID)
 
   # exclude replicates from betas, gRatioSet, detP and save data
   keep_gratioset <- !(featureNames(gRatioSet_clean) %in% exclude_replicates$IlmnID)