diff --git a/scripts/6_filtering_cpgs.Rmd b/scripts/6_filtering_cpgs.Rmd index e0870b2..db4c677 100644 --- a/scripts/6_filtering_cpgs.Rmd +++ b/scripts/6_filtering_cpgs.Rmd @@ -105,9 +105,10 @@ if (user_choices$array_type == "v2") { filter(CpG_name %in% replicates$CpG_name) %>% filter(!(IlmnID %in% keep_replicates$IlmnID)) - # exclude replicates from betas, RGSet, detP and save data - RGSet_clean <- subsetByLoci(RGSet_clean, excludeLoci = exclude_replicates$IlmnID) - save(RGSet_clean, file = paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata")) + # exclude replicates from betas, gRatioSet, detP and save data + keep_gratioset <- !(featureNames(gRatioSet_clean) %in% exclude_replicates$IlmnID) + gRatioSet_clean <- gRatioSet_clean[keep_gratioset,] + save(gRatioSet_clean, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean.Rdata")) keep_betas <- !(rownames(Betas_clean) %in% exclude_replicates$IlmnID) Betas_clean <- Betas_clean[keep_betas,] @@ -126,13 +127,13 @@ if (user_choices$array_type == "v2") { keep_betas_df <- as.data.frame(keep_betas) cat(paste0(nrow(exclude_replicates), " replicate probes were removed")) - dim_RGSet_filtered <- dim(RGSet_clean) + dim_gRatioSet_filtered <- dim(gRatioSet_clean) dim_Betas_filtered <- dim(Betas_clean) step_number <- c("4", "4") step <- c("Filter replicates", "Filter replicates") - data_class <- c("RGSet", "Betas") - samples <- c(dim_RGSet_filtered[2], dim_Betas_filtered[2]) - probes <- c(dim_RGSet_filtered[1], dim_Betas_filtered[1]) + data_class <- c("gRatioSet", "Betas") + samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) + probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -371,7 +372,8 @@ if (user_choices$array_type == "v2") { keep_betas <- !(rownames(Betas_clean_filtered) %in% v2_mapping_inacc$IlmnID) Betas_clean_filtered <- Betas_clean_filtered[keep_betas,] - RGSet_clean_filtered <- subsetByLoci(RGSet_clean_filtered, excludeLoci = v2_mapping_inacc$IlmnID) + keep_gratioset <- !(featureNames(gRatioSet_clean_filtered) %in% v2_mapping_inacc$IlmnID) + gRatioSet_clean_filtered <- gRatioSet_clean_filtered[keep_gratioset,] } ``` @@ -381,13 +383,13 @@ if (user_choices$array_type == "v2") { cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), " CpGs show known mapping inaccuracies"), sep = "
\n") - dim_RGSet_filtered <- dim(RGSet_clean_filtered) + dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) dim_Betas_filtered <- dim(Betas_clean_filtered) step_number <- c("12", "12") step <- c("Filter Mapping Inaccuracies", "Filter Mapping Inaccuracies") - data_class <- c("RGSet", "Betas") - samples <- c(dim_RGSet_filtered[2], dim_Betas_filtered[2]) - probes <- c(dim_RGSet_filtered[1], dim_Betas_filtered[1]) + data_class <- c("gRatioSet", "Betas") + samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) + probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding) @@ -401,7 +403,8 @@ if (user_choices$array_type == "v2") { keep_betas <- !(rownames(Betas_clean_filtered) %in% v2_flagged_probes$IlmnID) Betas_clean_filtered <- Betas_clean_filtered[keep_betas,] - RGSet_clean_filtered <- subsetByLoci(RGSet_clean_filtered, excludeLoci = v2_flagged_probes$IlmnID) + keep_gratioset <- !(featureNames(gRatioSet_clean_filtered) %in% v2_flagged_probes$IlmnID) + gRatioSet_clean_filtered <- gRatioSet_clean_filtered[keep_gratioset,] } ``` @@ -411,13 +414,13 @@ if (user_choices$array_type == "v2") { cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), " CpGs were flagged by Illumina"), sep = "
\n") - dim_RGSet_filtered <- dim(RGSet_clean_filtered) + dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered) dim_Betas_filtered <- dim(Betas_clean_filtered) step_number <- c("13", "13") step <- c("Filter Flagged Probes", "Filter Flagged Probes") - data_class <- c("RGSet", "Betas") - samples <- c(dim_RGSet_filtered[2], dim_Betas_filtered[2]) - probes <- c(dim_RGSet_filtered[1], dim_Betas_filtered[1]) + data_class <- c("gRatioSet", "Betas") + samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2]) + probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1]) table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes) summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)