diff --git a/scripts/6_filtering_cpgs.Rmd b/scripts/6_filtering_cpgs.Rmd
index b98aefb..55bdf32 100644
--- a/scripts/6_filtering_cpgs.Rmd
+++ b/scripts/6_filtering_cpgs.Rmd
@@ -48,23 +48,90 @@ v2_flagged_probes <- read.csv("../data/EPIC-8v2-0_A1-FlaggedProbes.csv")}
 
 # Description
 
-This script filters out poor performing probes with unreliable signal from 
+This script annotates probes and filters out poor performing probes with unreliable signal from 
 beta values and the genomic ratio set (RGSet with annotations).  
 We will remove probes that:  
-
+* have replicates and do not perform well (v2 only, keep replicate with best detP-value) 
 * have technical failures in one or more samples (i.e. those with detection *p* value of probes > 0.01)  
 * are on sex chromosomes  
 * might be affected by common SNPs  
 * cross-hybridize (i.e. those that map to multiple places in genome) as described by Chen et al. (v1.0 only)  
 * cross-hybridize as described by McCartney et al. (v1.0 only)    
 * are polymorphic (v1.0 only)  
-* show mapping inaccuracies or were flagged by Illumina (v2 only)
-* have replicates and do not perform well (v2 only, keep replicate with best detP-value)  
+* show mapping inaccuracies or were flagged by Illumina (v2 only) 
+Please note: replicate removal is performed for the filtered AND unfiltered data (v2 only.  
 
 ```{r get annotation of probes, include=FALSE}
 annotations_clean = getAnnotation(RGSet_clean)
 ```
 
+## Replicate removal
+### Detect replicates (v2 only)
+
+```{r, detect replicates}
+# add non-unique CpG name
+CpG_names <- as.data.frame(rownames(Betas_clean))
+colnames(CpG_names) <- c("IlmnID")
+CpG_names$CpG_name <- str_sub(CpG_names$IlmnID, 1, 10)
+# isolate replicate IDs
+replicates <- CpG_names %>%
+  group_by(CpG_name) %>%
+  filter(n()>1) %>% 
+  distinct(CpG_name)
+
+# link to detP values
+CpG_names_detP <- left_join(CpG_names, as.data.frame(cbind(rownames(detP_clean), rowMeans(detP_clean))), by = join_by("IlmnID" == "V1"))
+colnames(CpG_names_detP) <- c("IlmnID", "CpG_name", "detP_value")
+
+cat(paste0(nrow(replicates), " replicate CpGs were detected"))
+```
+
+```{r, exclude replicates and save data, include = FALSE}
+## create list of replicates with lowest detP values, if detPs are equal, take first replicate
+keep_replicates <- CpG_names_detP %>%
+  filter(CpG_name %in% replicates$CpG_name) %>%
+  group_by(CpG_name) %>%
+  slice_min(detP_value) %>%
+  ungroup() 
+keep_replicates <- keep_replicates %>%
+  filter(duplicated(CpG_name) == FALSE)
+
+# create list of replicates to exclude (CpGs in replicate list but probe not in keep list)
+exclude_replicates <- CpG_names_detP %>%
+  filter(CpG_name %in% replicates$CpG_name) %>%
+  filter(!(IlmnID %in% keep_replicates$IlmnID))
+  
+# exclude replicates from betas, RGSet, detP and save data
+RGSet_clean <- subsetByLoci(RGSet_clean, excludeLoci = exclude_replicates$IlmnID)
+save(RGSet_clean, file = paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
+
+keep_betas <- !(rownames(Betas_clean) %in% exclude_replicates$IlmnID) 
+Betas_clean <- Betas_clean[keep_betas,]
+save(Betas_clean, file = paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
+
+keep_detP <- !(rownames(detP_clean) %in% exclude_replicates$IlmnID) 
+detP_clean <- detP_clean[keep_detP,]
+save(detP_clean, file = paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
+```
+
+### Exclude replicates 
+
+```{r, info output for replicate removal, results='asis'}
+keep_betas_df <- as.data.frame(keep_betas)
+cat(paste0(nrow(exclude_replicates), " replicate probes were removed"))
+
+dim_RGSet_filtered <- dim(RGSet_clean)
+dim_Betas_filtered <- dim(Betas_clean)
+step_number <- c("4", "4")
+step <- c("Filter replicates", "Filter replicates")
+data_class <- c("RGSet", "Betas")
+samples <- c(dim_RGSet_filtered[2], dim_Betas_filtered[2])
+probes <- c(dim_RGSet_filtered[1], dim_Betas_filtered[1])
+
+table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
+summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
+```
+
 ## Removal of failed probes in one or more samples
 
 ```{r filter out failed probes, include=FALSE}
@@ -86,7 +153,7 @@ cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
 
 dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
 dim_Betas_filtered <- dim(Betas_clean_filtered)
-step_number <- c("4", "4")
+step_number <- c("5", "5")
 step <- c("Filter technical", "Filter technical")
 data_class <- c("gRatioSet", "Betas")
 samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
@@ -115,7 +182,7 @@ cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
 
 dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
 dim_Betas_filtered <- dim(Betas_clean_filtered)
-step_number <- c("5", "5")
+step_number <- c("6", "6")
 step <- c("Filter X Chromosome", "Filter X Chromosome")
 data_class <- c("gRatioSet", "Betas")
 samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
@@ -142,7 +209,7 @@ cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(),
 
 dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
 dim_Betas_filtered <- dim(Betas_clean_filtered)
-step_number <- c("6", "6")
+step_number <- c("7", "7")
 step <- c("Filter Y Chromosome", "Filter Y Chromosome")
 data_class <- c("gRatioSet", "Betas")
 samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
@@ -155,41 +222,38 @@ summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_prep
 ## Removal of probes possibly affected by common SNPs
 
 ```{r filter out probes affected by snps, include=FALSE}
-if (user_choices$array_type == "v1") {
-  gRatioSet_clean_filtered <- dropLociWithSnps(gRatioSet_clean_filtered) # function requires a GenomicRatioSet
-  Betas_clean_filtered <- Betas_clean_filtered[rownames(Betas_clean_filtered) %in% featureNames(gRatioSet_clean_filtered),]
-}
+gRatioSet_clean_filtered <- dropLociWithSnps(gRatioSet_clean_filtered) # function requires a GenomicRatioSet
+Betas_clean_filtered <- Betas_clean_filtered[rownames(Betas_clean_filtered) %in% featureNames(gRatioSet_clean_filtered),]
 ```
 
 ```{r, info output for user probes affected by snps, results='asis'}
-if (user_choices$array_type == "v1") {
-  dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
-  dim_Betas_filtered <- dim(gRatioSet_clean_filtered)
-  step_number <- c("7", "7")
-  step <- c("Filter SNP Affected", "Filter SNP Affected")
-  data_class <- c("gRatioSet", "Betas")
-  samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
-  probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
+dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
+dim_Betas_filtered <- dim(gRatioSet_clean_filtered)
+step_number <- c("8", "8")
+step <- c("Filter SNP Affected", "Filter SNP Affected")
+data_class <- c("gRatioSet", "Betas")
+samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
+probes <- c(dim_gRatioSet_filtered[1], dim_Betas_filtered[1])
   
-  table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
-  summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
+table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
+summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
   
-  probes_7_step <- as.numeric(summary_table_preprocessing %>% 
-                                filter(data_class == "Betas", step_number == "7") %>%
+probes_8_step <- as.numeric(summary_table_preprocessing %>% 
+                                filter(data_class == "Betas", step_number == "8") %>%
                                 pull(probes))
   
-  probes_6_step <- as.numeric(summary_table_preprocessing %>% 
-                                filter(data_class == "Betas", step_number == "6") %>%
+probes_7_step <- as.numeric(summary_table_preprocessing %>% 
+                                filter(data_class == "Betas", step_number == "7") %>%
                                 pull(probes))
   
-  delta_betas_probes <- probes_6_step - probes_7_step
+delta_betas_probes <- probes_7_step - probes_8_step
   
-  cat(paste0(as.character(delta_betas_probes), 
+cat(paste0(as.character(delta_betas_probes), 
              " probes were possibly affected by SNPs"), sep = "<br>\n")
-}
+
 ```
 
-## Removal of cross-hybridizing probes (Chen et al.)
+## Removal of cross-hybridizing probes (Chen et al., v1 only)
 
 ```{r filter out cross-hybridizing probes chen, include=FALSE}
 if (user_choices$array_type == "v1") {
@@ -213,7 +277,7 @@ if (user_choices$array_type == "v1") {
   
   dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
   dim_Betas_filtered <- dim(Betas_clean_filtered)
-  step_number <- c("8", "9")
+  step_number <- c("9", "9")
   step <- c("Filter Cross-Hybridizing Chen", "Filter Cross-Hybridizing Chen")
   data_class <- c("gRatioSet", "Betas")
   samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
@@ -224,7 +288,7 @@ if (user_choices$array_type == "v1") {
 }
 ```
 
-## Removal of Cross-hybridizing probes (McCartney et al.)
+## Removal of Cross-hybridizing probes (McCartney et al., v1 only)
 
 ```{r, filter out cross-hybridizing probes mccartney, include=FALSE}
 if (user_choices$array_type == "v1") {
@@ -246,7 +310,7 @@ if (user_choices$array_type == "v1") {
   
   dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
   dim_Betas_filtered <- dim(Betas_clean_filtered)
-  step_number <- c("9", "9")
+  step_number <- c("10", "10")
   step <- c("Filter Cross-Hybridizing McCartney", "Filter Cross-Hybridizing McCartney")
   data_class <- c("gRatioSet", "Betas")
   samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
@@ -257,7 +321,7 @@ if (user_choices$array_type == "v1") {
 }
 ```
 
-## Removal of polymorphic probes
+## Removal of polymorphic probes (v1 only)
 
 ```{r, filter out polymorphic probes, include=FALSE}
 if (user_choices$array_type == "v1") {
@@ -281,7 +345,7 @@ if (user_choices$array_type == "v1") {
   
   dim_gRatioSet_filtered <- dim(gRatioSet_clean_filtered)
   dim_Betas_filtered <- dim(Betas_clean_filtered)
-  step_number <- c("10", "10")
+  step_number <- c("11", "11")
   step <- c("Filter Polymorphic", "Filter Polymorphic")
   data_class <- c("gRatioSet", "Betas")
   samples <- c(dim_gRatioSet_filtered[2], dim_Betas_filtered[2])
@@ -293,6 +357,66 @@ if (user_choices$array_type == "v1") {
 saveRDS(summary_table_preprocessing, paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
 ```
 
+## Removal of mapping inaccuracies (v2 only)
+
+```{r, filter out mapping inaccuracies, include=FALSE}
+if (user_choices$array_type == "v2") {
+  keep_betas <- !(rownames(Betas_clean_filtered) %in% v2_mapping_inacc$IlmnID)
+  Betas_clean_filtered <- Betas_clean_filtered[keep_betas,]
+  
+  RGSet_clean_filtered <- subsetByLoci(RGSet_clean_filtered, excludeLoci = v2_mapping_inacc$IlmnID)
+}
+```
+
+```{r, info output for user mapping inaccuracies, results='asis'}
+if (user_choices$array_type == "v2") {
+  keep_betas_df <- as.data.frame(keep_betas)
+  cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), 
+             " CpGs show known mapping inaccuracies"), sep = "<br>\n")
+  
+  dim_RGSet_filtered <- dim(RGSet_clean_filtered)
+  dim_Betas_filtered <- dim(Betas_clean_filtered)
+  step_number <- c("12", "12")
+  step <- c("Filter Mapping Inaccuracies", "Filter Mapping Inaccuracies")
+  data_class <- c("RGSet", "Betas")
+  samples <- c(dim_RGSet_filtered[2], dim_Betas_filtered[2])
+  probes <- c(dim_RGSet_filtered[1], dim_Betas_filtered[1])
+  
+  table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
+  summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
+}
+```
+
+## Removal of flagged probes (v2 only)
+
+```{r, filter out flagged probes, include=FALSE}
+if (user_choices$array_type == "v2") {
+  keep_betas <- !(rownames(Betas_clean_filtered) %in% v2_flagged_probes$IlmnID)
+  Betas_clean_filtered <- Betas_clean_filtered[keep_betas,]
+  
+  RGSet_clean_filtered <- subsetByLoci(RGSet_clean_filtered, excludeLoci = v2_flagged_probes$IlmnID)
+}
+```
+
+```{r, info output for user flagged probes, results='asis'}
+if (user_choices$array_type == "v2") {
+  keep_betas_df <- as.data.frame(keep_betas)
+  cat(paste0(keep_betas_df %>% filter(keep_betas == FALSE) %>% nrow(), 
+             " CpGs were flagged by Illumina"), sep = "<br>\n")
+  
+  dim_RGSet_filtered <- dim(RGSet_clean_filtered)
+  dim_Betas_filtered <- dim(Betas_clean_filtered)
+  step_number <- c("13", "13")
+  step <- c("Filter Flagged Probes", "Filter Flagged Probes")
+  data_class <- c("RGSet", "Betas")
+  samples <- c(dim_RGSet_filtered[2], dim_Betas_filtered[2])
+  probes <- c(dim_RGSet_filtered[1], dim_Betas_filtered[1])
+  
+  table_preprocessing_adding <- data.frame(step_number, step, data_class, samples, probes)
+  summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_preprocessing_adding)
+}
+```
+
 ```{r, save data, include=FALSE}
 save(dim_gRatioSet_filtered, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean_filtered.Rdata"))
 
@@ -306,8 +430,6 @@ save(Ms_clean_filtered, file = paste0(user_choices$project_name, "/processed_dat
 ```{r, save annotations, include=FALSE}
 save(annotations_clean, file = paste0(user_choices$project_name, "/reports/annotations_clean.Rdata"))
 
-
-
 ```
 
 ## Beta values distribution report