diff --git a/scripts/10_correction_batch_effects_unfiltered.Rmd b/scripts/10_correction_batch_effects_unfiltered.Rmd index ce10593..fae0021 100644 --- a/scripts/10_correction_batch_effects_unfiltered.Rmd +++ b/scripts/10_correction_batch_effects_unfiltered.Rmd @@ -30,6 +30,7 @@ start_time <- Sys.time() ``` ```{r setup, include=FALSE} +source("../data/calculate_pc_cutoff.R") # source function for determining PC cutoff needed_packages <- c("BiocManager", "dplyr", "knitr", "rmarkdown", "tibble", "ggplot2", "ggrepel", "broom", "gplots", "tidyr", "sva", "methods") installed_packages <- needed_packages %in% rownames(installed.packages()) @@ -85,8 +86,8 @@ if (correction_variable_1 == "PLEASE FILL IN"){ labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot", subtitle = "Note: cumulative variance explained (%) is displayed as text") - R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs - print(paste0(R, " PCs that explained a minimum of >10% change of variance were included")) + R <- pc_cutoff(variance_explained) # automatically calculate number of PCs + print(paste0(R, " PCs were included")) phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>% select(personid, plate, slide, array, row, column, arrayid) @@ -244,8 +245,8 @@ if (correction_variable_2 == "PLEASE FILL IN"){ labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot", subtitle = "Note: cumulative variance explained (%) is displayed as text") - R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs - print(paste0(R, " PCs that explained a minimum of >10% change of variance were included")) + R <- pc_cutoff(variance_explained) # automatically calculate number of PCs + print(paste0(R, " PCs were included")) phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>% select(personid, plate, slide, array, row, column, arrayid) @@ -403,8 +404,8 @@ if (correction_variable_3 == "PLEASE FILL IN"){ labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot", subtitle = "Note: cumulative variance explained (%) is displayed as text") - R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs - print(paste0(R, " PCs that explained >10% change of variance were included")) + R <- pc_cutoff(variance_explained) # automatically calculate number of PCs + print(paste0(R, " PCs were included")) phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>% select(personid, plate, slide, array, row, column, arrayid) diff --git a/scripts/8_removal_of_outliers_detect_batch_effects.Rmd b/scripts/8_removal_of_outliers_detect_batch_effects.Rmd index 81f2c90..6b325a7 100644 --- a/scripts/8_removal_of_outliers_detect_batch_effects.Rmd +++ b/scripts/8_removal_of_outliers_detect_batch_effects.Rmd @@ -46,6 +46,7 @@ start_time <- Sys.time() ```{r setup, include=FALSE} +source("../data/calculate_pc_cutoff.R") # source function for determining PC cutoff needed_packages <- c("BiocManager", "dplyr", "knitr", "rmarkdown", "tibble", "ggplot2", "ggrepel", "broom", "gplots", "tidyr", "sva", "methods") installed_packages <- needed_packages %in% rownames(installed.packages()) @@ -124,8 +125,8 @@ variance_explained %>% labs(x = "Principal Component", y = "Portion of variance Explained (%)", title = "Scree Plot", subtitle = "Note: cumulative variance explained (%) is displayed as text") -R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs -print(paste0(R, " PCs that expained >10% of the variance were included")) +R <- pc_cutoff(variance_explained) # automatically calculate number of PCs +print(paste0(R, " PCs were included")) phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>% select(personid, plate, slide, array, row, column, arrayid) @@ -259,8 +260,8 @@ if(!identical(extreme_outliers_filtered %>% pull(personid),character(0))){ labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot", subtitle = "Note: cumulative variance explained (%) is displayed as text") - R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs - print(paste0(R, " PCs that expained >10% of the variance were included")) + R <- pc_cutoff(variance_explained) # automatically calculate number of PCs + print(paste0(R, " PCs were included")) phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>% select(personid, plate, slide, array, row, column, arrayid) @@ -470,8 +471,8 @@ variance_explained %>% labs(x = "Principal Component", y = "Portion of variance Explained (%)", title = "Scree Plot", subtitle = "Note: cumulative variance explained (%) is displayed as text") -R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs -print(paste0(R, " PCs that expained >10% of the variance were included")) +R <- pc_cutoff(variance_explained) # automatically calculate number of PCs +print(paste0(R, " PCs were included")) phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>% select(personid, plate, slide, array, row, column, arrayid) @@ -606,8 +607,8 @@ if(!identical(extreme_outliers_unfiltered %>% pull(personid),character(0))){ labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot", subtitle = "Note: cumulative variance explained (%) is displayed as text") - R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs - print(paste0(R, " PCs that expained >10% of the variance were included")) + R <- pc_cutoff(variance_explained) # automatically calculate number of PCs + print(paste0(R, " PCs were included")) phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>% select(personid, plate, slide, array, row, column, arrayid) diff --git a/scripts/9_correction_batch_effects_filtered.Rmd b/scripts/9_correction_batch_effects_filtered.Rmd index 28c4525..2081997 100644 --- a/scripts/9_correction_batch_effects_filtered.Rmd +++ b/scripts/9_correction_batch_effects_filtered.Rmd @@ -41,6 +41,7 @@ start_time <- Sys.time() ``` ```{r setup, include=FALSE} +source("../data/calculate_pc_cutoff.R") # source function for determining PC cutoff needed_packages <- c("BiocManager", "dplyr", "knitr", "rmarkdown", "tibble", "ggplot2", "ggrepel", "broom", "gplots", "tidyr", "sva", "methods") installed_packages <- needed_packages %in% rownames(installed.packages()) @@ -95,8 +96,8 @@ if (correction_variable_1 == "PLEASE FILL IN"){ labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot", subtitle = "Note: cumulative variance explained (%) is displayed as text") - R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs - print(paste0(R, " PCs that explained a minimum of >10% change of variance were included")) + R <- pc_cutoff(variance_explained) # automatically calculate number of PCs + print(paste0(R, " PCs were included")) phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>% select(personid, plate, slide, array, row, column, arrayid) @@ -254,8 +255,8 @@ if (correction_variable_2 == "PLEASE FILL IN"){ labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot", subtitle = "Note: cumulative variance explained (%) is displayed as text") - R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs - print(paste0(R, " PCs that explained a minimum of >10% change of variance were included")) + R <- pc_cutoff(variance_explained) # automatically calculate number of PCs + print(paste0(R, " PCs were included")) phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>% select(personid, plate, slide, array, row, column, arrayid) @@ -413,8 +414,8 @@ if (correction_variable_3 == "PLEASE FILL IN"){ labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot", subtitle = "Note: cumulative variance explained (%) is displayed as text") - R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs - print(paste0(R, " PCs that explained >10% change of variance were included")) + R <- pc_cutoff(variance_explained) # automatically calculate number of PCs + print(paste0(R, " PCs were included")) phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>% select(personid, plate, slide, array, row, column, arrayid)