Skip to content

Commit

Permalink
Determine number of included PCs via pc_cutoff function
Browse files (browse the repository at this point in the history)
  • Loading branch information
Vera N. Karlbauer committed Nov 4, 2023
1 parent 2a826ea commit ee5059e
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 20 deletions.
13 changes: 7 additions & 6 deletions scripts/10_correction_batch_effects_unfiltered.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ start_time <- Sys.time()
```

```{r setup, include=FALSE}
source("../data/calculate_pc_cutoff.R") # source function for determining PC cutoff
needed_packages <- c("BiocManager", "dplyr", "knitr", "rmarkdown", "tibble", "ggplot2",
"ggrepel", "broom", "gplots", "tidyr", "sva", "methods")
installed_packages <- needed_packages %in% rownames(installed.packages())
Expand Down Expand Up @@ -85,8 +86,8 @@ if (correction_variable_1 == "PLEASE FILL IN"){
labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot",
subtitle = "Note: cumulative variance explained (%) is displayed as text")
R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs
print(paste0(R, " PCs that explained a minimum of >10% change of variance were included"))
R <- pc_cutoff(variance_explained) # automatically calculate number of PCs
print(paste0(R, " PCs were included"))
phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>%
select(personid, plate, slide, array, row, column, arrayid)
Expand Down Expand Up @@ -244,8 +245,8 @@ if (correction_variable_2 == "PLEASE FILL IN"){
labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot",
subtitle = "Note: cumulative variance explained (%) is displayed as text")
R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs
print(paste0(R, " PCs that explained a minimum of >10% change of variance were included"))
R <- pc_cutoff(variance_explained) # automatically calculate number of PCs
print(paste0(R, " PCs were included"))
phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>%
select(personid, plate, slide, array, row, column, arrayid)
Expand Down Expand Up @@ -403,8 +404,8 @@ if (correction_variable_3 == "PLEASE FILL IN"){
labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot",
subtitle = "Note: cumulative variance explained (%) is displayed as text")
R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs
print(paste0(R, " PCs that explained >10% change of variance were included"))
R <- pc_cutoff(variance_explained) # automatically calculate number of PCs
print(paste0(R, " PCs were included"))
phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>%
select(personid, plate, slide, array, row, column, arrayid)
Expand Down
17 changes: 9 additions & 8 deletions scripts/8_removal_of_outliers_detect_batch_effects.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ start_time <- Sys.time()


```{r setup, include=FALSE}
source("../data/calculate_pc_cutoff.R") # source function for determining PC cutoff
needed_packages <- c("BiocManager", "dplyr", "knitr", "rmarkdown", "tibble", "ggplot2",
"ggrepel", "broom", "gplots", "tidyr", "sva", "methods")
installed_packages <- needed_packages %in% rownames(installed.packages())
Expand Down Expand Up @@ -124,8 +125,8 @@ variance_explained %>%
labs(x = "Principal Component", y = "Portion of variance Explained (%)", title = "Scree Plot",
subtitle = "Note: cumulative variance explained (%) is displayed as text")
R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs
print(paste0(R, " PCs that expained >10% of the variance were included"))
R <- pc_cutoff(variance_explained) # automatically calculate number of PCs
print(paste0(R, " PCs were included"))
phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>%
select(personid, plate, slide, array, row, column, arrayid)
Expand Down Expand Up @@ -259,8 +260,8 @@ if(!identical(extreme_outliers_filtered %>% pull(personid),character(0))){
labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot",
subtitle = "Note: cumulative variance explained (%) is displayed as text")
R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs
print(paste0(R, " PCs that expained >10% of the variance were included"))
R <- pc_cutoff(variance_explained) # automatically calculate number of PCs
print(paste0(R, " PCs were included"))
phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>%
select(personid, plate, slide, array, row, column, arrayid)
Expand Down Expand Up @@ -470,8 +471,8 @@ variance_explained %>%
labs(x = "Principal Component", y = "Portion of variance Explained (%)", title = "Scree Plot",
subtitle = "Note: cumulative variance explained (%) is displayed as text")
R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs
print(paste0(R, " PCs that expained >10% of the variance were included"))
R <- pc_cutoff(variance_explained) # automatically calculate number of PCs
print(paste0(R, " PCs were included"))
phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>%
select(personid, plate, slide, array, row, column, arrayid)
Expand Down Expand Up @@ -606,8 +607,8 @@ if(!identical(extreme_outliers_unfiltered %>% pull(personid),character(0))){
labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot",
subtitle = "Note: cumulative variance explained (%) is displayed as text")
R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs
print(paste0(R, " PCs that expained >10% of the variance were included"))
R <- pc_cutoff(variance_explained) # automatically calculate number of PCs
print(paste0(R, " PCs were included"))
phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>%
select(personid, plate, slide, array, row, column, arrayid)
Expand Down
13 changes: 7 additions & 6 deletions scripts/9_correction_batch_effects_filtered.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ start_time <- Sys.time()
```

```{r setup, include=FALSE}
source("../data/calculate_pc_cutoff.R") # source function for determining PC cutoff
needed_packages <- c("BiocManager", "dplyr", "knitr", "rmarkdown", "tibble", "ggplot2",
"ggrepel", "broom", "gplots", "tidyr", "sva", "methods")
installed_packages <- needed_packages %in% rownames(installed.packages())
Expand Down Expand Up @@ -95,8 +96,8 @@ if (correction_variable_1 == "PLEASE FILL IN"){
labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot",
subtitle = "Note: cumulative variance explained (%) is displayed as text")
R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs
print(paste0(R, " PCs that explained a minimum of >10% change of variance were included"))
R <- pc_cutoff(variance_explained) # automatically calculate number of PCs
print(paste0(R, " PCs were included"))
phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>%
select(personid, plate, slide, array, row, column, arrayid)
Expand Down Expand Up @@ -254,8 +255,8 @@ if (correction_variable_2 == "PLEASE FILL IN"){
labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot",
subtitle = "Note: cumulative variance explained (%) is displayed as text")
R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs
print(paste0(R, " PCs that explained a minimum of >10% change of variance were included"))
R <- pc_cutoff(variance_explained) # automatically calculate number of PCs
print(paste0(R, " PCs were included"))
phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>%
select(personid, plate, slide, array, row, column, arrayid)
Expand Down Expand Up @@ -413,8 +414,8 @@ if (correction_variable_3 == "PLEASE FILL IN"){
labs(x = "Principal Component", y = "Variance Explained (%)", title = "Scree Plot",
subtitle = "Note: cumulative variance explained (%) is displayed as text")
R <- variance_explained %>% filter(var_explained > 10) %>% nrow() # threshold for N of PCs
print(paste0(R, " PCs that explained >10% change of variance were included"))
R <- pc_cutoff(variance_explained) # automatically calculate number of PCs
print(paste0(R, " PCs were included"))
phenodata_subset_df <- as.data.frame(PhenoData_clean@listData) %>%
select(personid, plate, slide, array, row, column, arrayid)
Expand Down

0 comments on commit ee5059e

Please sign in to comment.