diff --git a/scripts/1_definitions_and_setup.Rmd b/scripts/1_definitions_and_setup.Rmd index 9d847f7..ea74e81 100644 --- a/scripts/1_definitions_and_setup.Rmd +++ b/scripts/1_definitions_and_setup.Rmd @@ -33,7 +33,7 @@ This script asks the user to define or choose: * project name * array type * population ethnicity -* detection *p* value limit (script 4 will exclude samples with *p* values higher than this value) +* detection *p* value limit for samples (script 4 will exclude samples with *p* values higher than this value) * extreme outliers definition (number of standard deviation of first and second PCA for script 10) * the name of the following columns as they are written in your phenotype data: + slide (example: "201715340007") @@ -52,12 +52,16 @@ The path name should not include the name of the github repository **Path to phenotype data:** The path to the data table containing phenotype information The path name should include the name of data table itself. This file must be **.csv** **Path to idat files:** The path to the raw files obtained from array +**Path to R package directories (optional):** The path to the personal directory for installing and loading packages. +This is needed if installation of R packages to the default R library is not permitted due to the lack of admin rights. +This will enable R to install and load any needed packages without problems. +Please make sure you have the necessary write permissions in the specified directory **Array type:** "v1" or "v2". This stands for the different versions of the EPIC array **Population ethnicity:** Choose "AFR", "AMR", "ASN", "EUR" or "Global". Global may be used if the population is very diverse. This information will be useful for removing ethnic-specific hybridizing probes -**Detection p-value:** The threshold for the *p* value of detection. 
This *p* value indicates the -signal quality in the initial Quality Control (QC) (we typically use a +**Detection p-value of samples:** The threshold for the *p* value of detection. This *p* value indicates the +signal quality in the initial Quality Control (QC) for samples (we typically use a threshold of 0.05 and set this by default) **Define extreme outliers:** We define outliers as outside a certain number of standard deviations from the mean in the first and second principal component. You choose how many standard deviations @@ -104,10 +108,11 @@ The following information is required: personal_path <- "PLEASE FILL IN" pheno_path <- "PLEASE FILL IN" idat_path <- "PLEASE FILL IN" +package_path <- "PLEASE FILL IN" project_name <- "PLEASE FILL IN" array_type <- "PLEASE FILL IN" population_ethnicity <- "PLEASE FILL IN" -detP_threshold <- "0.05" +detP_sample_threshold <- "0.05" outlier_threshold <- "3" name_slide_column <- "PLEASE FILL IN" name_array_column <- "PLEASE FILL IN" @@ -118,6 +123,7 @@ name_sex_column <- "PLEASE FILL IN" additional_batch_variable_1 <- "PLEASE FILL IN" additional_batch_variable_2 <- "PLEASE FILL IN" additional_batch_variable_3 <- "PLEASE FILL IN" + ``` ```{r, save user choices, include=FALSE} @@ -128,7 +134,7 @@ user_choices <- data.frame( "project_name" = project_name, "array_type" = array_type, "population_ethnicity" = population_ethnicity, - "detP_threshold" = as.numeric(detP_threshold), + "detP_sample_threshold" = as.numeric(detP_sample_threshold), "outlier_threshold" = as.numeric(outlier_threshold), "name_slide_column" = name_slide_column, "name_array_column" = name_array_column, @@ -152,6 +158,9 @@ ifelse(pheno_path == "PLEASE FILL IN", ifelse(idat_path == "PLEASE FILL IN", idat_path_message <- "✘ Please specify the path to idat files", idat_path_message <- "✔ Path to idat files was provided") +ifelse(package_path == "PLEASE FILL IN", + package_path_message <- "✔ Package installation path was not specified and therefore default path 
will be used", + package_path_message <- paste0("✔ Package installation path was specified: ", package_path, ". all R packages will be installed here.")) ifelse(project_name == "PLEASE FILL IN", project_name_message <- "✘ Please specify project name", project_name_message <- "✔ Project name was provided") @@ -163,9 +172,9 @@ ifelse(population_ethnicity %in% c("AFR", "AMR", "ASN", "EUR", "Global"), population_ethnicity_message <- "✔ Population ethnicity is OK", population_ethnicity_message <- paste0("✘ This population ethnicity is not available: ", population_ethnicity, ". Please use AFR, AMR, ASN, EUR or Global")) -ifelse(detP_threshold == "0.05" | detP_threshold == "0.01", - detP_threshold_message <- "✔ Detection p-value is OK", - detP_threshold_message <- paste0("✘ This detection p-value is not available: ", detP_threshold, ". Please use 0.05 or 0.01")) +ifelse(detP_sample_threshold == "0.05" | detP_sample_threshold == "0.01", + detP_sample_threshold_message <- "✔ Detection p-value is OK", + detP_sample_threshold_message <- paste0("✘ This detection p-value is not available: ", detP_sample_threshold, ". Please use 0.05 or 0.01")) ifelse(outlier_threshold == "3", outlier_threshold_message <- "✔ Outlier threshold is set to standard: 3 standard deviations", outlier_threshold_message <- paste0("✔ New outlier threshold was set to be: ", outlier_threshold, " standard deviations")) @@ -225,9 +234,9 @@ user_choices %>% paged_table() ## Check user definitions ```{r, users definitions check, results = 'asis'} -cat(personal_path_message, pheno_path_message, idat_path_message, +cat(personal_path_message, pheno_path_message, idat_path_message, package_path_message, project_name_message, array_type_message, population_ethnicity_message, - detP_threshold_message, outlier_threshold_message, phenotype_names_message, + detP_sample_threshold_message, outlier_threshold_message, phenotype_names_message, additional_batch_variables_message, sep = "
\n") ```