diff --git a/scripts/7_normalization.Rmd b/scripts/7_normalization.Rmd index 84ec679..a8e8adc 100644 --- a/scripts/7_normalization.Rmd +++ b/scripts/7_normalization.Rmd @@ -49,13 +49,12 @@ load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata")) load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata")) load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata")) load(paste0(user_choices$project_name, "/reports/annotations_clean.Rdata")) +load(paste0(user_choices$project_name, "/reports/annotations_clean_filtered.Rdata")) ``` # Description This script: - -* annotates the probes * normalizes data with stratified quantile normalization for filtered and unfiltered data (followed by BMIQ = beta-mixture quantile normalization) to minimize unwanted technique-related variation within and between samples @@ -67,21 +66,19 @@ data for further investigation. These files are saved to reports folder. **Note** The user can further alter the normalization methods if needed (e.g. include noob etc.) 
-```{r get annotation of filtered probes, include=FALSE} -annotations_clean_filtered = getAnnotation(gRatioSet_clean_filtered) -``` - ## Normalization of data performed -```{r normalize probes, include=FALSE} +```{r normalize filtered probes, include=FALSE} # Normalization of filtered data # Matrix of probes removed in all prior filtering steps exclusion_matrix <- Betas_clean[!rownames(Betas_clean) %in% rownames(Betas_clean_filtered), ] # Exclude all probes filtered script 5 from steps before to improve preprocessing RGSet_clean_filtered <- subsetByLoci(RGSet_clean, excludeLoci = rownames(exclusion_matrix)) - -gRatioSet_clean_filtered_quantile <- preprocessQuantile(RGSet_clean_filtered) +# Stratified quantile normalization +# Note: sex is set to "F" for all samples since sex chromosomes were already removed in script 6 +# Note: This does not affect the normalization or the phenotype data, it simply stops preprocessQuantile() from producing an error +gRatioSet_clean_filtered_quantile <- preprocessQuantile(RGSet_clean_filtered, sex = "F") save(gRatioSet_clean_filtered_quantile, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean_filtered_quantile.Rdata")) # output: GenomicRatioSet @@ -99,8 +96,15 @@ Betas_clean_filtered_quantile_bmiq <- apply(Betas_clean_filtered_quantile[,1:len save(Betas_clean_filtered_quantile_bmiq, file = paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered_quantile_bmiq.Rdata")) +``` + 
+```{r normalize unfiltered probes, include=FALSE} # Normalization of unfiltered data -gRatioSet_clean_unfiltered_quantile <- preprocessQuantile(RGSet_clean) + +# Stratified quantile normalization +# Note: sex is set to "F" for all samples since sex chromosomes were already removed in script 6 +# Note: This does not affect the normalization or the phenotype data, it simply stops preprocessQuantile() from producing an error +gRatioSet_clean_unfiltered_quantile <- preprocessQuantile(RGSet_clean, sex = "F") save(gRatioSet_clean_unfiltered_quantile, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean_unfiltered_quantile.Rdata")) # output: GenomicRatioSet @@ -117,10 +121,9 @@ Betas_clean_unfiltered_quantile_bmiq <- apply(Betas_clean_unfiltered_quantile[,1 function(a) BMIQ(a,probeType$probeType,plots=FALSE)$nbeta) # sourced from script "BMIQ_1.6_Teschendorff.R" save(Betas_clean_unfiltered_quantile_bmiq, file = paste0(user_choices$project_name, "/processed_data/Betas_clean_unfiltered_quantile_bmiq.Rdata")) - -save(annotations_clean_filtered, file = paste0(user_choices$project_name, "/reports/annotations_clean_filtered.Rdata")) ``` + ## Beta values distribution report ```{r beta values distribution}