Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this organization
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Resetting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
mpip
/
EPIC_Preprocessing_Pipeline
Public
Notifications
You must be signed in to change notification settings
Fork
1
Star
0
Code
Issues
0
Pull requests
0
Actions
Projects
1
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Actions
Projects
Security
Insights
Files
9ff8c73
data
pipeline_flowchart
scripts
10_correction_batch_effects_unfiltered.Rmd
11_annotate_hg19_create_pseudo_epicv1_from_epicv2.Rmd
1_definitions_and_setup.Rmd
2_import_raw_DNAm_data.Rmd
3_quality_control.Rmd
4_check_matching_of_epigenetic_sex.Rmd
5_samples_exclusion.Rmd
6_filtering_cpgs.Rmd
7_normalization.Rmd
8_removal_of_outliers_detect_batch_effects.Rmd
9_correction_batch_effects_filtered.Rmd
.gitignore
LICENSE
README.md
Breadcrumbs
EPIC_Preprocessing_Pipeline
/
scripts
/
7_normalization.Rmd
Blame
Blame
Latest commit
History
History
239 lines (207 loc) · 12.2 KB
Breadcrumbs
EPIC_Preprocessing_Pipeline
/
scripts
/
7_normalization.Rmd
Top
File metadata and controls
Code
Blame
239 lines (207 loc) · 12.2 KB
Raw
---
title: "Script 7: Normalization"
date: '`r format(Sys.time(), "%d %B, %Y")`'
output: html_document
---

```{r runtime start, include=FALSE}
# Remember when the script started so the total run time can be reported.
start_time <- Sys.time()
```

```{r setup, include=FALSE}
source("../data/BMIQ_1.6_Teschendorff.R") # load script for BMIQ normalization

needed_packages <- c("BiocManager", "RPMM", "knitr", "dplyr", "data.table")
user_choices <- readRDS("../data/user_choices.rds")

# Prepend the user-supplied library path unless the placeholder was left as is.
if (user_choices$package_path != "PLEASE FILL IN") {
  .libPaths(c(user_choices$package_path, .libPaths()))
}

# TRUE for every package name in `pkg` that is present in the library paths.
is_installed <- function(pkg) {
  pkg %in% rownames(installed.packages())
}

# Install whatever is missing from CRAN, then attach all required packages.
installed_packages <- is_installed(needed_packages)
if (!all(installed_packages)) {
  install.packages(
    needed_packages[!installed_packages],
    repos = "http://cran.us.r-project.org"
  )
}
invisible(lapply(needed_packages, library, character.only = TRUE))

if (!is_installed("minfi")) {
  BiocManager::install("minfi")
}
library(minfi)

knitr::opts_knit$set(root.dir = paste0(user_choices$personal_path, "/"))
knitr::opts_chunk$set(echo = FALSE)

# EPIC v2 arrays need the manifest and annotation packages hosted on GitHub.
# The installed-package lookup must use the bare package name: the
# "jokergoo/" prefix only exists in the install specification, so testing the
# prefixed string would always report "missing" and reinstall on every run.
if (user_choices$array_type == "v2") {
  if (!is_installed("IlluminaHumanMethylationEPICv2manifest")) {
    BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2manifest")
  }
  if (!is_installed("IlluminaHumanMethylationEPICv2anno.20a1.hg38")) {
    BiocManager::install("jokergoo/IlluminaHumanMethylationEPICv2anno.20a1.hg38")
  }
  library(IlluminaHumanMethylationEPICv2manifest)
  library(IlluminaHumanMethylationEPICv2anno.20a1.hg38)
}

# Project folders, relative to the knitr root directory set above.
processed_dir <- file.path(user_choices$project_name, "processed_data")
reports_dir <- file.path(user_choices$project_name, "reports")

# Run BMIQ on every sample (column) of a beta matrix.
#   betas:       numeric matrix, probes in rows and samples in columns
#   annotations: probe annotation table indexed by probe name, with the
#                columns "Name" and "Type"
# Returns the matrix of BMIQ-normalized betas (`$nbeta` of each BMIQ() call).
# BMIQ() comes from the sourced script "BMIQ_1.6_Teschendorff.R".
bmiq_normalize <- function(betas, annotations) {
  probe_info <- as.data.frame(annotations[rownames(betas), c("Name", "Type")])
  # BMIQ expects the design vector coded as 1 (type I) / 2 (anything else)
  design <- ifelse(probe_info$Type %in% "I", 1, 2)
  # apply() is given the whole matrix: indexing the columns first would drop
  # a single-sample matrix to a plain vector and make apply() fail
  apply(betas, 2, function(sample_betas) {
    BMIQ(sample_betas, design, plots = FALSE)$nbeta
  })
}
```

```{r load data, include=FALSE}
load(file.path(processed_dir, "gRatioSet_clean_filtered.Rdata"))
load(file.path(processed_dir, "Betas_clean_filtered.Rdata"))
load(file.path(processed_dir, "PhenoData_clean.Rdata"))
load(file.path(processed_dir, "RGSet_clean.Rdata"))
load(file.path(processed_dir, "Betas_clean.Rdata"))
load(file.path(reports_dir, "annotations_clean_unfiltered.Rdata"))
load(file.path(reports_dir, "annotations_clean_filtered.Rdata"))
```

# Description

This script:

* normalizes filtered and unfiltered data with stratified quantile normalization (followed by BMIQ = beta-mixture quantile normalization) to minimize unwanted technical variation within and between samples
* visualizes beta densities before and after normalization
* creates one pdf file with the overall beta density plots and three pdf files with per-sample beta density plots (raw, filtered quantile-normalized and unfiltered quantile-normalized data) for further investigation. These files are saved to the reports folder.

<br>

**Note** The user can further alter the normalization methods if needed (e.g. include noob etc.)

## Normalization of data performed

```{r normalize filtered probes, include=FALSE}
# Normalization of filtered data

# Names of the probes removed in all prior filtering steps, i.e. present in
# the unfiltered betas but absent from the filtered betas
all_probes <- rownames(Betas_clean)
excluded_probes <- all_probes[!all_probes %in% rownames(Betas_clean_filtered)]

# Drop the previously filtered probes before normalizing
RGSet_clean_filtered <- subsetByLoci(RGSet_clean, excludeLoci = excluded_probes)

# Stratified quantile normalization (minfi::preprocessQuantile)
# Note: sex is set to "F" for all samples since sex chromosomes were already removed in script 6
# Note: This does not affect the normalization or the phenotype data, it simply stops preprocessQuantile() from producing an error
gRatioSet_clean_filtered_quantile <- preprocessQuantile(RGSet_clean_filtered, sex = "F")
save(gRatioSet_clean_filtered_quantile,
     file = file.path(processed_dir, "gRatioSet_clean_filtered_quantile.Rdata")) # output: GenomicRatioSet

Betas_clean_filtered_quantile <- getBeta(gRatioSet_clean_filtered_quantile)
save(Betas_clean_filtered_quantile,
     file = file.path(processed_dir, "Betas_clean_filtered_quantile.Rdata"))

# NOTE(review): the "qunatile" spelling is kept on purpose in the object and
# file name; other scripts may load this file by that name - confirm before renaming.
Ms_clean_filtered_qunatile <- getM(gRatioSet_clean_filtered_quantile)
save(Ms_clean_filtered_qunatile,
     file = file.path(processed_dir, "Ms_clean_filtered_qunatile.Rdata"))

# further normalization with BMIQ:
Betas_clean_filtered_quantile_bmiq <- bmiq_normalize(
  Betas_clean_filtered_quantile,
  annotations_clean_filtered
)
save(Betas_clean_filtered_quantile_bmiq,
     file = file.path(processed_dir, "Betas_clean_filtered_quantile_bmiq.Rdata"))
```

```{r normalize unfiltered probes, include=FALSE}
# Normalization of unfiltered data

# Stratified quantile normalization (minfi::preprocessQuantile)
# sex is set to "F" so that preprocessQuantile() does not try to predict it.
# NOTE(review): RGSet_clean is the unfiltered set and presumably still holds
# sex-chromosome probes, which preprocessQuantile() handles per sex group.
# Confirm that passing a single "F" here is intended and is applied to every sample.
gRatioSet_clean_unfiltered_quantile <- preprocessQuantile(RGSet_clean, sex = "F")
save(gRatioSet_clean_unfiltered_quantile,
     file = file.path(processed_dir, "gRatioSet_clean_unfiltered_quantile.Rdata")) # output: GenomicRatioSet

Betas_clean_unfiltered_quantile <- getBeta(gRatioSet_clean_unfiltered_quantile)
save(Betas_clean_unfiltered_quantile,
     file = file.path(processed_dir, "Betas_clean_unfiltered_quantile.Rdata"))

# NOTE(review): "qunatile" spelling kept on purpose, see the filtered chunk.
Ms_clean_unfiltered_qunatile <- getM(gRatioSet_clean_unfiltered_quantile)
save(Ms_clean_unfiltered_qunatile,
     file = file.path(processed_dir, "Ms_clean_unfiltered_qunatile.Rdata"))

# further normalization with BMIQ:
Betas_clean_unfiltered_quantile_bmiq <- bmiq_normalize(
  Betas_clean_unfiltered_quantile,
  annotations_clean_unfiltered
)
save(Betas_clean_unfiltered_quantile_bmiq,
     file = file.path(processed_dir, "Betas_clean_unfiltered_quantile_bmiq.Rdata"))
```

## Beta values distribution report

```{r beta values distribution}
# The six beta matrices to compare, in display order, with their plot titles.
density_panels <- list(
  list(betas = Betas_clean,
       title = "Raw Betas"),
  list(betas = Betas_clean_unfiltered_quantile,
       title = "Unfiltered and Quantile Adjusted Betas"),
  list(betas = Betas_clean_unfiltered_quantile_bmiq,
       title = "Unfiltered and Quantile-BMIQ Adjusted Betas"),
  list(betas = Betas_clean_filtered,
       title = "Filtered Betas"),
  list(betas = Betas_clean_filtered_quantile,
       title = "Filtered and Quantile Adjusted Betas"),
  list(betas = Betas_clean_filtered_quantile_bmiq,
       title = "Filtered and Quantile-BMIQ Adjusted Betas")
)

# Draw one density plot per panel on the current graphics device.
#   panels: list of list(betas = <matrix>, title = <character>)
#   slides: grouping vector handed to densityPlot() as `sampGroups`
plot_beta_density_panels <- function(panels, slides) {
  for (panel in panels) {
    densityPlot(panel$betas, sampGroups = slides, legend = FALSE,
                pal = "darkblue", main = panel$title, xlab = "Beta Value")
  }
  invisible(NULL)
}

plot_beta_density_panels(density_panels, PhenoData_clean$Slide)
```

```{r, save reports as pdf, include=FALSE}
# Same six overview plots as shown in the report, written to one pdf.
pdf(file = file.path(reports_dir, "beta_distributions_raw_filtered_normalized.pdf"))
plot_beta_density_panels(density_panels, PhenoData_clean$Slide)
dev.off()

# additional plots to look for outliers in distribution:
# Write one density plot per sample (column of `betas`) into `pdf_path`,
# titled with the sample's column name.
save_per_sample_densities <- function(betas, pdf_path) {
  pdf(file = pdf_path)
  # close the pdf device even if one of the plots fails
  on.exit(dev.off(), add = TRUE)
  sample_names <- colnames(betas)
  for (i in seq_len(ncol(betas))) {
    densityPlot(as.matrix(betas[, i]), pal = "darkblue", main = sample_names[i])
  }
  invisible(pdf_path)
}

save_per_sample_densities(
  Betas_clean,
  file.path(reports_dir, "beta_densities_raw.pdf")
)
save_per_sample_densities(
  Betas_clean_filtered_quantile,
  file.path(reports_dir, "beta_densities_filtered_quantile_normalized.pdf")
)
save_per_sample_densities(
  Betas_clean_unfiltered_quantile,
  file.path(reports_dir, "beta_densities_unfiltered_quantile_normalized.pdf")
)
```

## User report

```{r, info output for user 1, results='asis'}
# Print one report line followed by an HTML line break. The break has to be
# part of the output itself: cat()'s `sep` is only inserted *between*
# arguments, so it never appears when a single string is printed.
report_line <- function(text) {
  cat(text, "<br>\n", sep = "")
}

report_line("Filtered and unfiltered data underwent stratified quantile normalization followed by BMIQ and were saved to the processed data folder. ")

# anyNA() flags both NA and NaN; is.nan() alone would miss plain NA values.
if (!anyNA(Betas_clean_unfiltered_quantile_bmiq)) {
  report_line("There were no NAs in unfiltered data after normalization")
} else {
  report_line("NAs were produced during normalization, please re-check!")
}

if (!anyNA(Betas_clean_filtered_quantile_bmiq)) {
  report_line("There were no NAs in filtered data after normalization")
} else {
  report_line("NAs were produced during normalization, please re-check!")
}
```

```{r, info output for user 2, results='asis'}
cat("Session info text file was updated", "<br>\n", sep = "")
```

```{r, document session info into a text file, include=FALSE}
# Append this script's R version and attached packages to the shared log.
connection <- file(
  file.path(user_choices$personal_path, user_choices$project_name, "session_info_all.txt"),
  "a+"
)
writeLines("######################################################################", connection)
writeLines("############################ Script 7: #############################", connection)
writeLines("######################################################################", connection)

sessioninfo <- sessionInfo()
writeLines("\nR Version:", connection)
writeLines(paste0(" ", sessioninfo$R.version$version.string), connection)
writeLines("\nAttached packages:", connection)

# nicely format packages (with version); yields an empty vector, and
# therefore writes nothing, when no additional packages are attached
package_version <- unname(vapply(
  sessioninfo$otherPkgs,
  function(pkg) paste0(" ", pkg$Package, " (", pkg$Version, ")"),
  character(1)
))
writeLines(package_version, connection)

writeLines("\n", connection)
close(connection)
```

```{r runtime end, include=FALSE}
end_time <- Sys.time()
# Convert to minutes first and round afterwards, so the precision does not
# depend on the unit difftime would pick automatically.
run_time <- round(as.numeric(difftime(end_time, start_time, units = "mins")), 1)
```

## Completion time

```{r, completion time, results = 'asis'}
cat("The time for the script to run was:", run_time, "minutes", sep = " ")
```
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
You can’t perform that action at this time.