Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Fkbp5_DNAm_HAMTBS_mouse/analysis_script.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
769 lines (706 sloc)
35.8 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Analysis script for the paper: | |
# "Extensive evaluation of DNA methylation of functional elements in the murine | |
# Fkbp5 locus using high accuracy DNA methylation measurement via targeted | |
# bisulfite sequencing" | |
# | |
# ============================================================================== | |
# | |
# Author: Natan Yusupov, natan_yusupov@psych.mpg.de | |
# | |
# Description: The script analyses DNAm data of 157 CpGs from three murine | |
# tissues/brain regions as described in details in: | |
# Yusupov N, van Doeselaar L, Röh S, Wiechmann T, Ködel M, Sauer S, Rex-Haffner M, | |
# Schmidt MV, Binder EB. Extensive evaluation of DNA methylation of functional | |
# elements in the murine Fkbp5 locus using high-accuracy DNA methylation | |
# measurement via targeted bisulfite sequencing. Eur J Neurosci. 2023 Jul 6. | |
# doi: 10.1111/ejn.16078. PMID: 37414581. | |
# | |
# Summary of steps performed in the script "analysis_script.R": | |
# 1. Load libraries and data | |
# 2. Label primers with corresponding functional annotations of the locus | |
# 3. Data preparation: | |
# A. Order CpGs in genomic order | |
# B. Separate to DNAm data and covariates data | |
# C. Transform data to M-Values | |
# D. Prepare data in long format | |
# 4. Calculate mean and standard deviation per CpG for each tissue/brain region and ELS status | |
# 5. Calculate delta mean of DNAm for: FC - Blood, HIP - Blood, HIP - FC | |
# 6. Plot DNAm patterns across tissues/brain regions (samples) | |
# 7. Plot DNAm patterns across tissues/brain regions (mean and standard deviation) | |
# 8. Plot delta DNAm patterns across tissues/brain regions | |
# 9. Filter CpGs with low variance at baseline (IQR=<1) | |
# 10. Perform linear regression in each tissue/brain region to study ELS effects on DNAm of each CpG | |
# 11. Visualize significant CpGs | |
# | |
# ============================================================================== | |
# 1. Load libraries and data | |
library(tidyverse) | |
library(broom) | |
library(ggpubr) | |
data_df <- read.csv("data_df.csv", check.names = F) %>% | |
mutate(row = as.factor(row), column = as.factor(column), ELS_status = as.factor(ELS_status), | |
tissue = as.factor(tissue)) | |
cpgs_amplicon_info <- read.csv("cpgs_amplicon_info.csv") | |
# 2. Label primers with corresponding functional annotations of the locus | |
intron_8 <- cpgs_amplicon_info %>% | |
filter(amplicon %in% c("11_fk5_gre_i8.1", "11_fk5_gre_i8.2")) %>% | |
select(cpg_genomic) %>% pull() %>% sort() %>% as.character() | |
intron_5 <- cpgs_amplicon_info %>% | |
filter(amplicon %in% c("10_fk5_gre_i5.1", "10_fk5_gre_i5.2", "10_fk5_gre_i5.3","9_fk5_gre_i5")) %>% | |
select(cpg_genomic) %>% pull() %>% sort() %>% as.character() | |
intron_1 <- cpgs_amplicon_info %>% | |
filter(amplicon %in% c("5_fk5_gre_i1", "6_fk5_gre_i1","7_fk5_gre_i1","8_fk5_gre_i1")) %>% | |
select(cpg_genomic) %>% pull() %>% sort() %>% as.character() | |
tss <- cpgs_amplicon_info %>% filter(amplicon == "18_fk5_TSS") %>% | |
select(cpg_genomic) %>% pull() %>% sort() %>% as.character() | |
proximal_enhancer <- cpgs_amplicon_info %>% | |
filter(amplicon %in% c("1_fk5_gre_pE", "2_fk5_gre_pE_1", "2_fk5_gre_pE_2", "4_fk5_gre_pE")) %>% | |
select(cpg_genomic) %>% pull() %>% sort() %>% as.character() | |
ctcf_5UTR <- cpgs_amplicon_info %>% | |
filter(amplicon %in% c("12_fk5_ctcf_5UTR.1", "12_fk5_ctcf_5UTR.2", "13_fk5_ctcf_5UTR")) %>% | |
select(cpg_genomic) %>% pull() %>% sort() %>% as.character() | |
proximal_enhancer_1 <- cpgs_amplicon_info %>% | |
filter(amplicon == "1_fk5_gre_pE") %>% | |
select(cpg_genomic) %>% pull() %>% sort() %>% as.character() | |
proximal_enhancer_2 <- cpgs_amplicon_info %>% | |
filter(amplicon %in% c("2_fk5_gre_pE_1", "2_fk5_gre_pE_2")) %>% | |
select(cpg_genomic) %>% pull() %>% sort() %>% as.character() | |
proximal_enhancer_3 <- cpgs_amplicon_info %>% | |
filter(amplicon == "4_fk5_gre_pE") %>% | |
select(cpg_genomic) %>% pull() %>% sort() %>% as.character() | |
# 3. data preparation | |
# A. Order CpGs in genomic order | |
target_cpgs <- c(intron_8, intron_5, intron_1, tss, proximal_enhancer, ctcf_5UTR) | |
# B. Separate to DNAm data and covariates data | |
methylation_df <- data_df %>% | |
dplyr::select(sample, contains(target_cpgs)) %>% | |
column_to_rownames(var = "sample") | |
covariates <- data_df %>% | |
select(sample, row, column, ELS_status, tissue) | |
# C. Transform data to M-Values | |
methylation_beta <- apply(methylation_df, 2, function(x) x/100) # Beta values | |
methylation_mval <- apply(methylation_beta, 2, function(x) log2((x)/(1-x))) # M values | |
# D. Prepare data in long format | |
methylation_df_long <- methylation_df %>% | |
rownames_to_column("sample") %>% | |
pivot_longer(cols = -sample, names_to = "cpg", values_to = "methylation") %>% | |
separate(sample, sep = "-", into = c("samplenumber", "tissue"), remove = F) %>% | |
mutate(functional_region = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer ~ "Proximal Enhancer", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error")), | |
functional_region_detailed = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer_1 ~ "Proximal Enhancer: Part 1", | |
cpg %in% proximal_enhancer_2 ~ "Proximal Enhancer: Part 2", | |
cpg %in% proximal_enhancer_3 ~ "Proximal Enhancer: Part 3", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error")), | |
cpg = as.factor(cpg), | |
tissue = as.factor(tissue)) | |
methylation_mval_long <- methylation_mval %>% | |
as.data.frame() %>% | |
rownames_to_column("sample") %>% | |
pivot_longer(cols = -sample, names_to = "cpg", values_to = "methylation") %>% | |
separate(sample, sep = "-", into = c("samplenumber", "tissue"), remove = F) %>% | |
mutate(functional_region = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer ~ "Proximal Enhancer", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error")), | |
functional_region_detailed = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer_1 ~ "Proximal Enhancer: Part 1", | |
cpg %in% proximal_enhancer_2 ~ "Proximal Enhancer: Part 2", | |
cpg %in% proximal_enhancer_3 ~ "Proximal Enhancer: Part 3", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error")), | |
cpg = as.factor(cpg), | |
tissue = as.factor(tissue)) | |
# 4. Calculate mean and standard deviation per CpG for each tissue/brain region and ELS status | |
methylation_mean_sd_tissue <- data_df %>% | |
group_by(tissue) %>% | |
dplyr::summarise(across(contains(target_cpgs), | |
.fns = list(mean = ~ mean(.x, na.rm = TRUE), sd = ~ sd(.x, na.rm = TRUE)), | |
.names = "{.fn}_{.col}")) %>% | |
pivot_longer(cols = -tissue, names_to = "which" , values_to = "methylation") %>% | |
separate(which, into = c("which","cpg"), sep = "_") %>% | |
spread(key = which, value = methylation) %>% | |
mutate(functional_region = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer ~ "Proximal Enhancer", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error"))) | |
methylation_mean_sd_status <- data_df %>% | |
group_by(tissue, ELS_status) %>% | |
dplyr::summarise(across(contains(target_cpgs), | |
.fns = list(mean = ~ mean(.x, na.rm = TRUE), | |
sd = ~ sd(.x, na.rm = TRUE)), | |
.names = "{.fn}_{.col}"), .groups = "keep") %>% | |
pivot_longer(cols = -c("tissue","ELS_status"), | |
names_to = "which" , values_to = "methylation") %>% | |
separate(which, into = c("which","cpg"), sep = "_") %>% | |
spread(key = which, value = methylation) %>% | |
mutate(functional_region = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer ~ "Proximal Enhancer", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error")), | |
functional_region_detailed = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer_1 ~ "Proximal Enhancer: Part 1", | |
cpg %in% proximal_enhancer_2 ~ "Proximal Enhancer: Part 2", | |
cpg %in% proximal_enhancer_3 ~ "Proximal Enhancer: Part 3", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error")), | |
cpg = as.factor(cpg)) | |
# 5. Calculate delta mean of DNAm for: FC - Blood, HIP - Blood, HIP - FC | |
delta_mouse <- data_df %>% | |
filter(ELS_status == "Control") %>% | |
select(-ELS_status, -sample) %>% | |
group_by(tissue) %>% | |
dplyr::summarise(across("28544982":"28665256", | |
.fns = ~ mean(.x, na.rm = TRUE))) %>% | |
pivot_longer(cols = -tissue, names_to = "cpg" , values_to = "mean") | |
delta_mouse_blood <- delta_mouse %>% | |
filter(tissue == "Blood") %>% select(cpg, mean_blood = "mean") | |
delta_mouse_fc <- delta_mouse %>% | |
filter(tissue == "FC") %>% select(cpg, mean_fc = "mean") | |
delta_mouse_hip <- delta_mouse %>% | |
filter(tissue == "HIP") %>% select(cpg, mean_hip = "mean") | |
delta_final_df <- delta_mouse_blood %>% | |
left_join(delta_mouse_fc, by = "cpg") %>% | |
left_join(delta_mouse_hip, by = "cpg") %>% | |
mutate(delta_fc_blood = mean_fc - mean_blood, | |
delta_hip_blood = mean_hip - mean_blood, | |
delta_fc_hip = mean_fc - mean_hip, | |
functional_region = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer ~ "Proximal Enhancer", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error")), | |
functional_region_detailed = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer_1 ~ "Proximal Enhancer: Part 1", | |
cpg %in% proximal_enhancer_2 ~ "Proximal Enhancer: Part 2", | |
cpg %in% proximal_enhancer_3 ~ "Proximal Enhancer: Part 3", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error"))) | |
delta_for_plot <- data.frame(cpg = rep(delta_final_df$cpg, 3), | |
functional_region_detailed = rep(delta_final_df$functional_region_detailed, 3), | |
delta = c(delta_final_df$delta_fc_blood, | |
delta_final_df$delta_hip_blood, | |
delta_final_df$delta_fc_hip), | |
category = factor(c(rep("FC-Blood", 157), | |
rep("HIP-Blood", 157), | |
rep("FC-HIP", 157)), | |
levels = c("FC-Blood", "HIP-Blood", "FC-HIP"))) | |
# 6. Plot DNAm patterns across tissues/brain regions (samples) | |
color_theme <- c("darkred", "cornflowerblue", "lightgoldenrod3") # set color themes | |
plot_per_functional_unit <- function(functional_unit){ # function to make plots | |
methylation_df_long %>% | |
filter(functional_region_detailed == functional_unit) %>% | |
ggplot(aes(x = cpg, y = methylation, col = sample, group = sample)) + | |
geom_point(alpha = 0.5) + | |
geom_line(alpha = 0.3) + | |
labs(x = "CpG", y = "DNA Methylation [%]", | |
title = paste0("DNA Methylation Patterns Across Tissues: ", functional_unit)) + | |
ylim(-5, 105) + | |
facet_wrap(~tissue) + | |
theme_bw() + | |
theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 10, face = "bold"), | |
axis.title.x = element_text(size = 10, face = "bold"), | |
axis.text.x = element_text(size = 8, hjust = 1, angle = 45), | |
legend.position = "none") | |
} | |
plot_per_functional_unit("Intron 8") | |
plot_per_functional_unit("Intron 5") | |
plot_per_functional_unit("Intron 1") | |
plot_per_functional_unit("TSS") | |
plot_per_functional_unit("CTCF 5'UTR") | |
methylation_df_long %>% | |
filter(functional_region == "Proximal Enhancer", tissue == "Blood") %>% | |
ggplot(aes(x = cpg, y = methylation, col = sample, group = sample)) + | |
geom_point(alpha = 0.5) + | |
geom_line(alpha = 0.3) + | |
labs(x = "CpG", y = "DNA Methylation [%]", | |
title = "DNA Methylation Patterns Blood: Proximal Enhancer") + | |
ylim(-5, 105) + | |
theme_bw() + | |
theme(plot.title = element_text(size = 18, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 8, hjust = 1, angle = 90), | |
axis.text.y = element_text(size = 8), | |
legend.position = "none") | |
methylation_df_long %>% | |
filter(functional_region == "Proximal Enhancer", tissue == "FC") %>% | |
ggplot(aes(x = cpg, y = methylation, col = sample, group = sample)) + | |
geom_point(alpha = 0.5) + | |
geom_line(alpha = 0.3) + | |
labs(x = "CpG", y = "DNAm Methylation [%]", | |
title = "DNA Methylation Patterns FC: Proximal Enhancer") + | |
ylim(-5, 105) + | |
theme_bw() + | |
theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 8, hjust = 1, angle = 90), | |
legend.position = "none") | |
methylation_df_long %>% | |
filter(functional_region == "Proximal Enhancer", tissue == "HIP") %>% | |
ggplot(aes(x = cpg, y = methylation, col = sample, group = sample)) + | |
geom_point(alpha = 0.5) + | |
geom_line(alpha = 0.3) + | |
labs(x = "CpG", y = "DNA Methylation [%]", | |
title = "DNA Methylation Patterns HIP: Proximal Enhancer") + | |
ylim(-5, 105) + | |
theme_bw() + | |
theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 8, hjust = 1, angle = 90), | |
legend.position = "none") | |
# 7. Plot DNAm patterns across tissues/brain regions (mean and standard deviation) | |
plot_per_functional_unit <- function(functional_unit){ | |
methylation_mean_sd_status %>% # only unreated samples here | |
filter(functional_region_detailed == functional_unit, ELS_status == "Control") %>% | |
ggplot(aes(x = cpg, y = mean, col = tissue, group = tissue)) + | |
geom_point(alpha = 0.7) + | |
geom_line(size = 1) + | |
scale_color_manual(values=color_theme) + | |
scale_y_continuous(limits = c(0,100), breaks=seq(0,100,10)) + | |
geom_errorbar(aes(ymin = mean-sd, ymax = mean+sd), width = .2, size = 1) + | |
labs(x = "CpG", y = "DNA Methylation [%]", | |
title = functional_unit) + | |
theme_bw() + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 14, hjust = 1, angle = 45), | |
axis.text.y = element_text(size = 12), | |
legend.background = element_rect(fill = "lightgray", size = 0.5, | |
linetype = "solid", colour = "darkgray"), | |
legend.title = element_blank(), | |
legend.position = "bottom") | |
} | |
plot_per_functional_unit("Intron 8") | |
plot_per_functional_unit("Intron 5") | |
plot_per_functional_unit("Intron 1") | |
plot_per_functional_unit("CTCF 5'UTR") | |
methylation_mean_sd_status %>% | |
filter(functional_region == "TSS", ELS_status == "Control") %>% | |
ggplot(aes(x = cpg, y = mean, col = tissue, group = tissue)) + | |
geom_point(alpha = 0.7) + | |
geom_line(size = 0.4) + | |
scale_color_manual(values=color_theme) + | |
geom_errorbar(aes(ymin = mean-sd, ymax = mean+sd), width = .2) + | |
labs(x = "CpG", y = "DNA Methylation [%]", | |
title = "TSS") + | |
theme_bw() + | |
scale_y_continuous(limits = c(-5,100), breaks=seq(0,100,10)) + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 14, hjust = 1, angle = 45), | |
axis.text.y = element_text(size = 14), | |
legend.background = element_rect(fill = "lightgray", size = 0.5, | |
linetype = "solid", colour = "darkgray"), | |
legend.title = element_blank(), | |
legend.position = "bottom") | |
methylation_mean_sd_status %>% | |
filter(functional_region_detailed == "Proximal Enhancer: Part 1", ELS_status == "Control") %>% | |
ggplot(aes(x = cpg, y = mean, col = tissue, group = tissue)) + | |
geom_point(alpha = 0.7) + | |
geom_line(size = 1) + | |
scale_color_manual(values=color_theme) + | |
geom_errorbar(aes(ymin = mean-sd, ymax = mean+sd), width = .2, size = 1) + | |
labs(x = "CpG", y = "DNA Methylation [%]", | |
title = "Proximal Enhancer: Part 1") + | |
theme_bw() + | |
scale_y_continuous(limits = c(-5,100), breaks=seq(0,100,10)) + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 14, hjust = 1, angle = 45), | |
axis.text.y = element_text(size = 14), | |
legend.background = element_rect(fill = "lightgray", size = 0.5, | |
linetype = "solid", colour = "darkgray"), | |
legend.title = element_blank(), | |
legend.position = "bottom") | |
methylation_mean_sd_status %>% | |
filter(functional_region_detailed == "Proximal Enhancer: Part 2", ELS_status == "Control") %>% | |
ggplot(aes(x = cpg, y = mean, col = tissue, group = tissue)) + | |
geom_point(alpha = 0.7) + | |
geom_line(size = 1) + | |
scale_color_manual(values=color_theme) + | |
geom_errorbar(aes(ymin = mean-sd, ymax = mean+sd), width = .2, size = 1) + | |
labs(x = "CpG", y = "DNA Methylation [%]", | |
title = "Proximal Enhancer: Part 2") + | |
theme_bw() + | |
scale_y_continuous(limits = c(-5,100), breaks=seq(0,100,10)) + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 14, hjust = 1, angle = 45), | |
axis.text.y = element_text(size = 14), | |
legend.background = element_rect(fill = "lightgray", size = 0.5, | |
linetype = "solid", colour = "darkgray"), | |
legend.title = element_blank(), | |
legend.position = "bottom") | |
methylation_mean_sd_status %>% | |
filter(functional_region_detailed == "Proximal Enhancer: Part 3", ELS_status == "Control") %>% | |
ggplot(aes(x = cpg, y = mean, col = tissue, group = tissue)) + | |
geom_point(alpha = 0.7) + | |
geom_line(size = 1) + | |
scale_color_manual(values=color_theme) + | |
geom_errorbar(aes(ymin = mean-sd, ymax = mean+sd), width = .2, size = 1) + | |
labs(x = "CpG", y = "DNA Methylation [%]", | |
title = "Proximal Enhancer: Part 3") + | |
theme_bw() + | |
scale_y_continuous(limits = c(-5,100), breaks=seq(0,100,10)) + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 14, hjust = 1, angle = 45), | |
axis.text.y = element_text(size = 14), | |
legend.background = element_rect(fill = "lightgray", size = 0.5, | |
linetype = "solid", colour = "darkgray"), | |
legend.title = element_blank(), | |
legend.position = "bottom") | |
# 8. Plot delta DNAm patterns across tissues/brain regions | |
plot_delta <- function(functional_unit){ | |
delta_for_plot %>% | |
filter(functional_region_detailed == functional_unit) %>% | |
ggplot(aes(x = cpg, y = delta, group = category)) + | |
geom_line(size = 1, aes(linetype = category, color = category)) + | |
geom_point(aes(color = category)) + | |
geom_hline(color = "darkred", yintercept = 0) + | |
scale_y_continuous(limits = c(-80,80), breaks=seq(-80,80,10)) + | |
scale_linetype_manual(values=c("solid", "twodash", "dotted")) + | |
scale_color_manual(values = c("#3B528BFF", "#21908CFF", "#5DC963FF")) + | |
labs(x = "CpG", y = Delta ~ "DNA Methylation [%]", | |
title = functional_unit) + | |
theme_bw() + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 14, hjust = 1, angle = 45), | |
axis.text.y = element_text(size = 12), | |
legend.background = element_rect(fill = "lightgray", size = 0.5, | |
linetype = "solid", colour = "darkgray"), | |
legend.title = element_blank(), | |
legend.position = "bottom") | |
} | |
plot_delta("Intron 8") | |
plot_delta("Intron 5") | |
plot_delta("Intron 1") | |
plot_delta("TSS") | |
plot_delta("Proximal Enhancer: Part 3") | |
plot_delta("Proximal Enhancer: Part 2") | |
plot_delta("Proximal Enhancer: Part 1") | |
plot_delta("CTCF 5'UTR") | |
# 9. Filter CpGs with low variance at baseline (IQR=<1) | |
iqr_cpg_tissue <- data_df %>% | |
filter(ELS_status == "Control") %>% | |
select(-c(ELS_status,row, column)) %>% | |
group_by(tissue) %>% | |
summarise(across(-c(sample), IQR, na.rm = TRUE)) %>% | |
pivot_longer(cols = -tissue, names_to = "cpg", values_to = "IQR") %>% | |
arrange(desc(IQR)) %>% | |
mutate(functional_region = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer ~ "Proximal Enhancer", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error"))) | |
iqr_cpg_tissue %>% | |
ggplot(aes(x = IQR)) + | |
geom_histogram() + | |
geom_vline(color = "darkred", xintercept = 1) + | |
facet_wrap(~tissue) + | |
theme(plot.title = element_text(size = 12, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 10, face = "bold"), | |
axis.title.x = element_text(size = 10, face = "bold"), | |
axis.text.x = element_text(size = 5, hjust = 1, angle = 45), | |
legend.background = element_rect(fill = "lightgray", size = 0.5, | |
linetype = "solid", colour = "darkgray"), | |
legend.title = element_blank(), | |
legend.position = "bottom") + | |
theme_bw() + | |
scale_x_continuous(breaks = seq(0, 25, by = 1)) + | |
scale_y_continuous(breaks = seq(0, 100, by = 10)) | |
cpgs_remain_blood <- iqr_cpg_tissue %>% filter(tissue == "Blood", IQR > 1) %>% pull(cpg) | |
print(paste0("CpGs remained for final analysis in blood: ", length(cpgs_remain_blood))) | |
cpgs_remain_fc <- iqr_cpg_tissue %>% filter(tissue == "FC", IQR > 1) %>% pull(cpg) | |
print(paste0("CpGs remained for final analysis in FC: ", length(cpgs_remain_fc))) | |
cpgs_remain_hip <- iqr_cpg_tissue %>% filter(tissue == "HIP", IQR > 1) %>% pull(cpg) | |
print(paste0("CpGs remained for final analysis in HIP: ", length(cpgs_remain_hip))) | |
reduced_df_mval_long_blood <- methylation_mval_long %>% filter(tissue == "Blood", cpg %in% cpgs_remain_blood) | |
reduced_df_mval_long_fc <- methylation_mval_long %>% filter(tissue == "FC", cpg %in% cpgs_remain_fc) | |
reduced_df_mval_long_hip <- methylation_mval_long %>% filter(tissue == "HIP", cpg %in% cpgs_remain_hip) | |
reduced_df_mval_all_tissues <- rbind(reduced_df_mval_long_blood, reduced_df_mval_long_fc, reduced_df_mval_long_hip) | |
# 10. Perform linear regression in each tissue/brain region to study ELS effects on DNAm of each CpG | |
lm_summary <- function(tissue_region){ # function to run linear regressions in each tissue separately | |
data_sub <- reduced_df_mval_all_tissues %>% | |
pivot_wider(id_cols = sample, names_from = cpg, values_from = methylation) %>% | |
left_join(covariates %>% select(sample, tissue, ELS_status)) %>% | |
filter(tissue == tissue_region) | |
results <- list() | |
for(i in outcome){ | |
f <- as.formula(paste0("`", i, "`", "~ ELS_status")) | |
tidy <- tidy(lm(formula = f, data = data_sub, na.action = na.omit)) | |
results[[i]] <- tidy | |
} | |
results | |
} | |
## Blood | |
outcome <- cpgs_remain_blood %>% sort() | |
lm_blood <- lm_summary("Blood") | |
lm_blood_df <- purrr::map_dfr(lm_blood, ~ .x, .id = "cpg") %>% | |
mutate(q.value = p.adjust(p.value, method = "fdr"), | |
p.signif = case_when(p.value < 0.05 & p.value >= 0.01 ~ "*", | |
p.value < 0.01 & p.value >= 0.001 ~ "**", | |
p.value < 0.001 & p.value >= 0.0001 ~ "***", | |
p.value < 0.0001 ~ "****", | |
p.value > 0.05 ~ "ns"), | |
q.signif = case_when(q.value < 0.05 & q.value >= 0.01 ~ "*", | |
q.value < 0.01 & q.value >= 0.001 ~ "**", | |
q.value < 0.001 & q.value >= 0.0001 ~ "***", | |
q.value < 0.0001 ~ "****", | |
q.value > 0.05 ~ "ns"), | |
functional_region = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer ~ "Proximal Enhancer", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error"))) | |
lm_blood_df %>% filter(term != "(Intercept)", (p.signif != "ns" | q.signif != "ns")) | |
## Frontal cortex | |
outcome <- cpgs_remain_fc %>% sort() | |
lm_fc <- lm_summary("FC") | |
lm_fc_df <- purrr::map_dfr(lm_fc, ~ .x, .id = "cpg") %>% | |
mutate(q.value = p.adjust(p.value, method = "fdr"), | |
p.signif = case_when(p.value < 0.05 & p.value >= 0.01 ~ "*", | |
p.value < 0.01 & p.value >= 0.001 ~ "**", | |
p.value < 0.001 & p.value >= 0.0001 ~ "***", | |
p.value < 0.0001 ~ "****", | |
p.value > 0.05 ~ "ns"), | |
q.signif = case_when(q.value < 0.05 & q.value >= 0.01 ~ "*", | |
q.value < 0.01 & q.value >= 0.001 ~ "**", | |
q.value < 0.001 & q.value >= 0.0001 ~ "***", | |
q.value < 0.0001 ~ "****", | |
q.value > 0.05 ~ "ns"), | |
functional_region = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer ~ "Proximal Enhancer", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error"))) | |
lm_fc_df %>% filter(term != "(Intercept)", (p.signif != "ns" | q.signif != "ns")) | |
## Hippocampus | |
outcome <- cpgs_remain_hip %>% sort() | |
lm_hip <- lm_summary("HIP") | |
lm_hip_df <- purrr::map_dfr(lm_hip, ~ .x, .id = "cpg") %>% | |
mutate(q.value = p.adjust(p.value, method = "fdr"), | |
p.signif = case_when(p.value < 0.05 & p.value >= 0.01 ~ "*", | |
p.value < 0.01 & p.value >= 0.001 ~ "**", | |
p.value < 0.001 & p.value >= 0.0001 ~ "***", | |
p.value < 0.0001 ~ "****", | |
p.value > 0.05 ~ "ns"), | |
q.signif = case_when(q.value < 0.05 & q.value >= 0.01 ~ "*", | |
q.value < 0.01 & q.value >= 0.001 ~ "**", | |
q.value < 0.001 & q.value >= 0.0001 ~ "***", | |
q.value < 0.0001 ~ "****", | |
q.value > 0.05 ~ "ns"), | |
functional_region = as.factor(case_when( | |
cpg %in% intron_8 ~ "Intron 8", | |
cpg %in% intron_5 ~ "Intron 5", | |
cpg %in% intron_1 ~ "Intron 1", | |
cpg %in% tss ~ "TSS", | |
cpg %in% proximal_enhancer ~ "Proximal Enhancer", | |
cpg %in% ctcf_5UTR ~ "CTCF 5'UTR", | |
TRUE ~ "error"))) | |
lm_hip_df %>% filter(term != "(Intercept)", (p.signif != "ns" | q.signif != "ns")) | |
# 11. Visualize significant CpGs | |
## Blood | |
data_df %>% | |
ggplot(aes(x = ELS_status, y = `28557969`)) + | |
geom_boxplot(aes(fill = ELS_status), alpha = 0.7, outlier.size = 1) + | |
geom_point(aes(fill = ELS_status), alpha = 0.7, | |
position = position_jitterdodge(jitter.height = 0, | |
jitter.width = 0.2), size = 1) + | |
facet_wrap(~ tissue) + | |
scale_fill_manual(values = c("snow2", "seagreen4")) + | |
labs(title = "Intron 5 - 28557969", y = "DNA Methylation [%]") + | |
theme_bw() + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_blank(), | |
axis.text.x = element_text(size = 16, face = "bold"), | |
axis.text.y = element_text(size = 14), | |
legend.position = "none") + | |
stat_compare_means(aes(group = ELS_status, label = paste0("p = ", ..p.format.., ..p.signif..)), | |
method = "t.test", size = 5) + | |
scale_y_continuous(breaks = seq(round(min(data_df$`28557969`, na.rm = T)), | |
round(max(data_df$`28557969`, na.rm = T)), by = 4)) | |
data_df %>% | |
ggplot(aes(x = ELS_status, y = `28579496`)) + | |
geom_boxplot(aes(fill = ELS_status), alpha = 0.7, outlier.size = 1) + | |
geom_point(aes(fill = ELS_status), alpha = 0.7, | |
position = position_jitterdodge(jitter.height = 0, | |
jitter.width = 0.2), size = 1) + | |
facet_wrap(~ tissue) + | |
scale_fill_manual(values = c("snow2", "seagreen4")) + | |
labs(title ="Intron 1 - 28579496", y = "DNA Methylation [%]") + | |
theme_bw() + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_blank(), | |
axis.text.x = element_text(size = 16, face = "bold"), | |
axis.text.y = element_text(size = 14), | |
legend.position = "none") + | |
stat_compare_means(aes(group = ELS_status, label = paste0("p = ", ..p.format.., ..p.signif..)), | |
method = "t.test", size = 5) + | |
scale_y_continuous(breaks = seq(round(min(data_df$`28579496`, na.rm = T)), | |
round(max(data_df$`28579496`, na.rm = T)), by = 4)) | |
## FC | |
data_df %>% | |
ggplot(aes(x = ELS_status, y = `28557729`)) + | |
geom_boxplot(aes(fill = ELS_status), alpha = 0.7, outlier.size = 1) + | |
geom_point(aes(fill = ELS_status), alpha = 0.7, | |
position = position_jitterdodge(jitter.height = 0, | |
jitter.width = 0.2), size = 1) + | |
facet_wrap(~ tissue) + | |
scale_fill_manual(values = c("snow2", "seagreen4")) + | |
labs(title = "Intron 5 - 28557729", y = "DNA Methylation [%]") + | |
theme_bw() + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_blank(), | |
axis.text.x = element_text(size = 16, face = "bold"), | |
axis.text.y = element_text(size = 14), | |
legend.position = "none") + | |
stat_compare_means(aes(group = ELS_status, label = paste0("p = ", ..p.format.., ..p.signif..)), | |
method = "t.test", size = 5) + | |
scale_y_continuous(breaks = seq(round(min(data_df$`28557729`, na.rm = T)), | |
round(max(data_df$`28557729`, na.rm = T)), by = 4)) | |
data_df %>% | |
ggplot(aes(x = ELS_status, y = `28649771`)) + | |
geom_boxplot(aes(fill = ELS_status), alpha = 0.7, outlier.size = 1) + | |
geom_point(aes(fill = ELS_status), alpha = 0.7, | |
position = position_jitterdodge(jitter.height = 0, | |
jitter.width = 0.2), size = 1) + | |
facet_wrap(~ tissue) + | |
scale_fill_manual(values = c("snow2", "seagreen4")) + | |
labs(title ="Proximal Enhancer - 28649771", y = "DNA Methylation [%]") + | |
theme_bw() + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_blank(), | |
axis.text.x = element_text(size = 16, face = "bold"), | |
axis.text.y = element_text(size = 14), | |
legend.position = "none") + | |
stat_compare_means(aes(group = ELS_status, label = paste0("p = ", ..p.format.., ..p.signif..)), | |
method = "t.test", size = 5) + | |
scale_y_continuous(breaks = seq(round(min(data_df$`28649771`, na.rm = T)), | |
round(max(data_df$`28649771`, na.rm = T)), by = 4)) | |
data_df %>% | |
ggplot(aes(x = ELS_status, y = `28649785`)) + | |
geom_boxplot(aes(fill = ELS_status), alpha = 0.7, outlier.size = 1) + | |
geom_point(aes(fill = ELS_status), alpha = 0.7, | |
position = position_jitterdodge(jitter.height = 0, | |
jitter.width = 0.2), size = 1) + | |
facet_wrap(~ tissue) + | |
scale_fill_manual(values = c("snow2", "seagreen4")) + | |
labs(title ="Proximal Enhancer - 28649785", y = "DNA Methylation [%]") + | |
theme_bw() + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_blank(), | |
axis.text.x = element_text(size = 16, face = "bold"), | |
axis.text.y = element_text(size = 14), | |
legend.position = "none") + | |
stat_compare_means(aes(group = ELS_status, label = paste0("p = ", ..p.format.., ..p.signif..)), | |
method = "t.test", size = 5) + | |
scale_y_continuous(breaks = seq(round(min(data_df$`28649785`, na.rm = T)), | |
round(max(data_df$`28649785`, na.rm = T)), by = 4)) | |
## HIP | |
data_df %>% | |
ggplot(aes(x = ELS_status, y = `28657196`)) + | |
geom_boxplot(aes(fill = ELS_status), alpha = 0.7, outlier.size = 1) + | |
geom_point(aes(fill = ELS_status), alpha = 0.7, | |
position = position_jitterdodge(jitter.height = 0, | |
jitter.width = 0.2), size = 1) + | |
facet_wrap(~ tissue) + | |
scale_fill_manual(values = c("snow2", "seagreen4")) + | |
labs(title = "CTCF 5'UTR - 28657196", y = "DNA Methylation [%]") + | |
theme_bw() + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_blank(), | |
axis.text.x = element_text(size = 16, face = "bold"), | |
axis.text.y = element_text(size = 14), | |
legend.position = "none") + | |
stat_compare_means(aes(group = ELS_status, label = paste0("p = ", ..p.format.., ..p.signif..)), | |
method = "t.test", size = 5) + | |
scale_y_continuous(breaks = seq(round(min(data_df$`28657196`, na.rm = T)), | |
round(max(data_df$`28657196`, na.rm = T)), by = 4)) | |
data_df %>% | |
ggplot(aes(x = ELS_status, y = `28657385`)) + | |
geom_boxplot(aes(fill = ELS_status), alpha = 0.7, outlier.size = 1) + | |
geom_point(aes(fill = ELS_status), alpha = 0.7, | |
position = position_jitterdodge(jitter.height = 0, | |
jitter.width = 0.2), size = 1) + | |
facet_wrap(~ tissue) + | |
scale_fill_manual(values = c("snow2", "seagreen4")) + | |
labs(title = "CTCF 5'UTR - 28657385", y = "DNA Methylation [%]") + | |
theme_bw() + | |
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.title.x = element_blank(), | |
axis.text.x = element_text(size = 16, face = "bold"), | |
axis.text.y = element_text(size = 14), | |
legend.position = "none") + | |
stat_compare_means(aes(group = ELS_status, label = paste0("p = ", ..p.format.., ..p.signif..)), | |
method = "t.test", size = 5) + | |
scale_y_continuous(breaks = seq(round(min(data_df$`28657385`, na.rm = T)), | |
round(max(data_df$`28657385`, na.rm = T)), by = 2)) | |
# Session information | |
utils:::print.sessionInfo(sessionInfo()[-8]) | |
# End of script |