Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Performance_Epigenetic_Aging_Estimators_EPIC_v1.0_v2.0/analysis_script.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
831 lines (758 sloc)
37.5 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Analysis script for the paper: | |
# "Performance of epigenetic aging estimators from peripheral blood using | |
# Infinium MethylationEPIC v2.0 vs. v1.0 BeadChip arrays" | |
# | |
# ============================================================================== | |
# | |
# Author: Natan Yusupov, natan_yusupov@psych.mpg.de | |
# | |
# Description: The script analyses DNAm data from human peripheral blood | |
# quantified with the recently introduced Infinium MethylationEPIC BeadChip | |
# array v2.0 ((Illumina, San Diego, CA, USA) and its performance in applications | |
# related to epigenetic aging estimation. | |
# | |
# Summary of steps performed in the script "analysis_script.R": | |
# 1. Load libraries and data | |
# 2. Inspection of content EPIC v1.0 and v2.0 | |
# A. Number of CpGs | |
# B. Find duplicates of probes | |
# C. Find replicates of type I/II technology, strand TB and strand CO | |
# 3. Overlap of probes across EPIC array versions | |
# 4. Prepare data | |
# 5. Calculation of mean beta values, their standard deviations, median | |
# absolute deviation across subjects for both arrays, absolute delta mean | |
# of mean beta values between both arrays | |
# 6. Correlation of probes’ beta values between EPIC array versions | |
# 7. Probe-wise standard deviations of beta values in both EPIC array versions | |
# 8. EPIC version-related differences in DNA methylation quantification | |
# 9. Clocks probes available on EPIC v1.0 and v2.0 | |
# 10. Clock probes within probes with a absolute delta mean over 0.1 of mean | |
# beta values between both arrays | |
# 11. Calculation of epigenetic age estimators | |
# A. Calculation of DNAmAge & DNAmAge Acceleration for following estimators: | |
# Hannum, Horvath, PhenoAge | |
# B. Calculation of Pace of aging for following estimators: DunedinPoAm, DunedinPACE | |
# 12. Comparison of biological age estimators in our sample | |
# A. DNAmAge | |
# B. DNAmAge acceleration | |
# C. Pace of aging | |
# 13. Absolute delta of biological age estimators within individual across and within platforms | |
# A. DNAmAge | |
# B. DNAmAge acceleration | |
# C. Pace of aging | |
# 14. Mean and SD of absolute delta of biological age estimators | |
# 15. Estimator-wise comparison of within-person variation across EPIC arrays | |
# A. DNAmAge | |
# B. DNAmAge acceleration | |
# C. Pace of aging | |
# 16. Mean and SD of absolute delta of biological age estimators in each age group | |
# 17. Estimator-wise comparison of within-person variation across EPIC arrays by age group | |
# A. DNAmAge | |
# B. DNAmAge acceleration | |
# C. Pace of aging | |
# 18. Percent of probes with low correlation across individuals among probes | |
# with low reliability available from comparison by Sugdan et al. 2020 | |
# ============================================================================== | |
# 1. Load libraries and data | |
library(utils) | |
library(tidyverse) | |
library(eulerr) | |
library(purrr) | |
library(RColorBrewer) | |
library(ggpubr) | |
library(IlluminaHumanMethylationEPICmanifest) | |
library(IlluminaHumanMethylationEPICv2manifest) | |
library(IlluminaHumanMethylationEPICv2anno.20a1.hg38) | |
library(readxl) | |
library(writexl) | |
library(ggh4x) | |
library(methylclock) | |
library(DunedinPoAm38) | |
library(DunedinPACE) | |
library(IlluminaHumanMethylationEPICmanifest) | |
library(IlluminaHumanMethylationEPICv2manifest) | |
library(IlluminaHumanMethylationEPICv2anno.20a1.hg38) | |
# data for general analysis | |
annotation_epic_v2 <- getAnnotation(IlluminaHumanMethylationEPICv2anno.20a1.hg38) | |
manifest_v1 <- getManifestInfo(IlluminaHumanMethylationEPICmanifest, "locusNames") | |
manifest_v2 <- getManifestInfo(IlluminaHumanMethylationEPICv2manifest, "locusNames") | |
load("Betas_clean_quantile_filtered_v1.Rdata") # V1 preprocessed, normalized and filtered beta | |
betas_v1 <- Betas_clean_quantile_filtered_v1 | |
load("Betas_clean_quantile_filtered_v2.Rdata") # V2 preprocessed, normalized and filtered beta | |
betas_v2 <- Betas_clean_quantile_filtered_v2 | |
# data for epigenetic clocks analysis | |
load("Betas_clean_quantile_bmiq_v1.Rdata") | |
epic_v1 <- Betas_clean_quantile_bmiq_v1 | |
load("Betas_clean_quantile_bmiq_v2.Rdata") | |
epic_v2 <- Betas_clean_quantile_bmiq_v2 | |
load("predicted_age_v1_2020") # priorly calculated epigenetic age | |
# Clock CpGs of the epigenetic age estimators used in the analysis | |
read_xlsx("clocks_cpgs.xlsx") | |
# data from Sugden et al. 2020, doi: 10.1016/j.patter.2020.100014. PMID: 32885222; PMCID: PMC7467214 | |
low_reliability_sugden <- read_xlsx("../info/low_reliability_sugden.xlsx") | |
# 2. Inspection of content EPIC v1.0 and v2.0 | |
# A. Number of probes | |
manifest_v1 <- unique(manifest_v1) | |
length(manifest_v1) | |
manifest_v2 <- str_replace(manifest_v2, "_[:alnum:]+$", "") # remove special annotation ending from v2.0 for comparison | |
manifest_v2 <- unique(manifest_v2) | |
length(manifest_v2) | |
annotation_epic_v2 <- annotation_epic_v2 %>% | |
as.data.frame() %>% | |
separate(Name, c("EPICv2_Loci", "classifier"), "_", remove = FALSE) | |
# B. Find duplicates of probes | |
duplicated_vector <- annotation_epic_v2 %>% | |
filter(duplicated(EPICv2_Loci)) %>% pull(EPICv2_Loci) | |
all_duplicates_df <- annotation_epic_v2 %>% | |
filter(EPICv2_Loci %in% duplicated_vector) %>% arrange(EPICv2_Loci) | |
# C. Find replicates of type I/II technology, strand TB and strand CO | |
replicated_type12_illuminastrand_assaystrand_df <- all_duplicates_df %>% | |
mutate(Type = as.factor(Type)) %>% | |
group_by(EPICv2_Loci) %>% | |
filter(n_distinct(Type) > 1 | n_distinct(Strand_TB) > 1 | n_distinct(Strand_CO) > 1) | |
replicated_type12_illuminastrand_assaystrand <- replicated_type12_illuminastrand_assaystrand_df %>% | |
distinct(EPICv2_Loci) %>% pull(EPICv2_Loci) | |
# 3. Overlap of probes across EPIC array versions | |
supplemenatry_figure_1 <- plot(euler(list(`EPIC v1.0` = manifest_v1, `EPIC v2.0` = manifest_v2)), | |
key = TRUE, counts = TRUE, input = "union", alpha = 0.5, | |
quantities = list(type = c("counts","percent"), cex = 1.2), | |
fills = list(fill = c("royalblue4", "steelblue4", "lightskyblue")), | |
edges = list(lty = 2), factor_names = TRUE, | |
labels = list(font=2, cex=2), legend = FALSE, | |
main = expression(~bold("Comparison of Total Probes"))) | |
supplemenatry_figure_1 | |
# 4. Prepare data | |
differring_cpg_names <- annotation_epic_v2 %>% | |
mutate_all(na_if, "") %>% | |
filter(!EPICv2_Loci %in% duplicated_vector) %>% | |
mutate(identical = ifelse(EPICv2_Loci == EPICv1_Loci, "yes", "no")) %>% | |
filter(identical == "no") | |
differring_cpg_names_v1 <- differring_cpg_names %>% pull(EPICv1_Loci) | |
differring_cpg_names_v2 <- differring_cpg_names %>% pull(EPICv2_Loci) | |
common_cpgs <- intersect(manifest_v1, manifest_v2) | |
betas_v1_df <- betas_v1 %>% | |
as.data.frame() %>% | |
rownames_to_column("EPIC_Loci") %>% | |
filter(!EPIC_Loci %in% differring_cpg_names_v1) %>% | |
filter(EPIC_Loci %in% common_cpgs) %>% | |
column_to_rownames("EPIC_Loci") %>% | |
as.matrix() %>% | |
t() %>% | |
as.data.frame() | |
betas_v2_df <- betas_v2 %>% | |
as.data.frame() %>% | |
rownames_to_column("cpg") %>% | |
separate(cpg, c("EPIC_Loci", "classifier"), "_") %>% | |
distinct(EPIC_Loci, .keep_all = TRUE) %>% | |
dplyr::select(-classifier) %>% | |
filter(EPIC_Loci %in% colnames(betas_v1_df)) %>% | |
column_to_rownames("EPIC_Loci") %>% | |
as.matrix() %>% | |
t() %>% | |
as.data.frame() | |
betas_v1_df <- betas_v1_df %>% | |
dplyr::select(colnames(betas_v2_df)) | |
# 5. Calculation of mean beta values, their standard deviations, median | |
# absolute deviation across subjects for both arrays, absolute delta mean | |
# of mean beta values between both arrays | |
betas_v1_df_long <- betas_v1_df %>% | |
pivot_longer(cols = everything(), names_to = "cpg", values_to = "beta") %>% | |
group_by(cpg) %>% | |
summarize(mean_beta_v1 = mean(beta), | |
sd_beta_v1 = sd(beta), | |
mad_beta_v1 = mad(beta)) | |
betas_v2_df_long <- betas_v2_df %>% | |
pivot_longer(cols = everything(), names_to = "cpg", values_to = "beta") %>% | |
group_by(cpg) %>% | |
summarize(mean_beta_v2 = mean(beta), | |
sd_beta_v2 = sd(beta), | |
mad_beta_v2 = mad(beta)) | |
joined_df <- betas_v1_df_long %>% | |
left_join(betas_v2_df_long, by = "cpg") %>% | |
mutate(delta_mean = abs(mean_beta_v2 - mean_beta_v1)) | |
# 6. Correlation of probes’ beta values between EPIC array versions | |
Supplementary_Figure_2 <- joined_df %>% | |
ggplot(aes(x = mean_beta_v1, y = mean_beta_v2)) + | |
geom_point(alpha = 0.25, color = 4, fill = 4) + | |
geom_smooth(method = "lm", se = FALSE) + | |
theme_bw() + | |
labs(x = "Mean Beta Value (EPIC v1.0)", y = "Mean Beta Value (EPIC v2.0)") + | |
theme(axis.title.x = element_text(size = 16, face = "bold"), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12)) + | |
stat_cor(method="pearson") | |
Supplementary_Figure_2 | |
# 7. Probe-wise standard deviations of beta values in both EPIC array versions | |
Supplementary_Figure_3A_1 <- joined_df %>% | |
ggplot(aes(x = sd_beta_v1)) + | |
geom_histogram(alpha = 0.25, color = 4, fill = 4, bins = 100) + | |
theme_bw() + | |
labs(x = "Standard Deviation of Beta Value", | |
title = "Probe-Wise Standard Deviations (EPIC v1.0)") + | |
theme(axis.title.x = element_text(size = 16, face = "bold"), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold")) + | |
scale_y_continuous(labels = function(x) format(x, scientific = FALSE), | |
breaks = seq(0, 150000, 25000)) | |
Supplementary_Figure_3A_1 | |
Supplementary_Figure_3A_2 <- joined_df %>% | |
ggplot(aes(x = sd_beta_v1)) + | |
geom_boxplot(alpha = 0.25, color = 4, fill = 4) + | |
labs(x = "Standard Deviation of Beta Value") + | |
theme_bw() + | |
theme(axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_blank(), axis.ticks.y = element_blank()) | |
Supplementary_Figure_3A_2 | |
Supplementary_Figure_3B_1 <- joined_df %>% | |
ggplot(aes(x = sd_beta_v2)) + | |
geom_histogram(alpha = 0.25, color = 4, fill = 4, bins = 100) + | |
theme_bw() + | |
labs(x = "Standard Deviation of Beta Value", | |
title = "Probe-Wise Standard Deviations (EPIC v2.0)") + | |
theme(axis.title.x = element_text(size = 16, face = "bold"), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold")) + | |
scale_y_continuous(labels = function(x) format(x, scientific = FALSE), | |
breaks = seq(0, 150000, 25000)) | |
Supplementary_Figure_3B_1 | |
Supplementary_Figure_3B_2 <- joined_df %>% | |
ggplot(aes(x = sd_beta_v2)) + | |
geom_boxplot(alpha = 0.25, color = 4, fill = 4) + | |
labs(x = "Standard Deviation of Beta Value") + | |
theme_bw() + | |
theme(axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_blank(), axis.ticks.y = element_blank()) | |
Supplementary_Figure_3B_2 | |
mean(joined_df$mad_beta_v1) | |
sd(joined_df$mad_beta_v1) | |
count(joined_df$sd_beta_v1 > 0.1) | |
count(joined_df$sd_beta_v1 > 0.1)/nrow(joined_df)*100 | |
mean(joined_df$mad_beta_v2) | |
count(joined_df$sd_beta_v2 > 0.1) | |
count(joined_df$sd_beta_v2 > 0.1)/nrow(joined_df)*100 | |
# 8. EPIC version-related differences in DNA methylation quantification | |
Supplementary_Figure_4_1 <- joined_df %>% | |
ggplot(aes(x = delta_mean)) + | |
geom_histogram(alpha = 0.25, color = 4, fill = 4, bins = 100) + | |
theme_bw() + | |
labs(x = "Absolute"~Delta~"Mean of Beta Values", | |
title = "Probe-Wise Absolute"~Delta~"Mean of Beta Values Between EPIC Versions") + | |
theme(axis.title.x = element_text(size = 16, face = "bold"), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 14, face = "bold")) | |
Supplementary_Figure_4_1 | |
Supplementary_Figure_4_2 <- joined_df %>% | |
ggplot(aes(x = delta_mean)) + | |
geom_boxplot(alpha = 0.25, color = 4, fill = 4) + | |
labs(x = "Absolute"~Delta~"Mean of Beta Values") + | |
theme_bw() + | |
theme(axis.title.x = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_blank(), axis.ticks.y = element_blank()) | |
Supplementary_Figure_4_2 | |
mean(joined_df$delta_mean) | |
sd(joined_df$delta_mean) | |
count(joined_df$delta_mean > 0.1) | |
count(joined_df$delta_mean > 0.1)/nrow(joined_df)*100 | |
# 9. Clock probes available on EPIC v1.0 and v2.0 | |
df_clocks_v1_v2 <- data.frame() | |
for (i in colnames(clocks_cpgs)){ | |
clock_name <- i | |
cpg_names <- clocks_cpgs %>% dplyr::select(.data[[i]]) %>% na.omit() %>% pull(.data[[i]]) | |
length_clock_cpgs <- length(cpg_names) | |
length_intersect_v1 <- length(intersect(manifest_v1, cpg_names)) | |
percent_interect_v1 <- length_intersect_v1/length_clock_cpgs*100 | |
length_intersect_v2 <- length(intersect(manifest_v2, cpg_names)) | |
percent_interect_v2 <- length_intersect_v2/length_clock_cpgs*100 | |
output <- data.frame(Algorithm = clock_name, | |
N_clock_cpgs = length_clock_cpgs, | |
N_available_v1 = length_intersect_v1, | |
Percent_available_v1 = percent_interect_v1, | |
N_available_v2 = length_intersect_v2, | |
Percent_available_v2 = percent_interect_v2) | |
df_clocks_v1_v2 <- rbind(df_clocks_v1_v2, output) | |
} | |
df_clocks_v1_v2 <- df_clocks_v1_v2 %>% | |
mutate(Algorithm = c("Horvath", "Hannum", "PhenoAge", "DunedinPoAm", "DunedinPACE"), | |
Percent_available_v1 = round(Percent_available_v1), | |
Percent_available_v2 = round(Percent_available_v2)) | |
colnames(df_clocks_v1_v2) <- c("Biological Age Estimator", "N of Clock CpGs", | |
"N of Available CpGS V1", "Percentage of Available CpGS V1", | |
"N of Available CpGS V2", "Percentage of Available CpGS V2") | |
# 10. Clock probes within probes with a absolute delta mean over 0.1 of mean | |
# beta values between both arrays | |
probes_deltamean_over_0.1_df <- joined_df %>% filter(delta_mean > 0.1) | |
intersect(clocks_cpgs$horvath_cpg, probes_deltamean_over_0.1_df$cpg) | |
intersect(clocks_cpgs$hannum_cpg, probes_deltamean_over_0.1_df$cpg) | |
intersect(clocks_cpgs$phenoage_cpg, probes_deltamean_over_0.1_df$cpg) | |
intersect(clocks_cpgs$dunedinpoam_cpg, probes_deltamean_over_0.1_df$cpg) | |
intersect(clocks_cpgs$dunedinpace_cpg, probes_deltamean_over_0.1_df$cpg) | |
# 11. Calculation of epigenetic age estimators | |
# A. Calculation of DNAmAge & DNAmAge Acceleration for following estimators: | |
# Hannum, Horvath, PhenoAge | |
age <- predicted_age_v1_2020$age | |
predicted_age_v1_2023 <- DNAmAge(epic_v1, clocks = c("Horvath", "Hannum", "Levine"), | |
age = age, normalize = FALSE, cell.count = FALSE) | |
epic_v2 <- epic_v2 %>% | |
as.data.frame() %>% | |
rownames_to_column("Name") %>% | |
left_join(annotation_epic_v2 %>% dplyr::select(Name, EPICv2_Loci, Rep_Num), by = "Name") | |
mean_specific_epic_v2 <- epic_v2 %>% | |
filter(EPICv2_Loci %in% replicated_type12_illuminastrand_assaystrand) %>% | |
dplyr::select(-Rep_Num) %>% | |
pivot_longer(cols = -c(EPICv2_Loci, Name), names_to = "sample", values_to = "beta") %>% | |
group_by(sample, EPICv2_Loci) %>% | |
summarise(mean_beta = mean(beta)) %>% | |
ungroup() %>% | |
pivot_wider(names_from = "sample", values_from = "mean_beta") %>% | |
column_to_rownames("EPICv2_Loci") | |
epic_v2 <- epic_v2 %>% | |
filter(Rep_Num == "1") %>% | |
filter(!EPICv2_Loci %in% replicated_type12_illuminastrand_assaystrand) %>% | |
dplyr::select(-Name, -Rep_Num) %>% | |
column_to_rownames("EPICv2_Loci") %>% | |
bind_rows(mean_specific_epic_v2) %>% | |
as.matrix() | |
predicted_age_v2 <- DNAmAge(epic_v2, clocks = c("Horvath", "Hannum", "Levine"), | |
age = age, normalize = FALSE, cell.count = FALSE) | |
# B. Calculation of Pace of aging for following estimators: DunedinPoAm, DunedinPACE | |
dunedinpoam_df <- as.data.frame(DunedinPoAm38::PoAmProjector(betas = epic_v1)) | |
dunedinpoam_df <- dunedinpoam_df %>% | |
dplyr::rename(DunedinPoAm = "DunedinPoAm_38") %>% rownames_to_column("arrayid") | |
dunedinpace_df <- as.data.frame(DunedinPACE::PACEProjector(betas = epic_v1)) | |
dunedinpace_df <- dunedinpace_df %>% rownames_to_column("arrayid") | |
predicted_age_v1_2023 <- predicted_age_v1_2023 %>% | |
dplyr::select(arrayid = "id", -age, Horvath, AgeAccelHorvath = "ageAcc2.Horvath", | |
Hannum, AgeAccelHannum = "ageAcc2.Hannum", PhenoAge = "Levine", | |
AgeAccelPheno = "ageAcc2.Levine") %>% | |
left_join(dunedinpoam_df, by = "arrayid") %>% | |
left_join(dunedinpace_df, by = "arrayid") %>% | |
mutate(Array = "v1.0_2023") %>% | |
left_join(predicted_age_v1_2020 %>% dplyr::select(sampleId, arrayid, age, sex), by = "arrayid") | |
dunedinpoam_df <- as.data.frame(DunedinPoAm38::PoAmProjector(betas = epic_v2, | |
proportionOfProbesRequired = 0.6)) | |
dunedinpoam_df <- dunedinpoam_df %>% | |
dplyr::rename(DunedinPoAm = "DunedinPoAm_38") %>% rownames_to_column("arrayid") | |
dunedinpace_df <- as.data.frame(DunedinPACE::PACEProjector(betas = epic_v2)) | |
dunedinpace_df <- dunedinpace_df %>% rownames_to_column("arrayid") | |
predicted_age_v2 <- predicted_age_v2 %>% | |
dplyr::select(arrayid = "id", -age, Horvath, AgeAccelHorvath = "ageAcc2.Horvath", | |
Hannum, AgeAccelHannum = "ageAcc2.Hannum", PhenoAge = "Levine", | |
AgeAccelPheno = "ageAcc2.Levine") %>% | |
left_join(dunedinpoam_df, by = "arrayid") %>% | |
left_join(dunedinpace_df, by = "arrayid") %>% | |
mutate(Array = "v2.0") %>% | |
bind_cols(predicted_age_v1_2020 %>% dplyr::select(sampleId, age, sex)) | |
data_final <- bind_rows(predicted_age_v1_2020, predicted_age_v1_2023, predicted_age_v2) | |
data_final <- data_final %>% | |
mutate(Array = as.factor(Array), | |
age = as.factor(age)) %>% | |
dplyr::select(-arrayid, -sex) | |
age_levels <- levels(data_final$age) | |
data_final_long <- data_final %>% | |
pivot_longer(cols = -c(sampleId, Array, age), | |
names_to = "Measurement" , values_to = "value") %>% | |
mutate(Type = case_when(str_detect(Measurement, "AgeAccel") ~ "AgeAccel", | |
str_detect(Measurement, "Dunedin") ~ "Pace", | |
TRUE ~ "DNAmAge"), | |
sampleId = as.factor(sampleId), | |
Type = as.factor(Type), | |
Measurement = factor(Measurement, levels = c("Horvath", "Hannum", "PhenoAge", | |
"AgeAccelHorvath", "AgeAccelHannum", | |
"AgeAccelPheno", "DunedinPoAm", "DunedinPACE"))) | |
# 12. Comparison of biological age estimators in our sample | |
# A. DNAmAge | |
Figure_1A <- data_final_long %>% | |
filter(Type == "DNAmAge") %>% | |
ggplot(aes(x = sampleId, y = value, fill = Measurement)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) + | |
theme_bw() + | |
labs(y = "DNA Methylation Age (years)") + | |
theme(legend.position = c(0.1, 0.87), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) + | |
scale_y_continuous(limits = c(0, 85), breaks = seq(0, 90, 10)) + | |
geom_text(aes(label = Array), angle=90, | |
position = position_dodge2(width = 0.8), | |
size = 2.5, hjust = -0.25, vjust = 0.5, | |
show.legend = FALSE) + | |
scale_fill_brewer(palette = "Dark2") | |
Figure_1A | |
# B. DNAmAge acceleration | |
Figure_1B <- data_final_long %>% | |
filter(Type == "AgeAccel") %>% | |
mutate(Measurement = str_replace(Measurement, "AgeAccel", ""), | |
Measurement = str_replace(Measurement, "Pheno", "PhenoAge"), | |
Measurement = factor(Measurement, levels = c("Horvath", "Hannum", "PhenoAge")), | |
Array = as.character(Array)) %>% | |
ggplot(aes(x = sampleId, y = value, fill = Measurement)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.4), alpha = 0.8) + | |
theme_bw() + | |
labs(y = "DNA Methylation Age Acceleration (years)") + | |
theme(legend.position = c(0.1, 0.87), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) + | |
scale_y_continuous(limits = c(-12, 12), breaks = seq(-12, 12, 2)) + | |
geom_text(aes(label=ifelse(value > 0, Array, "")), | |
position = position_dodge2(width = 0.8), angle=90, | |
size = 2.5, hjust = -0.25, vjust = 0.5, | |
show.legend = FALSE) + | |
geom_text(aes(label=ifelse(value < 0, Array, "")), | |
position = position_dodge2(width = 0.8), angle=90, | |
size = 2.5, hjust = 1.25, vjust = 0.5, | |
show.legend = FALSE) + | |
scale_fill_brewer(palette = "Dark2") | |
Figure_1B | |
# C. Pace of aging | |
Figure_1C <- data_final_long %>% | |
filter(Type == "Pace") %>% | |
ggplot(aes(x = sampleId, y = value, fill = Array)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.4), alpha = 0.8) + | |
theme_bw() + | |
labs(y = "Years of Physiological Change per Chronological Year") + | |
theme(legend.position = c(0.1, 0.87), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) + | |
scale_y_continuous(limits = c(0, 1.6), breaks = seq(0, 1.6, 0.2)) + | |
geom_text(aes(label=Measurement), angle=90, | |
position = position_dodge2(width = 0.8), | |
size = 2.5, hjust = -0.25, vjust = 0.5, | |
show.legend = FALSE) + | |
scale_fill_brewer(palette = "Dark2") | |
Figure_1C | |
# 13. Absolute delta of biological age estimators within individual across and within platforms | |
# A. DNAmAge | |
data_final_diff_v1 <- data_final_long %>% | |
filter(Array != "v2.0") %>% | |
group_by(Type, Measurement, sampleId) %>% | |
mutate(Absolut_delta = abs(diff(value))) %>% | |
filter(Array != "v1.0_2020") %>% | |
dplyr::select(`Age Group` = age, sampleId, Type, Measurement, Absolut_delta) %>% | |
mutate(Comparison = as.factor("|v1.0-v1.0|")) | |
data_final_diff_v1_v2 <- data_final_long %>% | |
filter(Array != "v1.0_2020") %>% | |
group_by(Type, Measurement, sampleId) %>% | |
mutate(Absolut_delta = abs(diff(value))) %>% | |
filter(Array != "v1.0_2023") %>% | |
dplyr::select(`Age Group` = age, sampleId, Type, Measurement, Absolut_delta) %>% | |
mutate(Comparison = as.factor("|v2.0-v1.0|")) | |
Figure_2A <- data_final_diff_v1_v2 %>% | |
filter(Type == "DNAmAge") %>% | |
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) + | |
scale_fill_brewer(palette = "Dark2") + | |
theme_bw() + | |
scale_y_continuous(limits = c(0,26), breaks = seq(0, 26, 2)) + | |
labs(y = "DNA Methylation Age (years)", title = "Absolute"~Delta~"DNA Methylation Age", | |
subtitle = "|v2.0-v1.0|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Figure_2A | |
Figure_2B <- data_final_diff_v1 %>% | |
filter(Type == "DNAmAge") %>% | |
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) + | |
scale_fill_brewer(palette = "Dark2") + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 26), breaks = seq(0, 26, 2)) + | |
labs(y = "DNA Methylation Age (years)", title = "Absolute"~Delta~"DNA Methylation Age", | |
subtitle = "|v1.0-v1.0|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Figure_2B | |
# B. DNAmAge acceleration | |
Figure_2C <- data_final_diff_v1_v2 %>% | |
filter(Type == "AgeAccel") %>% | |
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) + | |
scale_fill_brewer(palette = "Dark2") + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, 1)) + | |
labs(y = "DNA Methylation Age Acceleration (years)", title = "Absolute"~Delta~"DNAm Age Acceleration", | |
subtitle = "|v2.0-v1.0|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Figure_2C | |
Figure_2D <- data_final_diff_v1 %>% | |
filter(Type == "AgeAccel") %>% | |
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) + | |
scale_fill_brewer(palette = "Dark2") + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, 1)) + | |
labs(y = "DNA Methylation Age Acceleration (years)", title = "Absolute"~Delta~"DNAm Age Acceleration", | |
subtitle = "|v1.0-v1.0|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Figure_2D | |
# C. Pace of aging | |
Figure_2E <- data_final_diff_v1_v2 %>% | |
filter(Type == "Pace") %>% | |
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) + | |
scale_fill_brewer(palette = "Dark2") + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) + | |
labs(y = "Years of Physiological Change per Chronological Year", | |
title = "Absolute"~Delta~"Pace of Aging", | |
subtitle = "|v2.0-v1.0|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Figure_2E | |
Figure_2F <- data_final_diff_v1 %>% | |
filter(Type == "Pace") %>% | |
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) + | |
scale_fill_brewer(palette = "Dark2") + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) + | |
labs(y = "Years of Physiological Change per Chronological Year", | |
title = "Absolute"~Delta~"Pace of Aging", | |
subtitle = "|v1.0-v1.0|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 16, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Figure_2F | |
# 14. Mean and SD of absolute delta of biological age estimators | |
data_final_diff_complete <- bind_rows(data_final_diff_v1_v2, data_final_diff_v1) | |
mean_sd_absolute_delta_per_clock_v1_v2 <- data_final_diff_complete %>% | |
group_by(Comparison, Type, Measurement) %>% | |
summarise( | |
Mean = mean(Absolut_delta), | |
SD = sd(Absolut_delta)) | |
# 15. Estimator-wise comparison of within-person variation across EPIC arrays | |
# A. DNAmAge | |
Figure_3A <- data_final_diff_complete %>% | |
filter(Type == "DNAmAge") %>% | |
ggplot(aes(x = Measurement, y = Absolut_delta, colour = Comparison, fill = Comparison)) + | |
geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.5) + | |
geom_point(position = position_dodge(width = 0.8), alpha = 0.5) + | |
stat_compare_means(method = "wilcox.test", paired = FALSE, label = "..p.format..") + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 26), breaks = seq(0, 26, 4)) + | |
scale_fill_brewer(palette = "Dark2") + | |
scale_colour_brewer(palette = "Dark2") + | |
labs(y = "Absolute"~Delta~"DNA Methylation Age (years)", | |
title = "Within-Person Variation in DNA Methylation Age Across Arrays", | |
subtitle = "|v2.0-v1.0| vs. |v1.0-v1.0|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 13, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Figure_3A | |
# B. DNAmAge acceleration | |
Figure_3B <- data_final_diff_complete %>% | |
filter(Type == "AgeAccel") %>% | |
mutate(Measurement = as.factor(str_replace(Measurement, "AgeAccel", ""))) %>% | |
ggplot(aes(x = Measurement, y = Absolut_delta, colour = Comparison, fill = Comparison)) + | |
geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.5) + | |
geom_point(position = position_dodge(width = 0.8), alpha = 0.5) + | |
stat_compare_means(method = "wilcox.test", paired = FALSE, label = "..p.format..") + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 26), breaks = seq(0, 26, 2)) + | |
scale_fill_brewer(palette = "Dark2") + | |
scale_colour_brewer(palette = "Dark2") + | |
labs(y = "Absolute"~Delta~"DNA Methylation Age Acceleration (years)", | |
title = "Within-Person Variation in DNAm Age Acceleration Across Arrays", | |
subtitle = "|v2.0-v1.0| vs. |v1.0-v1.0|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 13, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Figure_3B | |
# C. Pace of aging | |
Figure_3C <- data_final_diff_complete %>% | |
filter(Type == "Pace") %>% | |
ggplot(aes(x = Measurement, y = Absolut_delta, colour = Comparison, fill = Comparison)) + | |
geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.5) + | |
geom_point(position = position_dodge(width = 0.8), alpha = 0.5) + | |
stat_compare_means(method = "wilcox.test", paired = FALSE, label = "..p.format..") + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 0.8), breaks = seq(0, 0.8, 0.1)) + | |
scale_fill_brewer(palette = "Dark2") + | |
scale_colour_brewer(palette = "Dark2") + | |
labs(y = "Absolute"~Delta~"Years of Physiological Change per Chronological Year", | |
title = "Within-Person Variation in Pace of Aging Across Arrays", | |
subtitle = "|v2.0-v1.0| vs. |v1.0-v1.0|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 12, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 13, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Figure_3C | |
# 16. Mean and SD of absolute delta of biological age estimators in each age group | |
mean_sd_absolute_delta_per_clock_v1_v2_per_group <- data_final_diff_complete %>% | |
group_by(Comparison, `Age Group`, Type, Measurement) %>% | |
summarise( | |
Mean = mean(Absolut_delta), | |
SD = sd(Absolut_delta)) | |
# 17. Estimator-wise comparison of within-person variation across EPIC arrays by age group | |
# A. DNAmAge | |
Supplementary_Figure_5A <- mean_sd_absolute_delta_per_clock_v1_v2_per_group %>% | |
filter(Type == "DNAmAge") %>% | |
group_by(`Age Group`, Measurement) %>% | |
mutate(Difference = abs(diff(Mean))) %>% | |
filter(Comparison == "|v2.0-v1.0|") %>% | |
dplyr::select(-Comparison) %>% | |
ggplot(aes(x = Measurement, y = Difference, colour = `Age Group`, fill = `Age Group`)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 30), breaks = seq(0, 30, 5)) + | |
scale_fill_brewer(palette = "Dark2") + | |
scale_colour_brewer(palette = "Dark2") + | |
labs(y = "Mean Absolute"~Delta~"DNA Methylation Age (years)", | |
title = "Age Group Influence on DNA Methylation Age Variation Across Arrays", | |
subtitle = "Mean |(|v2.0-v1.0|)-(|v1.0-v1.0|)|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 13, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Supplementary_Figure_5A | |
# B. DNAmAge acceleration | |
Supplementary_Figure_5B <- mean_sd_absolute_delta_per_clock_v1_v2_per_group %>% | |
filter(Type == "AgeAccel") %>% | |
group_by(`Age Group`, Measurement) %>% | |
mutate(Difference = abs(diff(Mean)), | |
Measurement = str_replace(Measurement, "AgeAccel", "")) %>% | |
filter(Comparison == "|v2.0-v1.0|") %>% | |
dplyr::select(-Comparison) %>% | |
ggplot(aes(x = Measurement, y = Difference, colour = `Age Group`, fill = `Age Group`)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, 2)) + | |
scale_colour_brewer(palette = "Dark2") + | |
scale_fill_brewer(palette = "Dark2") + | |
labs(y = "Mean Absolute"~Delta~"DNA Methylation Age Acceleration (years)", | |
title = "Age Group Influence on DNAm Age Acceleration Variation Across Arrays", | |
subtitle = "Mean |(|v2.0-v1.0|)-(|v1.0-v1.0|)|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 16, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 13, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Supplementary_Figure_5B | |
# C. Pace of aging | |
Supplementary_Figure_5C <- mean_sd_absolute_delta_per_clock_v1_v2_per_group %>% | |
filter(Type == "Pace") %>% | |
group_by(`Age Group`, Measurement) %>% | |
mutate(Difference = abs(diff(Mean))) %>% | |
filter(Comparison == "|v2.0-v1.0|") %>% | |
dplyr::select(-Comparison) %>% | |
ggplot(aes(x = Measurement, y = Difference, colour = `Age Group`, fill = `Age Group`)) + | |
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) + | |
theme_bw() + | |
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) + | |
scale_colour_brewer(palette = "Dark2") + | |
scale_fill_brewer(palette = "Dark2") + | |
labs(y = "Mean Absolute"~Delta~"Years of Physiological Change per Chronological Year", | |
title = "Age Group Influence on Pace of Aging Variation Across Arrays", | |
subtitle = "Mean |(|v2.0-v1.0|)-(|v1.0-v1.0|)|") + | |
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(), | |
legend.background = element_rect(fill='transparent'), | |
axis.title.y = element_text(size = 12, face = "bold"), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
plot.title = element_text(size = 13, face = "bold"), | |
legend.title = element_text(size = 12, face = "bold")) | |
Supplementary_Figure_5C | |
# 18. Percent of probes with low correlation across individuals among probes | |
# with low reliability available from comparison by Sugdan et al. 2020 | |
low_reliability_sugden <- low_reliability_sugden %>% | |
dplyr::select(`Illumina Probe ID`, Reliability) %>% | |
filter(Reliability < 0.4) | |
common_cpgs <- intersect(rownames(epic_v1), rownames(epic_v2)) | |
colnames(epic_v1) <- paste0("Sample_", rep(1:8), "_V1") | |
colnames(epic_v2) <- paste0("Sample_", rep(1:8), "_V2") | |
epic_v1_df <- epic_v1 %>% | |
as.data.frame() %>% | |
filter(row.names(epic_v1) %in% common_cpgs) %>% | |
as.matrix() %>% t() %>% | |
as.data.frame() | |
epic_v2_df <- epic_v2 %>% | |
as.data.frame() %>% | |
filter(row.names(epic_v2) %in% colnames(epic_v1_df)) %>% | |
as.matrix() %>% t() %>% | |
as.data.frame() | |
epic_v1_df <- epic_v1_df %>% | |
dplyr::select(colnames(epic_v2_df)) | |
correlations <- map2_dbl( | |
.x = epic_v1_df, | |
.y = epic_v2_df, | |
~ cor(.x, .y, method = "spearman") | |
) | |
correlations_df <- as.data.frame(correlations) | |
correlations_df <- correlations_df %>% | |
rownames_to_column("Illumina Probe ID") %>% | |
left_join(low_reliability_sugden, by = "Illumina Probe ID") %>% | |
na.omit() | |
N_available_cpgs <- dim(correlations_df)[1] | |
correlations_df <- correlations_df %>% | |
filter(correlations <= 0.4 & correlations >= -0.4) | |
N_low_correlation_cpg <- dim(correlations_df)[1] | |
percentage_overlap <- N_low_correlation_cpg/N_available_cpgs*100 | |
# Session information | |
utils:::print.sessionInfo(sessionInfo()[-8]) | |
# End of script |