Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
# Analysis script for the paper:
# "Performance of epigenetic aging estimators from peripheral blood using
# Infinium MethylationEPIC v2.0 vs. v1.0 BeadChip arrays"
#
# ==============================================================================
#
# Author: Natan Yusupov, natan_yusupov@psych.mpg.de
#
# Description: The script analyses DNAm data from human peripheral blood
# quantified with the recently introduced Infinium MethylationEPIC BeadChip
# array v2.0 ((Illumina, San Diego, CA, USA) and its performance in applications
# related to epigenetic aging estimation.
#
# Summary of steps performed in the script "analysis_script.R":
# 1. Load libraries and data
# 2. Inspection of content EPIC v1.0 and v2.0
# A. Number of CpGs
# B. Find duplicates of probes
# C. Find replicates of type I/II technology, strand TB and strand CO
# 3. Overlap of probes across EPIC array versions
# 4. Prepare data
# 5. Calculation of mean beta values, their standard deviations, median
# absolute deviation across subjects for both arrays, absolute delta mean
# of mean beta values between both arrays
# 6. Correlation of probes’ beta values between EPIC array versions
# 7. Probe-wise standard deviations of beta values in both EPIC array versions
# 8. EPIC version-related differences in DNA methylation quantification
# 9. Clocks probes available on EPIC v1.0 and v2.0
# 10. Clock probes within probes with a absolute delta mean over 0.1 of mean
# beta values between both arrays
# 11. Calculation of epigenetic age estimators
# A. Calculation of DNAmAge & DNAmAge Acceleration for following estimators:
# Hannum, Horvath, PhenoAge
# B. Calculation of Pace of aging for following estimators: DunedinPoAm, DunedinPACE
# 12. Comparison of biological age estimators in our sample
# A. DNAmAge
# B. DNAmAge acceleration
# C. Pace of aging
# 13. Absolute delta of biological age estimators within individual across and within platforms
# A. DNAmAge
# B. DNAmAge acceleration
# C. Pace of aging
# 14. Mean and SD of absolute delta of biological age estimators
# 15. Estimator-wise comparison of within-person variation across EPIC arrays
# A. DNAmAge
# B. DNAmAge acceleration
# C. Pace of aging
# 16. Mean and SD of absolute delta of biological age estimators in each age group
# 17. Estimator-wise comparison of within-person variation across EPIC arrays by age group
# A. DNAmAge
# B. DNAmAge acceleration
# C. Pace of aging
# 18. Percent of probes with low correlation across individuals among probes
# with low reliability available from comparison by Sugdan et al. 2020
# ==============================================================================
# 1. Load libraries and data
library(utils)
library(tidyverse)
library(eulerr)
library(purrr)
library(RColorBrewer)
library(ggpubr)
library(IlluminaHumanMethylationEPICmanifest)
library(IlluminaHumanMethylationEPICv2manifest)
library(IlluminaHumanMethylationEPICv2anno.20a1.hg38)
library(readxl)
library(writexl)
library(ggh4x)
library(methylclock)
library(DunedinPoAm38)
library(DunedinPACE)
library(IlluminaHumanMethylationEPICmanifest)
library(IlluminaHumanMethylationEPICv2manifest)
library(IlluminaHumanMethylationEPICv2anno.20a1.hg38)
# data for general analysis
annotation_epic_v2 <- getAnnotation(IlluminaHumanMethylationEPICv2anno.20a1.hg38)
manifest_v1 <- getManifestInfo(IlluminaHumanMethylationEPICmanifest, "locusNames")
manifest_v2 <- getManifestInfo(IlluminaHumanMethylationEPICv2manifest, "locusNames")
load("Betas_clean_quantile_filtered_v1.Rdata") # V1 preprocessed, normalized and filtered beta
betas_v1 <- Betas_clean_quantile_filtered_v1
load("Betas_clean_quantile_filtered_v2.Rdata") # V2 preprocessed, normalized and filtered beta
betas_v2 <- Betas_clean_quantile_filtered_v2
# data for epigenetic clocks analysis
load("Betas_clean_quantile_bmiq_v1.Rdata")
epic_v1 <- Betas_clean_quantile_bmiq_v1
load("Betas_clean_quantile_bmiq_v2.Rdata")
epic_v2 <- Betas_clean_quantile_bmiq_v2
load("predicted_age_v1_2020") # priorly calculated epigenetic age
# Clock CpGs of the epigenetic age estimators used in the analysis
read_xlsx("clocks_cpgs.xlsx")
# data from Sugden et al. 2020, doi: 10.1016/j.patter.2020.100014. PMID: 32885222; PMCID: PMC7467214
low_reliability_sugden <- read_xlsx("../info/low_reliability_sugden.xlsx")
# 2. Inspection of content EPIC v1.0 and v2.0
# A. Number of probes
manifest_v1 <- unique(manifest_v1)
length(manifest_v1)
manifest_v2 <- str_replace(manifest_v2, "_[:alnum:]+$", "") # remove special annotation ending from v2.0 for comparison
manifest_v2 <- unique(manifest_v2)
length(manifest_v2)
annotation_epic_v2 <- annotation_epic_v2 %>%
as.data.frame() %>%
separate(Name, c("EPICv2_Loci", "classifier"), "_", remove = FALSE)
# B. Find duplicates of probes
duplicated_vector <- annotation_epic_v2 %>%
filter(duplicated(EPICv2_Loci)) %>% pull(EPICv2_Loci)
all_duplicates_df <- annotation_epic_v2 %>%
filter(EPICv2_Loci %in% duplicated_vector) %>% arrange(EPICv2_Loci)
# C. Find replicates of type I/II technology, strand TB and strand CO
replicated_type12_illuminastrand_assaystrand_df <- all_duplicates_df %>%
mutate(Type = as.factor(Type)) %>%
group_by(EPICv2_Loci) %>%
filter(n_distinct(Type) > 1 | n_distinct(Strand_TB) > 1 | n_distinct(Strand_CO) > 1)
replicated_type12_illuminastrand_assaystrand <- replicated_type12_illuminastrand_assaystrand_df %>%
distinct(EPICv2_Loci) %>% pull(EPICv2_Loci)
# 3. Overlap of probes across EPIC array versions
supplemenatry_figure_1 <- plot(euler(list(`EPIC v1.0` = manifest_v1, `EPIC v2.0` = manifest_v2)),
key = TRUE, counts = TRUE, input = "union", alpha = 0.5,
quantities = list(type = c("counts","percent"), cex = 1.2),
fills = list(fill = c("royalblue4", "steelblue4", "lightskyblue")),
edges = list(lty = 2), factor_names = TRUE,
labels = list(font=2, cex=2), legend = FALSE,
main = expression(~bold("Comparison of Total Probes")))
supplemenatry_figure_1
# 4. Prepare data
differring_cpg_names <- annotation_epic_v2 %>%
mutate_all(na_if, "") %>%
filter(!EPICv2_Loci %in% duplicated_vector) %>%
mutate(identical = ifelse(EPICv2_Loci == EPICv1_Loci, "yes", "no")) %>%
filter(identical == "no")
differring_cpg_names_v1 <- differring_cpg_names %>% pull(EPICv1_Loci)
differring_cpg_names_v2 <- differring_cpg_names %>% pull(EPICv2_Loci)
common_cpgs <- intersect(manifest_v1, manifest_v2)
betas_v1_df <- betas_v1 %>%
as.data.frame() %>%
rownames_to_column("EPIC_Loci") %>%
filter(!EPIC_Loci %in% differring_cpg_names_v1) %>%
filter(EPIC_Loci %in% common_cpgs) %>%
column_to_rownames("EPIC_Loci") %>%
as.matrix() %>%
t() %>%
as.data.frame()
betas_v2_df <- betas_v2 %>%
as.data.frame() %>%
rownames_to_column("cpg") %>%
separate(cpg, c("EPIC_Loci", "classifier"), "_") %>%
distinct(EPIC_Loci, .keep_all = TRUE) %>%
dplyr::select(-classifier) %>%
filter(EPIC_Loci %in% colnames(betas_v1_df)) %>%
column_to_rownames("EPIC_Loci") %>%
as.matrix() %>%
t() %>%
as.data.frame()
betas_v1_df <- betas_v1_df %>%
dplyr::select(colnames(betas_v2_df))
# 5. Calculation of mean beta values, their standard deviations, median
# absolute deviation across subjects for both arrays, absolute delta mean
# of mean beta values between both arrays
betas_v1_df_long <- betas_v1_df %>%
pivot_longer(cols = everything(), names_to = "cpg", values_to = "beta") %>%
group_by(cpg) %>%
summarize(mean_beta_v1 = mean(beta),
sd_beta_v1 = sd(beta),
mad_beta_v1 = mad(beta))
betas_v2_df_long <- betas_v2_df %>%
pivot_longer(cols = everything(), names_to = "cpg", values_to = "beta") %>%
group_by(cpg) %>%
summarize(mean_beta_v2 = mean(beta),
sd_beta_v2 = sd(beta),
mad_beta_v2 = mad(beta))
joined_df <- betas_v1_df_long %>%
left_join(betas_v2_df_long, by = "cpg") %>%
mutate(delta_mean = abs(mean_beta_v2 - mean_beta_v1))
# 6. Correlation of probes’ beta values between EPIC array versions
Supplementary_Figure_2 <- joined_df %>%
ggplot(aes(x = mean_beta_v1, y = mean_beta_v2)) +
geom_point(alpha = 0.25, color = 4, fill = 4) +
geom_smooth(method = "lm", se = FALSE) +
theme_bw() +
labs(x = "Mean Beta Value (EPIC v1.0)", y = "Mean Beta Value (EPIC v2.0)") +
theme(axis.title.x = element_text(size = 16, face = "bold"),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12)) +
stat_cor(method="pearson")
Supplementary_Figure_2
# 7. Probe-wise standard deviations of beta values in both EPIC array versions
Supplementary_Figure_3A_1 <- joined_df %>%
ggplot(aes(x = sd_beta_v1)) +
geom_histogram(alpha = 0.25, color = 4, fill = 4, bins = 100) +
theme_bw() +
labs(x = "Standard Deviation of Beta Value",
title = "Probe-Wise Standard Deviations (EPIC v1.0)") +
theme(axis.title.x = element_text(size = 16, face = "bold"),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold")) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE),
breaks = seq(0, 150000, 25000))
Supplementary_Figure_3A_1
Supplementary_Figure_3A_2 <- joined_df %>%
ggplot(aes(x = sd_beta_v1)) +
geom_boxplot(alpha = 0.25, color = 4, fill = 4) +
labs(x = "Standard Deviation of Beta Value") +
theme_bw() +
theme(axis.title.x = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_blank(), axis.ticks.y = element_blank())
Supplementary_Figure_3A_2
Supplementary_Figure_3B_1 <- joined_df %>%
ggplot(aes(x = sd_beta_v2)) +
geom_histogram(alpha = 0.25, color = 4, fill = 4, bins = 100) +
theme_bw() +
labs(x = "Standard Deviation of Beta Value",
title = "Probe-Wise Standard Deviations (EPIC v2.0)") +
theme(axis.title.x = element_text(size = 16, face = "bold"),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold")) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE),
breaks = seq(0, 150000, 25000))
Supplementary_Figure_3B_1
Supplementary_Figure_3B_2 <- joined_df %>%
ggplot(aes(x = sd_beta_v2)) +
geom_boxplot(alpha = 0.25, color = 4, fill = 4) +
labs(x = "Standard Deviation of Beta Value") +
theme_bw() +
theme(axis.title.x = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_blank(), axis.ticks.y = element_blank())
Supplementary_Figure_3B_2
mean(joined_df$mad_beta_v1)
sd(joined_df$mad_beta_v1)
count(joined_df$sd_beta_v1 > 0.1)
count(joined_df$sd_beta_v1 > 0.1)/nrow(joined_df)*100
mean(joined_df$mad_beta_v2)
count(joined_df$sd_beta_v2 > 0.1)
count(joined_df$sd_beta_v2 > 0.1)/nrow(joined_df)*100
# 8. EPIC version-related differences in DNA methylation quantification
Supplementary_Figure_4_1 <- joined_df %>%
ggplot(aes(x = delta_mean)) +
geom_histogram(alpha = 0.25, color = 4, fill = 4, bins = 100) +
theme_bw() +
labs(x = "Absolute"~Delta~"Mean of Beta Values",
title = "Probe-Wise Absolute"~Delta~"Mean of Beta Values Between EPIC Versions") +
theme(axis.title.x = element_text(size = 16, face = "bold"),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 14, face = "bold"))
Supplementary_Figure_4_1
Supplementary_Figure_4_2 <- joined_df %>%
ggplot(aes(x = delta_mean)) +
geom_boxplot(alpha = 0.25, color = 4, fill = 4) +
labs(x = "Absolute"~Delta~"Mean of Beta Values") +
theme_bw() +
theme(axis.title.x = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_blank(), axis.ticks.y = element_blank())
Supplementary_Figure_4_2
mean(joined_df$delta_mean)
sd(joined_df$delta_mean)
count(joined_df$delta_mean > 0.1)
count(joined_df$delta_mean > 0.1)/nrow(joined_df)*100
# 9. Clock probes available on EPIC v1.0 and v2.0
df_clocks_v1_v2 <- data.frame()
for (i in colnames(clocks_cpgs)){
clock_name <- i
cpg_names <- clocks_cpgs %>% dplyr::select(.data[[i]]) %>% na.omit() %>% pull(.data[[i]])
length_clock_cpgs <- length(cpg_names)
length_intersect_v1 <- length(intersect(manifest_v1, cpg_names))
percent_interect_v1 <- length_intersect_v1/length_clock_cpgs*100
length_intersect_v2 <- length(intersect(manifest_v2, cpg_names))
percent_interect_v2 <- length_intersect_v2/length_clock_cpgs*100
output <- data.frame(Algorithm = clock_name,
N_clock_cpgs = length_clock_cpgs,
N_available_v1 = length_intersect_v1,
Percent_available_v1 = percent_interect_v1,
N_available_v2 = length_intersect_v2,
Percent_available_v2 = percent_interect_v2)
df_clocks_v1_v2 <- rbind(df_clocks_v1_v2, output)
}
df_clocks_v1_v2 <- df_clocks_v1_v2 %>%
mutate(Algorithm = c("Horvath", "Hannum", "PhenoAge", "DunedinPoAm", "DunedinPACE"),
Percent_available_v1 = round(Percent_available_v1),
Percent_available_v2 = round(Percent_available_v2))
colnames(df_clocks_v1_v2) <- c("Biological Age Estimator", "N of Clock CpGs",
"N of Available CpGS V1", "Percentage of Available CpGS V1",
"N of Available CpGS V2", "Percentage of Available CpGS V2")
# 10. Clock probes within probes with a absolute delta mean over 0.1 of mean
# beta values between both arrays
probes_deltamean_over_0.1_df <- joined_df %>% filter(delta_mean > 0.1)
intersect(clocks_cpgs$horvath_cpg, probes_deltamean_over_0.1_df$cpg)
intersect(clocks_cpgs$hannum_cpg, probes_deltamean_over_0.1_df$cpg)
intersect(clocks_cpgs$phenoage_cpg, probes_deltamean_over_0.1_df$cpg)
intersect(clocks_cpgs$dunedinpoam_cpg, probes_deltamean_over_0.1_df$cpg)
intersect(clocks_cpgs$dunedinpace_cpg, probes_deltamean_over_0.1_df$cpg)
# 11. Calculation of epigenetic age estimators
# A. Calculation of DNAmAge & DNAmAge Acceleration for following estimators:
# Hannum, Horvath, PhenoAge
age <- predicted_age_v1_2020$age
predicted_age_v1_2023 <- DNAmAge(epic_v1, clocks = c("Horvath", "Hannum", "Levine"),
age = age, normalize = FALSE, cell.count = FALSE)
epic_v2 <- epic_v2 %>%
as.data.frame() %>%
rownames_to_column("Name") %>%
left_join(annotation_epic_v2 %>% dplyr::select(Name, EPICv2_Loci, Rep_Num), by = "Name")
mean_specific_epic_v2 <- epic_v2 %>%
filter(EPICv2_Loci %in% replicated_type12_illuminastrand_assaystrand) %>%
dplyr::select(-Rep_Num) %>%
pivot_longer(cols = -c(EPICv2_Loci, Name), names_to = "sample", values_to = "beta") %>%
group_by(sample, EPICv2_Loci) %>%
summarise(mean_beta = mean(beta)) %>%
ungroup() %>%
pivot_wider(names_from = "sample", values_from = "mean_beta") %>%
column_to_rownames("EPICv2_Loci")
epic_v2 <- epic_v2 %>%
filter(Rep_Num == "1") %>%
filter(!EPICv2_Loci %in% replicated_type12_illuminastrand_assaystrand) %>%
dplyr::select(-Name, -Rep_Num) %>%
column_to_rownames("EPICv2_Loci") %>%
bind_rows(mean_specific_epic_v2) %>%
as.matrix()
predicted_age_v2 <- DNAmAge(epic_v2, clocks = c("Horvath", "Hannum", "Levine"),
age = age, normalize = FALSE, cell.count = FALSE)
# B. Calculation of Pace of aging for following estimators: DunedinPoAm, DunedinPACE
dunedinpoam_df <- as.data.frame(DunedinPoAm38::PoAmProjector(betas = epic_v1))
dunedinpoam_df <- dunedinpoam_df %>%
dplyr::rename(DunedinPoAm = "DunedinPoAm_38") %>% rownames_to_column("arrayid")
dunedinpace_df <- as.data.frame(DunedinPACE::PACEProjector(betas = epic_v1))
dunedinpace_df <- dunedinpace_df %>% rownames_to_column("arrayid")
predicted_age_v1_2023 <- predicted_age_v1_2023 %>%
dplyr::select(arrayid = "id", -age, Horvath, AgeAccelHorvath = "ageAcc2.Horvath",
Hannum, AgeAccelHannum = "ageAcc2.Hannum", PhenoAge = "Levine",
AgeAccelPheno = "ageAcc2.Levine") %>%
left_join(dunedinpoam_df, by = "arrayid") %>%
left_join(dunedinpace_df, by = "arrayid") %>%
mutate(Array = "v1.0_2023") %>%
left_join(predicted_age_v1_2020 %>% dplyr::select(sampleId, arrayid, age, sex), by = "arrayid")
dunedinpoam_df <- as.data.frame(DunedinPoAm38::PoAmProjector(betas = epic_v2,
proportionOfProbesRequired = 0.6))
dunedinpoam_df <- dunedinpoam_df %>%
dplyr::rename(DunedinPoAm = "DunedinPoAm_38") %>% rownames_to_column("arrayid")
dunedinpace_df <- as.data.frame(DunedinPACE::PACEProjector(betas = epic_v2))
dunedinpace_df <- dunedinpace_df %>% rownames_to_column("arrayid")
predicted_age_v2 <- predicted_age_v2 %>%
dplyr::select(arrayid = "id", -age, Horvath, AgeAccelHorvath = "ageAcc2.Horvath",
Hannum, AgeAccelHannum = "ageAcc2.Hannum", PhenoAge = "Levine",
AgeAccelPheno = "ageAcc2.Levine") %>%
left_join(dunedinpoam_df, by = "arrayid") %>%
left_join(dunedinpace_df, by = "arrayid") %>%
mutate(Array = "v2.0") %>%
bind_cols(predicted_age_v1_2020 %>% dplyr::select(sampleId, age, sex))
data_final <- bind_rows(predicted_age_v1_2020, predicted_age_v1_2023, predicted_age_v2)
data_final <- data_final %>%
mutate(Array = as.factor(Array),
age = as.factor(age)) %>%
dplyr::select(-arrayid, -sex)
age_levels <- levels(data_final$age)
data_final_long <- data_final %>%
pivot_longer(cols = -c(sampleId, Array, age),
names_to = "Measurement" , values_to = "value") %>%
mutate(Type = case_when(str_detect(Measurement, "AgeAccel") ~ "AgeAccel",
str_detect(Measurement, "Dunedin") ~ "Pace",
TRUE ~ "DNAmAge"),
sampleId = as.factor(sampleId),
Type = as.factor(Type),
Measurement = factor(Measurement, levels = c("Horvath", "Hannum", "PhenoAge",
"AgeAccelHorvath", "AgeAccelHannum",
"AgeAccelPheno", "DunedinPoAm", "DunedinPACE")))
# 12. Comparison of biological age estimators in our sample
# A. DNAmAge
Figure_1A <- data_final_long %>%
filter(Type == "DNAmAge") %>%
ggplot(aes(x = sampleId, y = value, fill = Measurement)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) +
theme_bw() +
labs(y = "DNA Methylation Age (years)") +
theme(legend.position = c(0.1, 0.87), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 12, face = "bold")) +
scale_y_continuous(limits = c(0, 85), breaks = seq(0, 90, 10)) +
geom_text(aes(label = Array), angle=90,
position = position_dodge2(width = 0.8),
size = 2.5, hjust = -0.25, vjust = 0.5,
show.legend = FALSE) +
scale_fill_brewer(palette = "Dark2")
Figure_1A
# B. DNAmAge acceleration
Figure_1B <- data_final_long %>%
filter(Type == "AgeAccel") %>%
mutate(Measurement = str_replace(Measurement, "AgeAccel", ""),
Measurement = str_replace(Measurement, "Pheno", "PhenoAge"),
Measurement = factor(Measurement, levels = c("Horvath", "Hannum", "PhenoAge")),
Array = as.character(Array)) %>%
ggplot(aes(x = sampleId, y = value, fill = Measurement)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.4), alpha = 0.8) +
theme_bw() +
labs(y = "DNA Methylation Age Acceleration (years)") +
theme(legend.position = c(0.1, 0.87), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 12, face = "bold")) +
scale_y_continuous(limits = c(-12, 12), breaks = seq(-12, 12, 2)) +
geom_text(aes(label=ifelse(value > 0, Array, "")),
position = position_dodge2(width = 0.8), angle=90,
size = 2.5, hjust = -0.25, vjust = 0.5,
show.legend = FALSE) +
geom_text(aes(label=ifelse(value < 0, Array, "")),
position = position_dodge2(width = 0.8), angle=90,
size = 2.5, hjust = 1.25, vjust = 0.5,
show.legend = FALSE) +
scale_fill_brewer(palette = "Dark2")
Figure_1B
# C. Pace of aging
Figure_1C <- data_final_long %>%
filter(Type == "Pace") %>%
ggplot(aes(x = sampleId, y = value, fill = Array)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.4), alpha = 0.8) +
theme_bw() +
labs(y = "Years of Physiological Change per Chronological Year") +
theme(legend.position = c(0.1, 0.87), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 12, face = "bold")) +
scale_y_continuous(limits = c(0, 1.6), breaks = seq(0, 1.6, 0.2)) +
geom_text(aes(label=Measurement), angle=90,
position = position_dodge2(width = 0.8),
size = 2.5, hjust = -0.25, vjust = 0.5,
show.legend = FALSE) +
scale_fill_brewer(palette = "Dark2")
Figure_1C
# 13. Absolute delta of biological age estimators within individual across and within platforms
# A. DNAmAge
data_final_diff_v1 <- data_final_long %>%
filter(Array != "v2.0") %>%
group_by(Type, Measurement, sampleId) %>%
mutate(Absolut_delta = abs(diff(value))) %>%
filter(Array != "v1.0_2020") %>%
dplyr::select(`Age Group` = age, sampleId, Type, Measurement, Absolut_delta) %>%
mutate(Comparison = as.factor("|v1.0-v1.0|"))
data_final_diff_v1_v2 <- data_final_long %>%
filter(Array != "v1.0_2020") %>%
group_by(Type, Measurement, sampleId) %>%
mutate(Absolut_delta = abs(diff(value))) %>%
filter(Array != "v1.0_2023") %>%
dplyr::select(`Age Group` = age, sampleId, Type, Measurement, Absolut_delta) %>%
mutate(Comparison = as.factor("|v2.0-v1.0|"))
Figure_2A <- data_final_diff_v1_v2 %>%
filter(Type == "DNAmAge") %>%
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) +
scale_fill_brewer(palette = "Dark2") +
theme_bw() +
scale_y_continuous(limits = c(0,26), breaks = seq(0, 26, 2)) +
labs(y = "DNA Methylation Age (years)", title = "Absolute"~Delta~"DNA Methylation Age",
subtitle = "|v2.0-v1.0|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Figure_2A
Figure_2B <- data_final_diff_v1 %>%
filter(Type == "DNAmAge") %>%
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) +
scale_fill_brewer(palette = "Dark2") +
theme_bw() +
scale_y_continuous(limits = c(0, 26), breaks = seq(0, 26, 2)) +
labs(y = "DNA Methylation Age (years)", title = "Absolute"~Delta~"DNA Methylation Age",
subtitle = "|v1.0-v1.0|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Figure_2B
# B. DNAmAge acceleration
Figure_2C <- data_final_diff_v1_v2 %>%
filter(Type == "AgeAccel") %>%
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) +
scale_fill_brewer(palette = "Dark2") +
theme_bw() +
scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, 1)) +
labs(y = "DNA Methylation Age Acceleration (years)", title = "Absolute"~Delta~"DNAm Age Acceleration",
subtitle = "|v2.0-v1.0|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Figure_2C
Figure_2D <- data_final_diff_v1 %>%
filter(Type == "AgeAccel") %>%
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) +
scale_fill_brewer(palette = "Dark2") +
theme_bw() +
scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, 1)) +
labs(y = "DNA Methylation Age Acceleration (years)", title = "Absolute"~Delta~"DNAm Age Acceleration",
subtitle = "|v1.0-v1.0|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Figure_2D
# C. Pace of aging
Figure_2E <- data_final_diff_v1_v2 %>%
filter(Type == "Pace") %>%
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) +
scale_fill_brewer(palette = "Dark2") +
theme_bw() +
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) +
labs(y = "Years of Physiological Change per Chronological Year",
title = "Absolute"~Delta~"Pace of Aging",
subtitle = "|v2.0-v1.0|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Figure_2E
Figure_2F <- data_final_diff_v1 %>%
filter(Type == "Pace") %>%
ggplot(aes(x = Measurement, y = Absolut_delta, fill = `Age Group`)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) +
scale_fill_brewer(palette = "Dark2") +
theme_bw() +
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) +
labs(y = "Years of Physiological Change per Chronological Year",
title = "Absolute"~Delta~"Pace of Aging",
subtitle = "|v1.0-v1.0|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Figure_2F
# 14. Mean and SD of absolute delta of biological age estimators
data_final_diff_complete <- bind_rows(data_final_diff_v1_v2, data_final_diff_v1)
mean_sd_absolute_delta_per_clock_v1_v2 <- data_final_diff_complete %>%
group_by(Comparison, Type, Measurement) %>%
summarise(
Mean = mean(Absolut_delta),
SD = sd(Absolut_delta))
# 15. Estimator-wise comparison of within-person variation across EPIC arrays
# A. DNAmAge
Figure_3A <- data_final_diff_complete %>%
filter(Type == "DNAmAge") %>%
ggplot(aes(x = Measurement, y = Absolut_delta, colour = Comparison, fill = Comparison)) +
geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.5) +
geom_point(position = position_dodge(width = 0.8), alpha = 0.5) +
stat_compare_means(method = "wilcox.test", paired = FALSE, label = "..p.format..") +
theme_bw() +
scale_y_continuous(limits = c(0, 26), breaks = seq(0, 26, 4)) +
scale_fill_brewer(palette = "Dark2") +
scale_colour_brewer(palette = "Dark2") +
labs(y = "Absolute"~Delta~"DNA Methylation Age (years)",
title = "Within-Person Variation in DNA Methylation Age Across Arrays",
subtitle = "|v2.0-v1.0| vs. |v1.0-v1.0|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Figure_3A
# B. DNAmAge acceleration
Figure_3B <- data_final_diff_complete %>%
filter(Type == "AgeAccel") %>%
mutate(Measurement = as.factor(str_replace(Measurement, "AgeAccel", ""))) %>%
ggplot(aes(x = Measurement, y = Absolut_delta, colour = Comparison, fill = Comparison)) +
geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.5) +
geom_point(position = position_dodge(width = 0.8), alpha = 0.5) +
stat_compare_means(method = "wilcox.test", paired = FALSE, label = "..p.format..") +
theme_bw() +
scale_y_continuous(limits = c(0, 26), breaks = seq(0, 26, 2)) +
scale_fill_brewer(palette = "Dark2") +
scale_colour_brewer(palette = "Dark2") +
labs(y = "Absolute"~Delta~"DNA Methylation Age Acceleration (years)",
title = "Within-Person Variation in DNAm Age Acceleration Across Arrays",
subtitle = "|v2.0-v1.0| vs. |v1.0-v1.0|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Figure_3B
# C. Pace of aging
Figure_3C <- data_final_diff_complete %>%
filter(Type == "Pace") %>%
ggplot(aes(x = Measurement, y = Absolut_delta, colour = Comparison, fill = Comparison)) +
geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.5) +
geom_point(position = position_dodge(width = 0.8), alpha = 0.5) +
stat_compare_means(method = "wilcox.test", paired = FALSE, label = "..p.format..") +
theme_bw() +
scale_y_continuous(limits = c(0, 0.8), breaks = seq(0, 0.8, 0.1)) +
scale_fill_brewer(palette = "Dark2") +
scale_colour_brewer(palette = "Dark2") +
labs(y = "Absolute"~Delta~"Years of Physiological Change per Chronological Year",
title = "Within-Person Variation in Pace of Aging Across Arrays",
subtitle = "|v2.0-v1.0| vs. |v1.0-v1.0|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 12, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Figure_3C
# 16. Mean and SD of absolute delta of biological age estimators in each age group
mean_sd_absolute_delta_per_clock_v1_v2_per_group <- data_final_diff_complete %>%
group_by(Comparison, `Age Group`, Type, Measurement) %>%
summarise(
Mean = mean(Absolut_delta),
SD = sd(Absolut_delta))
# 17. Estimator-wise comparison of within-person variation across EPIC arrays by age group
# A. DNAmAge
Supplementary_Figure_5A <- mean_sd_absolute_delta_per_clock_v1_v2_per_group %>%
filter(Type == "DNAmAge") %>%
group_by(`Age Group`, Measurement) %>%
mutate(Difference = abs(diff(Mean))) %>%
filter(Comparison == "|v2.0-v1.0|") %>%
dplyr::select(-Comparison) %>%
ggplot(aes(x = Measurement, y = Difference, colour = `Age Group`, fill = `Age Group`)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) +
theme_bw() +
scale_y_continuous(limits = c(0, 30), breaks = seq(0, 30, 5)) +
scale_fill_brewer(palette = "Dark2") +
scale_colour_brewer(palette = "Dark2") +
labs(y = "Mean Absolute"~Delta~"DNA Methylation Age (years)",
title = "Age Group Influence on DNA Methylation Age Variation Across Arrays",
subtitle = "Mean |(|v2.0-v1.0|)-(|v1.0-v1.0|)|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Supplementary_Figure_5A
# B. DNAmAge acceleration
Supplementary_Figure_5B <- mean_sd_absolute_delta_per_clock_v1_v2_per_group %>%
filter(Type == "AgeAccel") %>%
group_by(`Age Group`, Measurement) %>%
mutate(Difference = abs(diff(Mean)),
Measurement = str_replace(Measurement, "AgeAccel", "")) %>%
filter(Comparison == "|v2.0-v1.0|") %>%
dplyr::select(-Comparison) %>%
ggplot(aes(x = Measurement, y = Difference, colour = `Age Group`, fill = `Age Group`)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) +
theme_bw() +
scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, 2)) +
scale_colour_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
labs(y = "Mean Absolute"~Delta~"DNA Methylation Age Acceleration (years)",
title = "Age Group Influence on DNAm Age Acceleration Variation Across Arrays",
subtitle = "Mean |(|v2.0-v1.0|)-(|v1.0-v1.0|)|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Supplementary_Figure_5B
# C. Pace of aging
Supplementary_Figure_5C <- mean_sd_absolute_delta_per_clock_v1_v2_per_group %>%
filter(Type == "Pace") %>%
group_by(`Age Group`, Measurement) %>%
mutate(Difference = abs(diff(Mean))) %>%
filter(Comparison == "|v2.0-v1.0|") %>%
dplyr::select(-Comparison) %>%
ggplot(aes(x = Measurement, y = Difference, colour = `Age Group`, fill = `Age Group`)) +
geom_col(width = 0.8, position = position_dodge2(width = 0.8), alpha = 0.8) +
theme_bw() +
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) +
scale_colour_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
labs(y = "Mean Absolute"~Delta~"Years of Physiological Change per Chronological Year",
title = "Age Group Influence on Pace of Aging Variation Across Arrays",
subtitle = "Mean |(|v2.0-v1.0|)-(|v1.0-v1.0|)|") +
theme(legend.position = c(0.9, 0.80), axis.title.x = element_blank(),
legend.background = element_rect(fill='transparent'),
axis.title.y = element_text(size = 12, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 12, face = "bold"))
Supplementary_Figure_5C
# 18. Percent of probes with low correlation across individuals among probes
# with low reliability available from comparison by Sugdan et al. 2020
low_reliability_sugden <- low_reliability_sugden %>%
dplyr::select(`Illumina Probe ID`, Reliability) %>%
filter(Reliability < 0.4)
common_cpgs <- intersect(rownames(epic_v1), rownames(epic_v2))
colnames(epic_v1) <- paste0("Sample_", rep(1:8), "_V1")
colnames(epic_v2) <- paste0("Sample_", rep(1:8), "_V2")
epic_v1_df <- epic_v1 %>%
as.data.frame() %>%
filter(row.names(epic_v1) %in% common_cpgs) %>%
as.matrix() %>% t() %>%
as.data.frame()
epic_v2_df <- epic_v2 %>%
as.data.frame() %>%
filter(row.names(epic_v2) %in% colnames(epic_v1_df)) %>%
as.matrix() %>% t() %>%
as.data.frame()
epic_v1_df <- epic_v1_df %>%
dplyr::select(colnames(epic_v2_df))
correlations <- map2_dbl(
.x = epic_v1_df,
.y = epic_v2_df,
~ cor(.x, .y, method = "spearman")
)
correlations_df <- as.data.frame(correlations)
correlations_df <- correlations_df %>%
rownames_to_column("Illumina Probe ID") %>%
left_join(low_reliability_sugden, by = "Illumina Probe ID") %>%
na.omit()
N_available_cpgs <- dim(correlations_df)[1]
correlations_df <- correlations_df %>%
filter(correlations <= 0.4 & correlations >= -0.4)
N_low_correlation_cpg <- dim(correlations_df)[1]
percentage_overlap <- N_low_correlation_cpg/N_available_cpgs*100
# Session information
utils:::print.sessionInfo(sessionInfo()[-8])
# End of script