Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
DiffBrainNet/01_DiffExp/14_CompGWAS_DE.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
147 lines (122 sloc)
5.21 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################## | |
## Project: DexStim Mouse Brain | |
## Date: 07.09.2022 | |
## Author: Nathalie | |
################################################## | |
# Compare DEs and DNs with previously identified GWAS risk genes | |
# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) | |
library(data.table) | |
library(tidyr) | |
library(dplyr) | |
library(stringr) | |
library(readxl) | |
library(fgsea) | |
library(biomaRt) | |
library(ggplot2) | |
# trait of interest | |
traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ") | |
# trait <- "MDD" | |
# define pathes | |
basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" | |
folder_table <- paste0(basedir,"/tables") | |
file_hmagma <- paste0(basedir, "/data/reviews/H-MAGMAv1.08_Adult_brain_output.xlsx") | |
files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) | |
regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) | |
file_background <- paste0(folder_table, "/06_background.txt") | |
## 1. Read DE genes from different brain regions | |
expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) | |
names(expr_list) <- regions_files | |
de_list <- lapply(expr_list, function(x) x$V1) | |
# unique DE genes | |
data_unique <- bind_rows(expr_list, .id = "region") %>% | |
group_by(V1) %>% | |
summarise(region = list(region)) %>% | |
mutate(nr_regions = lengths(region)) %>% | |
mutate(unique = (nr_regions == 1)) %>% | |
filter(unique) | |
de_uni <- list() | |
for (r in regions_files){ | |
de_uni[[r]] <- data_unique$V1[data_unique$region == r] | |
} | |
# background genes | |
background <- read.table(file_background, | |
row.names = NULL) | |
## 2. Mapping of mouse IDs to human IDs | |
# map mouse Ensembl Ids to human Ensembl Ids | |
human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") | |
mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") | |
# --> this is a workaround as the normal commands lead to errors | |
# --> one mouse ID can be mapped to multiple human IDs | |
de_human <- lapply(de_list, function(x) getLDS(attributes=c("ensembl_gene_id"), | |
filters="ensembl_gene_id", values=x, mart=mouse, | |
attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) | |
de_uni_human <- lapply(de_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), | |
filters="ensembl_gene_id", values=x, mart=mouse, | |
attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) | |
background_human <- getLDS(attributes=c("ensembl_gene_id"), | |
filters="ensembl_gene_id", values=background$V1, mart=mouse, | |
attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 | |
res_list <- list() | |
res_list_uni <- list() | |
for (trait in traits){ | |
## 2. Read GWAS/H-MAGMA data | |
data <- read_excel(file_hmagma, sheet=paste0("H-MAGMA_",trait)) %>% | |
arrange(ZSTAT) %>% # rank gene list according to zscore as suggested in paper | |
filter(GENE %in% background_human) | |
ranks <- data$ZSTAT | |
names(ranks) <- data$GENE | |
## 3. Run fgsea for Gene Set Enrichment Analysis | |
fgseaRes <- fgsea(pathways = de_human, | |
stats = ranks) | |
# sampleSize = 600) | |
res_list[[trait]] <- fgseaRes | |
# unique DE genes | |
fgseaRes_uni <- fgsea(pathways = de_uni_human, | |
stats = ranks) | |
# sampleSize = 600) | |
res_list_uni[[trait]] <- fgseaRes_uni | |
plotEnrichment(de_uni_human[["PFC"]], | |
ranks) | |
} | |
## 4. Plot GSEA p-values as heatmap from all traits and brain regions | |
df <- bind_rows(res_list, .id="trait") | |
df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests | |
ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + | |
geom_tile() + | |
xlab("GWAS trait") + | |
ylab("Brain region") + | |
scale_fill_gradient(name = "-log10(FDR)", | |
low="lightgrey", high="#D45E60", | |
limits = c(0,1)) + | |
theme_light() + | |
theme( | |
axis.title.x = element_text(size = 15), | |
axis.title.y = element_text(size = 15), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
legend.text = element_text(size = 12) | |
) | |
ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14_Reviewer2_1a_DEall.pdf"), | |
width = 8, | |
height = 6) | |
# plot for unique DE genes | |
df <- bind_rows(res_list_uni, .id="trait") | |
df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests | |
ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + | |
geom_tile() + | |
xlab("GWAS trait") + | |
ylab("Brain region") + | |
scale_fill_gradient(name = "-log10(FDR)", | |
low="lightgrey", high="#D45E60", | |
limits = c(0,1)) + | |
theme_light() + | |
theme( | |
axis.title.x = element_text(size = 15), | |
axis.title.y = element_text(size = 15), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
legend.text = element_text(size = 12) | |
) | |
ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14_Reviewer2_1a_DEunique.pdf"), | |
width = 8, | |
height = 6) |