Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
DiffBrainNet/01_DiffExp/07_anRichmentAnalysis.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
448 lines (388 sloc)
17.6 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################## | |
## Project: DexStim Mouse Brain | |
## Date: 08.09.2020 | |
## Author: Nathalie | |
################################################## | |
# Functional annotation with anRichment | |
library(anRichment) | |
library(anRichmentMethods) | |
library(org.Mm.eg.db) | |
library(ggplot2) | |
library(dplyr) | |
basepath <- "~/Documents/ownCloud/DexStim_RNAseq_Mouse/" | |
folder_plots <- paste0("figures") | |
folder_tables <- paste0("tables") | |
GOcollection <- buildGOcollection(organism = "mouse") | |
# 1.1 GO enrichment all regions --------------------- | |
genes <- read.table(file = paste0(basepath, folder_tables, "/06_overlap_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_entrezID.txt"), | |
header = FALSE) | |
background <- read.table(file = paste0(basepath, folder_tables, "/06_background_entrezID.txt"), | |
header = FALSE) | |
modules <- rep("not_significant", nrow(background)) | |
modules[which(background$V1 %in% genes$V1)] <- "significant" | |
# enrichment | |
GOenrichment <- enrichmentAnalysis( | |
classLabels = modules, | |
identifiers = background$V1, | |
refCollection = GOcollection, | |
useBackground = "given", | |
# threshold = 0.1, | |
# thresholdType = "Bonferroni", | |
getOverlapEntrez = TRUE, | |
getOverlapSymbols = TRUE, | |
ignoreLabels = "not_significant" | |
) | |
# Filter out terms with less than 10 genes overlap and FDR above 0.1 | |
table.display <- GOenrichment$enrichmentTable %>% | |
filter(nCommonGenes >= 10 & FDR <= 0.1) | |
write.csv(table.display, file = paste0(folder_tables,"/07_GOenrichment_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1.csv"), | |
row.names = FALSE) | |
# Plot results (Top 20 terms) | |
table.display %>% | |
head(20) %>% | |
arrange(desc(FDR)) %>% | |
mutate(dataSetName=factor(dataSetName,levels=dataSetName)) %>% | |
ggplot(aes(x=dataSetName, y = -log10(FDR), fill = inGroups)) + | |
geom_bar(stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
scale_fill_discrete(name = "Ontology", | |
labels = c("BP", "MF", "CC")) + | |
coord_flip() + | |
ylab("-log10(FDR)") + | |
xlab("GOterm") + | |
ggtitle("GO terms enriched for DE genes across all brain regions (Top 20)") + | |
theme(text = element_text(size=15), | |
plot.title = element_text(size = 15, hjust=1)) | |
ggsave(filename = paste0(folder_plots, "/07_GOenrichment_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1.png")) | |
# 1.2 NCBI BioSystems collection ----------------- | |
biosysCollection <- BioSystemsCollection("mouse") | |
knownGroups(biosysCollection) | |
# enrichment | |
KEGGenrichment <- enrichmentAnalysis( | |
classLabels = modules, | |
identifiers = background$V1, | |
refCollection = biosysCollection, | |
useBackground = "given", | |
threshold = 0.1, | |
thresholdType = "Bonferroni", | |
getOverlapEntrez = TRUE, | |
getOverlapSymbols = TRUE, | |
ignoreLabels = "not_significant" | |
) | |
collectGarbage() | |
names(KEGGenrichment) | |
names(KEGGenrichment$enrichmentTable) | |
table.display <- KEGGenrichment$enrichmentTable | |
table.display$overlapGenes <- shortenStrings(table.display$overlapGenes, maxLength = 70, | |
split = "|") | |
write.csv(GOenrichment$enrichmentTable, file = paste0(folder_tables,"/07_PathwayEnrichment_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1.csv"), | |
row.names = FALSE) | |
# plot results (plotted pvalues are not adjusted for multiple testing) | |
table.display %>% | |
arrange(desc(pValue)) %>% | |
mutate(dataSetName=factor(dataSetName,levels=dataSetName)) %>% | |
ggplot(aes(x=dataSetName, y = -log10(pValue), fill=inGroups)) + | |
geom_bar(stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
coord_flip() + | |
ggtitle("Pathways enriched for DE genes across all brain regions") | |
ggsave(filename = paste0(folder_plots, "/07_PathwayEnrichment_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1.png")) | |
# 1.3 calculate enrichment of disease associated genes --------- | |
library(disgenet2r) | |
# Schizophrenia C0036341 | |
# Major depressive disorder C1269683 | |
# Bipolar disorder C0005586 | |
genes_SCZ <- disease2gene( disease = c("C0036341"), | |
database = "CURATED", verbose = TRUE )@qresult | |
genes_MDD <- disease2gene( disease = c("C1269683"), | |
database = "CURATED", verbose = TRUE )@qresult | |
genes_BIP <- disease2gene( disease = c("C0005586"), | |
database = "CURATED", verbose = TRUE )@qresult | |
genesymbols <- read.table(file = paste0(folder_tables, "/06_overlap_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_symbolID.txt"), | |
header = FALSE) | |
genesymbols <- sapply(genesymbols$V1, toupper) | |
bgsymbols <- read.table(file = paste0(folder_tables, "/06_background_symbolID.txt"), | |
header = FALSE) | |
bgsymbols <- sapply(bgsymbols$V1, toupper) | |
pval <- data.frame(disease = c("SCZ", "MDD", "BIP"), pvalue = c(1,1,1)) | |
diseaseGenes <- data.frame(disease = character(), genes = character()) | |
# hypergeometric test SCZ | |
tmp <- intersect(genes_SCZ$gene_symbol, genesymbols) | |
diseaseGenes <- rbind(diseaseGenes, cbind(rep("SCZ", times = length(tmp)), tmp)) | |
o <- length(intersect(genes_SCZ$gene_symbol, genesymbols)) | |
o_b <- length(intersect(genes_SCZ$gene_symbol, bgsymbols)) | |
test <- fisher.test(matrix(c(o, o_b-o, length(genesymbols)-o, length(bgsymbols)-length(genesymbols)-o_b+o), 2, 2), | |
alternative='greater')$p.value | |
pval[1,2] <- test | |
# hypergeometric test MDD | |
tmp <- intersect(genes_MDD$gene_symbol, genesymbols) | |
diseaseGenes <- rbind(diseaseGenes, cbind(rep("MDD", times = length(tmp)), tmp)) | |
o <- length(intersect(genes_MDD$gene_symbol, genesymbols)) | |
o_b <- length(intersect(genes_MDD$gene_symbol, bgsymbols)) | |
test <- fisher.test(matrix(c(o, o_b-o, length(genesymbols)-o, length(bgsymbols)-length(genesymbols)-o_b+o), 2, 2), | |
alternative='greater')$p.value | |
pval[2,2] <- test | |
# hypergeometric test BIP | |
tmp <- intersect(genes_BIP$gene_symbol, genesymbols) | |
diseaseGenes <- rbind(diseaseGenes, cbind(rep("BIP", times = length(tmp)), tmp)) | |
o <- length(intersect(genes_BIP$gene_symbol, genesymbols)) | |
o_b <- length(intersect(genes_BIP$gene_symbol, bgsymbols)) | |
test <- fisher.test(matrix(c(o, o_b-o, length(genesymbols)-o, length(bgsymbols)-length(genesymbols)-o_b+o), 2, 2), | |
alternative='greater')$p.value | |
pval[3,2] <- test | |
pval[,2] <- p.adjust(pval[,2], method = "fdr") | |
# plot disease gene enrichment | |
ggplot(pval, aes(x=disease, y=-log10(pvalue))) + | |
geom_bar(stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
coord_flip() + | |
xlab("disease") + | |
ylab("-log10(FDR)") + | |
ggtitle("Enrichment of known disease genes among DE genes shared between all regions") + | |
theme(text = element_text(size=15), | |
plot.title = element_text(size = 15)) | |
ggsave(filename = paste0(folder_plots, "/07_diseaseGeneEnrichment_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1.png")) | |
write.table(diseaseGenes, file = paste0(folder_tables, "/07_diseaseGenes_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1.txt"), | |
quote = FALSE, row.names = FALSE, col.names = FALSE) | |
write.table(pval, file = paste0(folder_tables, "/07_diseaseGenes_pvalues_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1.txt"), | |
quote = FALSE, row.names = FALSE) | |
# 2. Hippocampus: dorsal vs ventral ------------ | |
intersect_hip <- read.table(file = paste0(folder_tables, "/06_HIP_intersectDorsalVentral_entrezID.txt"), | |
header = FALSE) | |
diff_dorsal <- read.table(file = paste0(folder_tables, "/06_HIP_diffDorsalVentral_entrezID.txt"), | |
header = FALSE) | |
diff_ventral <- read.table(file = paste0(folder_tables, "/06_HIP_diffVentralDorsal_entrezID.txt"), | |
header = FALSE) | |
union_hip <- read.table(file = paste0(folder_tables, "/06_HIP_unionDorsalVentral_entrezID.txt"), | |
header = FALSE) | |
# !!!! BACKGROUND are only diff exp genes in any HIP area here !!!! | |
# union_hip <- background | |
modules <- rep("XXX", nrow(union_hip)) | |
modules[which(union_hip$V1 %in% intersect_hip$V1)] <- "intersect" | |
modules[which(union_hip$V1 %in% diff_dorsal$V1)] <- "dorsal" | |
modules[which(union_hip$V1 %in% diff_ventral$V1)] <- "ventral" | |
# enrichment | |
GOenrichment <- enrichmentAnalysis( | |
classLabels = modules, | |
identifiers = union_hip$V1, | |
refCollection = GOcollection, | |
useBackground = "given", | |
nBestDataSets = length(GOcollection$dataSets), | |
# threshold = 0.1, | |
# thresholdType = "Bonferroni", | |
getOverlapEntrez = TRUE, | |
getOverlapSymbols = TRUE | |
) | |
# Filter to the BP terms and exclude terms with less 10 genes overlap | |
# Exclude terms with nominal pvalue above 0.1 | |
table.display <- GOenrichment$enrichmentTable %>% | |
filter(inGroups == "GO|GO.BP|GO" & class != "XXX") %>% | |
filter(nCommonGenes >= 10 & pValue <= 0.1) %>% | |
group_by(class) %>% slice_min(order_by = FDR, n = 10) | |
write.csv(GOenrichment$enrichmentTable, file = paste0(folder_tables,"/07_GOenrichment_HIP.csv"), | |
row.names = FALSE) | |
# Plot results (plotted pvalues are not adjusted for multiple testing) | |
table.display %>% | |
# arrange(desc(class)) %>% | |
mutate(dataSetName=factor(dataSetName,levels=rev(dataSetName))) %>% | |
ggplot(aes(x=dataSetName, y = -log10(FDR), fill = class)) + | |
geom_bar(stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
coord_flip() + | |
xlab("GOterm") + | |
ggtitle("GO terms enriched for DE genes in Hippocampus") | |
ggsave(filename = paste0(folder_plots, "/07_GOenrichment_HIP.png")) | |
# 2.2 NCBI BioSystems collection ----------------- | |
biosysCollection <- BioSystemsCollection("mouse") | |
knownGroups(biosysCollection) | |
# enrichment | |
KEGGenrichment <- enrichmentAnalysis( | |
classLabels = modules, | |
identifiers = union_hip$V1, | |
refCollection = biosysCollection, | |
useBackground = "given", | |
threshold = 0.1, | |
thresholdType = "Bonferroni", | |
getOverlapEntrez = TRUE, | |
getOverlapSymbols = TRUE | |
) | |
collectGarbage() | |
names(KEGGenrichment) | |
names(KEGGenrichment$enrichmentTable) | |
table.display <- KEGGenrichment$enrichmentTable | |
table.display$overlapGenes <- shortenStrings(table.display$overlapGenes, maxLength = 70, | |
split = "|") | |
write.csv(GOenrichment$enrichmentTable, file = paste0(folder_tables,"/07_PathwayEnrichment_HIP.csv"), | |
row.names = FALSE) | |
# plot results (plotted pvalues are not adjusted for multiple testing) | |
table.display %>% | |
arrange(desc(class)) %>% | |
mutate(dataSetName=factor(dataSetName,levels=dataSetName)) %>% | |
ggplot(aes(x=dataSetName, y = -log10(pValue), fill=class)) + | |
geom_bar(stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
coord_flip() + | |
ggtitle("Pathways enriched for DE genes in Hippocampus") | |
ggsave(filename = paste0(folder_plots, "/07_PathwayEnrichment_HIP.png")) | |
# 3.1 GO enrichment all regions without PFC --------------------- | |
# !!! not really interesting --> 9 genes from which 5 are nominally sig also in PFC !!!! | |
GOcollection <- buildGOcollection(organism = "mouse") | |
genes <- read.table(file = paste0(folder_tables, "/06_diff_allRegionsExceptPFC.txt"), | |
header = TRUE) | |
background <- read.table(file = paste0(folder_tables, "/06_background_entrezID.txt"), | |
header = FALSE) | |
modules <- rep("not_significant", nrow(background)) | |
modules[which(background$V1 %in% genes$ENTREZID)] <- "significant" | |
# enrichment | |
GOenrichment <- enrichmentAnalysis( | |
classLabels = modules, | |
identifiers = background$V1, | |
refCollection = GOcollection, | |
useBackground = "given", | |
# threshold = 0.1, | |
# thresholdType = "Bonferroni", | |
getOverlapEntrez = TRUE, | |
getOverlapSymbols = TRUE, | |
ignoreLabels = "not_significant" | |
) | |
table.display <- GOenrichment$enrichmentTable | |
write.csv(table.display, file = paste0(folder_tables,"/07_GOenrichment_AMY-CER-PVN-dDG-vDG-dCA1-vCA1.csv"), | |
row.names = FALSE) | |
# plot results (plotted pvalues are not adjusted for multiple testing) | |
table.display %>% | |
arrange(desc(pValue)) %>% | |
mutate(dataSetName=factor(dataSetName,levels=dataSetName)) %>% | |
ggplot(aes(x=dataSetName, y = -log10(pValue), fill = inGroups)) + | |
geom_bar(stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
scale_fill_discrete(name = "Ontology", | |
labels = c("BP", "MF", "CC")) + | |
coord_flip() + | |
ylab("-log10(pValue)") + | |
xlab("GOterm") + | |
ggtitle("GO terms enriched for DE genes across all brain regions except PFC") + | |
theme(text = element_text(size=15), | |
plot.title = element_text(size = 15, hjust=1)) | |
ggsave(filename = paste0(folder_plots, "/07_GOenrichment_AMY-CER-PVN-dDG-vDG-dCA1-vCA1.png")) | |
# 4.1 GO enrichment only PFC and CER --------------------- | |
genes <- read.table(file = paste0(folder_tables, "/06_overlap_CER-PFC_only_entrezID.txt"), | |
header = FALSE) | |
background <- read.table(file = paste0(folder_tables, "/06_background_entrezID.txt"), | |
header = FALSE) | |
modules <- rep("not_significant", nrow(background)) | |
modules[which(background$V1 %in% genes$V1)] <- "significant" | |
# enrichment | |
GOenrichment <- enrichmentAnalysis( | |
classLabels = modules, | |
identifiers = background$V1, | |
refCollection = GOcollection, | |
useBackground = "given", | |
# threshold = 0.1, | |
# thresholdType = "Bonferroni", | |
getOverlapEntrez = TRUE, | |
getOverlapSymbols = TRUE, | |
ignoreLabels = "not_significant" | |
) | |
table.display <- GOenrichment$enrichmentTable | |
write.csv(table.display, file = paste0(folder_tables,"/07_GOenrichment_CER-PFC.csv"), | |
row.names = FALSE) | |
# plot results (plotted pvalues are not adjusted for multiple testing) | |
table.display %>% | |
arrange(desc(pValue)) %>% | |
mutate(dataSetName=factor(dataSetName,levels=dataSetName)) %>% | |
ggplot(aes(x=dataSetName, y = -log10(pValue), fill = inGroups)) + | |
geom_bar(stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
scale_fill_discrete(name = "Ontology", | |
labels = c("BP", "MF", "CC")) + | |
coord_flip() + | |
ylab("-log10(pValue)") + | |
xlab("GOterm") + | |
ggtitle("GO terms enriched for DE genes across all brain regions except PFC") + | |
theme(text = element_text(size=15), | |
plot.title = element_text(size = 15, hjust=1)) | |
ggsave(filename = paste0(folder_plots, "/07_GOenrichment_AMY-CER-PVN-dDG-vDG-dCA1-vCA1.png")) | |
# 5.1 GO enrichment all regions upregulated--------------------- | |
genes <- read.table(file = paste0(folder_tables, "/06_overlap_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_up_entrezID.txt"), | |
header = FALSE) | |
background <- read.table(file = paste0(folder_tables, "/06_background_entrezID.txt"), | |
header = FALSE) | |
modules <- rep("not_significant", nrow(background)) | |
modules[which(background$V1 %in% genes$V1)] <- "significant" | |
# GO enrichment | |
GOenrichment <- enrichmentAnalysis( | |
classLabels = modules, | |
identifiers = background$V1, | |
refCollection = GOcollection, | |
useBackground = "given", | |
nBestDataSets = length(GOcollection$dataSets), | |
# threshold = 0.1, | |
# thresholdType = "Bonferroni", | |
getOverlapEntrez = TRUE, | |
getOverlapSymbols = TRUE, | |
ignoreLabels = "not_significant" | |
) | |
# Exclude terms with less than 10 genes overlap or FDR above 0.1 | |
table.display <- GOenrichment$enrichmentTable %>% | |
filter(nCommonGenes >= 10 & FDR <= 0.1) | |
write.csv(table.display, file = paste0(folder_tables,"/07_GOenrichment_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_up.csv"), | |
row.names = FALSE) | |
# Plot results (Top 20) | |
table.display %>% | |
head(20) %>% | |
arrange(desc(FDR)) %>% | |
mutate(dataSetName=factor(dataSetName,levels=dataSetName)) %>% | |
ggplot(aes(x=dataSetName, y = -log10(FDR), fill = inGroups)) + | |
geom_bar(stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
scale_fill_discrete(name = "Ontology", | |
labels = c("BP", "MF", "CC")) + | |
coord_flip() + | |
ylab("-log10(FDR)") + | |
xlab("GOterm") + | |
ggtitle("GO terms enriched for upregulated DE genes across all brain regions (Top 20)") + | |
theme(text = element_text(size=15), | |
plot.title = element_text(size = 15, hjust=1)) | |
ggsave(filename = paste0(folder_plots, "/07_GOenrichment_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_up.png")) | |
# 5.2 GO enrichment all regions downregulated--------------------- | |
genes <- read.table(file = paste0(folder_tables, "/06_overlap_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_down_entrezID.txt"), | |
header = FALSE) | |
background <- read.table(file = paste0(folder_tables, "/06_background_entrezID.txt"), | |
header = FALSE) | |
modules <- rep("not_significant", nrow(background)) | |
modules[which(background$V1 %in% genes$V1)] <- "significant" | |
# GO enrichment | |
GOenrichment <- enrichmentAnalysis( | |
classLabels = modules, | |
identifiers = background$V1, | |
refCollection = GOcollection, | |
useBackground = "given", | |
nBestDataSets = length(GOcollection$dataSets), | |
# threshold = 0.1, | |
# thresholdType = "Bonferroni", | |
getOverlapEntrez = TRUE, | |
getOverlapSymbols = TRUE, | |
ignoreLabels = "not_significant" | |
) | |
# Exclude terms with less than 10 genes overlap or FDR above 0.1 | |
table.display <- GOenrichment$enrichmentTable %>% | |
filter(nCommonGenes >= 10 & FDR <= 0.1) | |
write.csv(table.display, file = paste0(folder_tables,"/07_GOenrichment_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_down.csv"), | |
row.names = FALSE) | |
# Plot results (Top 20) | |
table.display %>% | |
head(20) %>% | |
arrange(desc(FDR)) %>% | |
mutate(dataSetName=factor(dataSetName,levels=dataSetName)) %>% | |
ggplot(aes(x=dataSetName, y = -log10(FDR), fill = inGroups)) + | |
geom_bar(stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
scale_fill_discrete(name = "Ontology", | |
labels = c("BP", "MF", "CC")) + | |
coord_flip() + | |
ylab("-log10(FDR)") + | |
xlab("GOterm") + | |
ggtitle("GO terms enriched for downregulated DE genes across all brain regions (Top 20)") + | |
theme(text = element_text(size=15), | |
plot.title = element_text(size = 15, hjust=1)) | |
ggsave(filename = paste0(folder_plots, "/07_GOenrichment_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_down.png")) |