Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
DiffBrainNet/03_CoExp_Analysis/10_GOenrichment_comparisonHubAndDE.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
186 lines (150 sloc)
7.11 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################## | |
## Project: DexStim Mouse Brain | |
## Date: 05.10.2021 | |
## Author: Nathalie | |
################################################## | |
# Compare GO enrichment of unique DE and hub genes | |
library(data.table) | |
library(dplyr) | |
library(ggplot2) | |
library(org.Mm.eg.db) | |
library(clusterProfiler) | |
basepath <- "~/Documents/ownCloud/DexStim_RNAseq_Mouse/" | |
regions <- c("AMY", "CER", "dCA1", "dDG", "PFC", "PVN", "vCA1", "vDG") | |
beta_cutoff <- 0.01 | |
folder_plots <- paste0("figures") | |
folder_tables <- paste0("tables") | |
# 1. Read DE and hub genes from all regions and background ---------- | |
# DE genes | |
list_reg_de <- list() | |
for (reg in regions){ | |
res <- read.table(file=paste0(basepath, folder_tables, "/02_", reg, "_deseq2_Dex_1_vs_0_lfcShrink.txt"), | |
sep="\t", header = TRUE) | |
res <- res[res$padj <= 0.1,] | |
res$gene_symbol <- mapIds(org.Mm.eg.db, keys = res$Ensembl_ID, | |
keytype = "ENSEMBL", column="SYMBOL") | |
res$entrez <- mapIds(org.Mm.eg.db, keys = res$Ensembl_ID, | |
keytype = "ENSEMBL", column="ENTREZID") | |
list_reg_de[[reg]] <- res | |
} | |
# hub genes | |
list_reg_hub <- list() | |
for (reg in regions){ | |
res <- fread(paste0(basepath, "tables/coExpression_kimono/03_AnalysisFuncoup/04_", | |
reg,"_funcoup_differential_nodeBetweennessNorm_betacutoff",beta_cutoff,".csv")) | |
res <- res[res$nodebetweenness_norm>=1.0 & ! is.na(res$nodebetweenness_norm)] | |
res$entrez <- mapIds(org.Mm.eg.db, keys = res$ensembl_id, | |
keytype = "ENSEMBL", column="ENTREZID") | |
list_reg_hub[[reg]] <- res | |
} | |
# background are all genes in out dataset | |
background <- read.table(file = paste0(basepath, "tables/06_background_entrezID.txt"), | |
header = FALSE)[,1] | |
# 2. check uniqueness of DE and hub genes --------------- | |
# DE genes | |
for (reg in regions){ | |
index_reg <- which(regions == reg) | |
df <- bind_rows(list_reg_de[-index_reg], .id="region") | |
list_reg_de[[reg]]$regions_DE <- sapply(list_reg_de[[reg]]$Ensembl_ID, | |
function(x) paste(df[df$Ensembl_ID == x,]$region, collapse = " ")) | |
list_reg_de[[reg]]$unique_DE <- sapply(list_reg_de[[reg]]$regions_DE, | |
function(x) x == "") | |
} | |
# hub genes | |
for (reg in regions){ | |
index_reg <- which(regions == reg) | |
df <- bind_rows(list_reg_hub[-index_reg], .id="region") | |
list_reg_hub[[reg]]$regions_hub <- sapply(list_reg_hub[[reg]]$ensembl_id, | |
function(x) paste(df[df$ensembl_id == x,]$region, collapse = " ")) | |
list_reg_hub[[reg]]$unique_hub <- sapply(list_reg_hub[[reg]]$regions_hub, | |
function(x) x == "") | |
} | |
# 2. Plot top GO enrichment of DE and hub genes per brain regions | |
#minCount <- 0 | |
#minCount <- 5 | |
# minCount <- 10 | |
# --> changed to minCount with regard to number of genes in geneset | |
for (reg in regions){ | |
# Unique DE genes | |
genes <- list_reg_de[[reg]]$entrez[list_reg_de[[reg]]$unique_DE] | |
# GO enrichment for unique DE genes | |
ego_de <- enrichGO(gene = as.character(genes), | |
universe = as.character(background), | |
OrgDb = org.Mm.eg.db, | |
ont = "BP", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.01, | |
qvalueCutoff = 0.05, | |
# pvalueCutoff = 1, | |
# qvalueCutoff = 1, | |
minGSSize = 5, # min number of genes associated with GO term | |
maxGSSize = 10000, # max number of genes associated with GO term | |
readable = TRUE)@result | |
# min number of genes overlapping | |
min_count <- ceiling(length(genes)*0.15) | |
ego_de_filt <- ego_de[ego_de$Count >= min_count,] | |
# Unique hub genes | |
genes <- list_reg_hub[[reg]]$entrez[list_reg_hub[[reg]]$unique_hub] | |
# GO enrichment for unique hub genes | |
ego_hub <- enrichGO(gene = as.character(genes), | |
universe = as.character(background), | |
OrgDb = org.Mm.eg.db, | |
ont = "BP", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.01, | |
qvalueCutoff = 0.05, | |
# pvalueCutoff = 1, | |
# qvalueCutoff = 1, | |
minGSSize = 5, # min number of genes associated with GO term | |
maxGSSize = 10000, # max number of genes associated with GO term | |
readable = TRUE)@result | |
# min number of genes overlapping | |
min_count <- ceiling(length(genes)*0.15) | |
ego_hub_filt <- ego_hub[ego_hub$Count >= min_count,] | |
# Plot top 10 genes of DE and hub with comparison to each other | |
# Top 10 terms for DE genes | |
ego_de_top10 <- head(ego_de_filt, n = 10) %>% | |
dplyr::mutate("main" = TRUE) | |
ego_de_top10_hub <- ego_hub[ego_hub$ID %in% ego_de_top10$ID,] %>% | |
dplyr::mutate("main" = FALSE) | |
ego_de_plot <- bind_rows("de" = ego_de_top10, "hub" = ego_de_top10_hub, | |
.id = "groups") | |
# Top 10 terms for hub genes | |
ego_hub_top10 <- head(ego_hub_filt, n = 10) %>% | |
dplyr::mutate("main" = TRUE) | |
ego_hub_top10_de <- ego_de[ego_de$ID %in% ego_hub_top10$ID,] %>% | |
dplyr::mutate("main" = FALSE) | |
ego_hub_plot <- bind_rows("hub" = ego_hub_top10, "de" = ego_hub_top10_de, | |
.id = "groups") | |
# Combine them into one dataframe | |
ego_plot <- bind_rows("de" = ego_de_plot, "hub" = ego_hub_plot, | |
.id = "subplot") | |
ego_plot$Description <- factor(ego_plot$Description, | |
levels = unique(ego_plot$Description)) | |
# labeller function for facet titles | |
facet_names <- c( | |
'de'="Top 10 GO terms for DE genes", | |
'hub'="Top 10 GO terms for hub genes" | |
) | |
# Plot them all together | |
print(ggplot(ego_plot, aes(x=Description, y = -log10(p.adjust), | |
fill = groups, colour = main)) + | |
geom_bar(position = position_dodge2(reverse=TRUE), stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
scale_colour_manual(values = c("grey", "black"), guide = FALSE) + | |
scale_fill_manual(name = "Geneset", | |
values = c("orange", "darkred"), | |
labels = c("DE genes", "hub genes")) + | |
coord_flip() + | |
scale_x_discrete(limits = rev) + | |
xlab("GOterm") + | |
ggtitle(paste0("GO terms enriched for DE and hub genes only in ",reg)) + | |
facet_wrap(~subplot, scales="free", labeller = as_labeller(facet_names)) + | |
theme(axis.text.y = element_text(size = 10))) | |
# ggsave(filename = paste0(basepath, folder_plots, "/02_CoExp_Kimono/03_AnalysisFuncoup/comparisonRegions/", | |
# "10_",reg,"_GOterms_DEandHubGenes_min", minCount,".png"), | |
# width = 13, height = 7) | |
ggsave(filename = paste0(basepath, folder_plots, "/02_CoExp_Kimono/03_AnalysisFuncoup/comparisonRegions/", | |
"10_",reg,"_GOterms_DEandHubGenes.pdf"), | |
width = 11, height = 7) | |
} | |