diff --git a/04_PlotsManuscript/01_FigureII_C.R b/04_PlotsManuscript/01_FigureII_C.R new file mode 100644 index 0000000..69f79c8 --- /dev/null +++ b/04_PlotsManuscript/01_FigureII_C.R @@ -0,0 +1,89 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 21.11.2021 +## Author: Nathalie +################################################## +# GO Enrichment plot for manuscript +# Unique DE and hub genes in PFC + +# set working directory to source file location +setwd("~/Documents/ownCloud/DexStim_RNAseq_Mouse/scripts/07_PlotsManuscript") + +library(readxl) +library(dplyr) +library(stringr) +library(ggplot2) + + +# 1. GO enrichment unique DE and hub genes in PFC + +data_hub <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure II_PFC/FUMA_diff unique hubgenes_gene2func67664/PFC diff unique hubgenes_FUMA.xlsx", + sheet = "GO bp") + +data_DE <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure II_PFC/DE PFC/FUMA_gene2func66314/PFC unique DE genes _FUMA.xlsx", + sheet = "GO bp") + + +# data for left panel with top 14 terms for DE +top_DE <- data_DE %>% + dplyr::filter(N_overlap_A > 24) +top_DE <- top_DE[order(top_DE$`my adj p BH`, decreasing = FALSE),][1:14,] + +top_DE_hub <- data_hub %>% + filter(GeneSet %in% top_DE$GeneSet) + +top_DE_comb <- bind_rows("de" = top_DE, "hub" = top_DE_hub, .id = "group") + + +# data for right panel with top 14 terms for hubs +top_hub <- data_hub %>% + dplyr::filter(N_overlap_A > 2) +top_hub <- top_hub[order(top_hub$`my adj p BH`, decreasing = FALSE),][1:14,] + +top_hub_DE <- data_DE %>% + filter(GeneSet %in% top_hub$GeneSet) + +missing_term <- setdiff(top_hub$GeneSet, top_hub_DE$GeneSet) +add_entry <- data.frame("GeneSet" = missing_term) +top_hub_DE <- bind_rows(top_hub_DE, add_entry) + +top_hub_comb <- bind_rows("hub" = top_hub, "de" = top_hub_DE, .id = "group") + + +# combine everything into one df for plotting +top_data <- bind_rows("de" = top_DE_comb, "hub" = top_hub_comb, .id = "panel") +# remove the "GO_" from GO terms +top_data$GeneSet <- str_replace_all(top_data$GeneSet, '^GO_', ' ') +# remove the "_" from GO terms +top_data$GeneSet <- str_replace_all(top_data$GeneSet, "_", " ") +# only capitalize first letter per word +top_data$GeneSet <- str_to_title(top_data$GeneSet) +# of and to should start with lower case +top_data$GeneSet <- str_replace_all(top_data$GeneSet, "Of", "of") +top_data$GeneSet <- str_replace_all(top_data$GeneSet, "To", "to") + +# labeller function for facet titles +facet_names <- c( + 'de'="Top 14 GO terms for DE genes", + 'hub'="Top 14 GO terms for hub genes" +) + +# barplot +g <- ggplot(top_data, aes(x = GeneSet, y = `[-log10 fdr`, fill = group)) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity") + + + scale_colour_manual(values = c("grey", "black"), guide = FALSE) + + scale_fill_manual(name = "Geneset", + values = c("orange", "darkred"), + labels = c("DE genes", "hub genes")) + + coord_flip() + + scale_x_discrete(limits = rev) + + xlab("GOterm") + + ylab("-log10(FDR)") + + ggtitle("GO terms enriched for unique DE and hub genes in PFC") + + facet_wrap(~panel, scales="free", labeller = as_labeller(facet_names)) + + theme_light() + + theme(text = element_text(size= 14)) +print(g) +ggsave("01_FigureII_C.pdf", g, width = 14, height = 8) + diff --git a/04_PlotsManuscript/02_FigureII_F.R b/04_PlotsManuscript/02_FigureII_F.R new file mode 100644 index 0000000..6b2283e --- /dev/null +++ b/04_PlotsManuscript/02_FigureII_F.R @@ -0,0 +1,83 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 22.11.2021 +## Author: Nathalie +################################################## +# Pathway enrichment plot for manuscript +# Abcd1 and neighbourhood in diff network of PFC + +library(readxl) + +# 1. Pathway enrichment for Abcd1 and neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure II_PFC/Abcd1 gene neighbourhood_FUMA_gene2func67210/Abcd1_diffnetwork_FUMA.xlsx", + sheet = "KEGG AND REACTOME") + +data <- arrange(data, desc(`[-log10FDR]`)) +data <- data[1:20,] + + +# remove the "_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, "_", " ") +# only capitalize first letter per word +data$GeneSet <- str_to_title(data$GeneSet) +# of and to should start with lower case +data$GeneSet <- str_replace_all(data$GeneSet, "Of", "of") +data$GeneSet <- str_replace_all(data$GeneSet, "To", "to") +data$GeneSet <- str_replace_all(data$GeneSet, " In ", " in ") +data$GeneSet <- str_replace_all(data$GeneSet, " Into ", " into ") +# Ii from "Polymerase II" back to upper case +data$GeneSet <- str_replace_all(data$GeneSet, "Reactome", "Reactome:") +data$GeneSet <- str_replace_all(data$GeneSet, "Kegg", "KEGG:") +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + + + +# bar plot gene ratio +gr <- ggplot(data, aes(x = GeneSet, y = `Genes ratio`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#0F5057") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("% Gene Ratio") + + labs(x = NULL) + +# bar plot odds ratio +or <- ggplot(data, aes(x = GeneSet, y = `OR[log2(a*d)-log2(b*c)]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#FAA916") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("Odds Ratio") + + labs(x = NULL) + +# barplot fdr p-value +fdr <- ggplot(data, aes(x = GeneSet, y = `[-log10FDR]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#D45E60") + + geom_hline(yintercept = -log10(0.05),linetype="dashed", color = "red") + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("-log10(FDR)") + + labs(x = NULL) + +# combined barplot +comb <- ggarrange(gr, + or + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + fdr + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(1.9,1,1)) + +ggexport(comb, filename = "02_FigureII_F.pdf", width = 14, height = 8) diff --git a/04_PlotsManuscript/03_FigureIII_B.R b/04_PlotsManuscript/03_FigureIII_B.R new file mode 100644 index 0000000..977faf3 --- /dev/null +++ b/04_PlotsManuscript/03_FigureIII_B.R @@ -0,0 +1,83 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 29.11.2021 +## Author: Nathalie +################################################## +# GO enrichment plot for manuscript +# vCA1 unique DE genes enrichments + +library(readxl) +library(dplyr) +library(stringr) +library(ggpubr) + +# 1. GO enrichment for vCA unique DE genes + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure III_vCA1/DE vCA1/FUMA_gene2func67310 (2)/vCA1 Unique DE genes_FUMA enrichments.xlsx", + sheet = "GO bp") + +data <- arrange(data, desc(`[-log10 of FDR`)) +data <- data[1:20,] + + +# remove the "GO_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, '^GO_', ' ') +# remove the "_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, "_", " ") +# only capitalize first letter per word +data$GeneSet <- str_to_title(data$GeneSet) +# of and to should start with lower case +data$GeneSet <- str_replace_all(data$GeneSet, "Of", "of") +data$GeneSet <- str_replace_all(data$GeneSet, "To", "to") +data$GeneSet <- str_replace_all(data$GeneSet, " In ", " in ") +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + + +# bar plot gene ratio +gr <- ggplot(data, aes(x = GeneSet, y = `Genes ratio`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#0F5057") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("% Gene Ratio") + + labs(x = NULL) + +# bar plot odds ratio +or <- ggplot(data, aes(x = GeneSet, y = `OR[log2(a*d)-log2(b*c)]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#FAA916") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("Odds Ratio") + + labs(x = NULL) + +# barplot fdr p-value +fdr <- ggplot(data, aes(x = GeneSet, y = `[-log10 of FDR`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#D45E60") + + geom_hline(yintercept = -log10(0.05),linetype="dashed", color = "red") + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("-log10(FDR)") + + labs(x = NULL) + +# combined barplot +comb <- ggarrange(gr, + or + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + fdr + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(2.5,1,1)) + +ggexport(comb, filename = "03_FigureIII_B.pdf", width = 14, height = 8) diff --git a/04_PlotsManuscript/04_FigureIII_C.R b/04_PlotsManuscript/04_FigureIII_C.R new file mode 100644 index 0000000..ba9e55f --- /dev/null +++ b/04_PlotsManuscript/04_FigureIII_C.R @@ -0,0 +1,84 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 29.11.2021 +## Author: Nathalie +################################################## +# GO enrichment plot for manuscript +# vCA1 unique DE genes with neighbours enrichments + + +library(readxl) +library(dplyr) +library(stringr) +library(ggpubr) + +# 1. GO enrichment for vCA unique DE genes and their diff neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure III_vCA1/network vCA1/Unique DEs and their diff neighbours_FUMA_gene2func67313/vCA1 Unique DE genes+diff neighbours_FUMA enrichments.xlsx", + sheet = "GO bp") + +data <- arrange(data, desc(`[-LOG10 of FDR`)) +data <- data[1:20,] + +# remove the "GO_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, '^GO_', ' ') +# remove the "_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, "_", " ") +# only capitalize first letter per word +data$GeneSet <- str_to_title(data$GeneSet) +# of and to should start with lower case +data$GeneSet <- str_replace_all(data$GeneSet, "Of", "of") +data$GeneSet <- str_replace_all(data$GeneSet, "To", "to") +data$GeneSet <- str_replace_all(data$GeneSet, " In ", " in ") +data$GeneSet <- str_replace_all(data$GeneSet, " Into ", " into ") +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + + +# bar plot gene ratio +gr <- ggplot(data, aes(x = GeneSet, y = `Genes ratio`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#0F5057") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("% Gene Ratio") + + labs(x = NULL) + +# bar plot odds ratio +or <- ggplot(data, aes(x = GeneSet, y = `OR[log2(a*d)-log2(b*c)]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#FAA916") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("Odds Ratio") + + labs(x = NULL) + +# barplot fdr p-value +fdr <- ggplot(data, aes(x = GeneSet, y = `[-LOG10 of FDR`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#D45E60") + + geom_hline(yintercept = -log10(0.05),linetype="dashed", color = "red") + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("-log10(FDR)") + + labs(x = NULL) + +# combined barplot +comb <- ggarrange(gr, + or + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + fdr + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(1.8,1,1)) + +ggexport(comb, filename = "03_FigureIII_C.pdf", width = 12, height = 8) diff --git a/04_PlotsManuscript/05_FigureIV_Bbase.R b/04_PlotsManuscript/05_FigureIV_Bbase.R new file mode 100644 index 0000000..f996410 --- /dev/null +++ b/04_PlotsManuscript/05_FigureIV_Bbase.R @@ -0,0 +1,90 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 29.11.2021 +## Author: Nathalie +################################################## +# GO enrichment plot for manuscript +# Tcf4 baseline network in PFC + +library(readxl) +library(dplyr) +library(stringr) +library(ggpubr) + +# 1. GO enrichment for vCA unique DE genes and their diff neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure IV_GWAS/TCF4/FUMA_baselin Tcf4PFCnetwork_gene2func68958/FUMA Tcf4BaselinePFCnetwork.xlsx", + sheet = "GO bp") + +data <- arrange(data, desc(`[-log10]`)) +data <- data[1:25,] + +# remove the "GO_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, '^GO_', ' ') +# remove the "_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, "_", " ") +# only capitalize first letter per word +data$GeneSet <- str_to_title(data$GeneSet) +# of and to should start with lower case +data$GeneSet <- str_replace_all(data$GeneSet, "Of", "of") +data$GeneSet <- str_replace_all(data$GeneSet, "To", "to") +data$GeneSet <- str_replace_all(data$GeneSet, " In ", " in ") +data$GeneSet <- str_replace_all(data$GeneSet, " Into ", " into ") +# make some terms shorter with abbreviations +data$GeneSet <- str_replace_all(data$GeneSet, "Positive Regulation", "Pos. Reg.") +data$GeneSet <- str_replace_all(data$GeneSet, "Negative Regulation", "Neg. Reg.") +# Ii from "Polymerase II" back to upper case +data$GeneSet <- str_replace_all(data$GeneSet, "Ii", "II") +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + + +# bar plot gene ratio +gr <- ggplot(data, aes(x = GeneSet, y = `Genes ratio`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#0F5057") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("% Gene Ratio") + + labs(x = NULL) + +# bar plot odds ratio +or <- ggplot(data, aes(x = GeneSet, y = `OR[log2(a*d)-log2(b*c)]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#FAA916") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("Odds Ratio") + + labs(x = NULL) + +# barplot fdr p-value +fdr <- ggplot(data, aes(x = GeneSet, y = `[-log10]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#D45E60") + + geom_hline(yintercept = -log10(0.05),linetype="dashed", color = "red") + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("-log10(FDR)") + + labs(x = NULL) + +# combined barplot +comb <- ggarrange(gr, + or + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + fdr + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(1.9,1,1)) + +ggexport(comb, filename = "05_FigureIV_Bbase.pdf", width = 14, height = 10) + + diff --git a/04_PlotsManuscript/05_FigureIV_Bdiff.R b/04_PlotsManuscript/05_FigureIV_Bdiff.R new file mode 100644 index 0000000..ddd3f64 --- /dev/null +++ b/04_PlotsManuscript/05_FigureIV_Bdiff.R @@ -0,0 +1,88 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 29.11.2021 +## Author: Nathalie +################################################## +# GO enrichment plot for manuscript +# Tcf4 differential network in PFC + +library(readxl) +library(dplyr) +library(stringr) +library(ggpubr) + +# 1. GO enrichment for vCA unique DE genes and their diff neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure IV_GWAS/TCF4/FUMA_diffTcf4PFCnetwork_gene2func68788/FUMA_diffPFCTcf4.xlsx", + sheet = "GO bp") + +data <- arrange(data, desc(`[-log10]`)) +data <- data[1:25,] + +# remove the "GO_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, '^GO_', ' ') +# remove the "_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, "_", " ") +# only capitalize first letter per word +data$GeneSet <- str_to_title(data$GeneSet) +# of and to should start with lower case +data$GeneSet <- str_replace_all(data$GeneSet, "Of", "of") +data$GeneSet <- str_replace_all(data$GeneSet, "To", "to") +data$GeneSet <- str_replace_all(data$GeneSet, " In ", " in ") +data$GeneSet <- str_replace_all(data$GeneSet, " Into ", " into ") +# make some terms shorter with abbreviations +data$GeneSet <- str_replace_all(data$GeneSet, "Positive Regulation", "Pos. Reg.") +data$GeneSet <- str_replace_all(data$GeneSet, "Negative Regulation", "Neg. Reg.") +# Ii from "Polymerase II" back to upper case +data$GeneSet <- str_replace_all(data$GeneSet, "Ii", "II") +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + + +# bar plot gene ratio +gr <- ggplot(data, aes(x = GeneSet, y = `Genes ratio`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#0F5057") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("% Gene Ratio") + + labs(x = NULL) + +# bar plot odds ratio +or <- ggplot(data, aes(x = GeneSet, y = `OR[log2(a*d)-log2(b*c)]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#FAA916") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("Odds Ratio") + + labs(x = NULL) + +# barplot fdr p-value +fdr <- ggplot(data, aes(x = GeneSet, y = `[-log10]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#D45E60") + + geom_hline(yintercept = -log10(0.05),linetype="dashed", color = "red") + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("-log10(FDR)") + + labs(x = NULL) + +# combined barplot +comb <- ggarrange(gr, + or + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + fdr + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(1.9,1,1)) + +ggexport(comb, filename = "05_FigureIV_Bdiff.pdf", width = 14, height = 10) \ No newline at end of file diff --git a/04_PlotsManuscript/06_FigureSIII_A.R b/04_PlotsManuscript/06_FigureSIII_A.R new file mode 100644 index 0000000..01122cd --- /dev/null +++ b/04_PlotsManuscript/06_FigureSIII_A.R @@ -0,0 +1,72 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 29.11.2021 +## Author: Nathalie +################################################## +# GWAS enrichment plot for manuscript +# vCA1 + +library(readxl) +library(dplyr) +library(ggplot2) +library(ggpubr) + +# 1. Pathway enrichment for Abcd1 and neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure III_vCA1/network vCA1/Unique DEs and their diff neighbours_FUMA_gene2func67313/vCA1 Unique DE genes+diff neighbours_FUMA enrichments.xlsx", + sheet = "GWAS") + +data <- arrange(data, desc(`[-log10 of FDR`)) +data <- data[data$`my adj p BH` <= 0.05,] +data <- data[!is.na(data$`my adj p BH`),] + +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + +# bar plot gene ratio +gr <- ggplot(data, aes(x = GeneSet, y = `Genes ratio`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#0F5057") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("% Gene Ratio") + + labs(x = NULL) + +# bar plot odds ratio +or <- ggplot(data, aes(x = GeneSet, y = `OR[log2(a*d)-log2(b*c)]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#FAA916") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("Odds Ratio") + + labs(x = NULL) + +# barplot fdr p-value +fdr <- ggplot(data, aes(x = GeneSet, y = `[-log10 of FDR`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#D45E60") + + geom_hline(yintercept = -log10(0.05),linetype="dashed", color = "red") + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("-log10(FDR)") + + labs(x = NULL) + +# combined barplot +comb <- ggarrange(gr, + or + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + fdr + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(1.8,1,1)) + +ggexport(comb, filename = "06_FigureSIII_A.pdf", width = 14, height = 8) diff --git a/04_PlotsManuscript/07_FigureSIV.R b/04_PlotsManuscript/07_FigureSIV.R new file mode 100644 index 0000000..9afb5a5 --- /dev/null +++ b/04_PlotsManuscript/07_FigureSIV.R @@ -0,0 +1,120 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 29.11.2021 +## Author: Nathalie +################################################## +# Pathway enrichment plot for manuscript +# Tcf4 Differential Expression + +library(stringr) +library(ggplot2) + +regions <- c("AMY", "CER", "dCA1", "dDG", "PFC", "PVN", "vCA1", "vDG") + +ensembl_tcf4 <- "ENSMUSG00000053477" + +# Panel A: Expression values +df <- data.frame("region" = character(), + "treatment" = character(), + "sample" = character(), + "expression" = numeric()) + +for (reg in regions){ + + # read vsd normalized data --> better raw data? + data_exp <- read.table(paste0("~/Documents/ownCloud/DexStim_RNAseq_Mouse/tables/02_", + reg, "_deseq2_expression_vsd.txt"), + header = TRUE, sep = "\t") + # subset Tcf4 row + tcf4 <- data_exp[ensembl_tcf4,] + + # get column indices for cntrl samples + indices_cntrl <- str_detect(colnames(data_exp), "CNTRL") + + # df for cntrl samples + exp_cntrl <- data.frame("expression" = t(tcf4[,indices_cntrl]), + "sample" = rownames(t(tcf4[,indices_cntrl]))) %>% + rename(expression = ENSMUSG00000053477) %>% + mutate("treatment" = "CNTRL", "region" = reg) + df <- rbind(df, exp_cntrl) + + # df for dex samples + exp_dex <- data.frame("expression" = t(tcf4[,!indices_cntrl]), + "sample" = rownames(t(tcf4[,!indices_cntrl]))) %>% + rename(expression = ENSMUSG00000053477) %>% + mutate("treatment" = "DEX", "region" = reg) + df <- rbind(df, exp_dex) + +} + +df$treatment <- factor(df$treatment) +df$region <- factor(df$region) + +ggplot(df, aes(x = region, y = expression, fill = treatment)) + + geom_boxplot() + + scale_fill_manual("", + breaks = c("CNTRL", "DEX"), + labels = c("Baseline", "Treatment"), + values = c("#B0BFBB", "#46866E")) + + theme_light() + + theme(text = element_text(size= 14)) + + xlab("Brain Region") + + ylab("Norm. Expression Level") + +ggsave(filename = "07_FigureSIV_A.pdf", width = 12, height = 8) + + +# Panel B: Fold Change in AMY, vDG and dDG + +list_tcf4 <- list() + +for (reg in c("AMY", "dDG", "vDG")){ +# for (reg in regions){ + + # read DE results + data_exp <- read.table(paste0("~/Documents/ownCloud/DexStim_RNAseq_Mouse/tables/02_", + reg, "_deseq2_Dex_1_vs_0_lfcShrink.txt"), + header = TRUE, sep = "\t") + tcf4 <- as.data.frame(data_exp[data_exp$Ensembl_ID == ensembl_tcf4,]) + print(tcf4) + + list_tcf4[[reg]] <- tcf4 + +} + +df_tcf4 <- bind_rows(list_tcf4, .id = "region") + + +# bar plot Fold Change +fc <- ggplot(df_tcf4, aes(x = region, y = log2FoldChange) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "darkgrey") + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("log2(FoldChange)") + + xlab("Brain Region") + +# barplot fdr p-value +fdr <- ggplot(df_tcf4, aes(x = region, y = -log10(padj)) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#D45E60") + + geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("-log10(FDR)") + + xlab("") + +# combined barplot +comb <- ggarrange(fc, + fdr + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(1.3,1)) + +# ggexport(comb, filename = "07_FigureSIV_B_allRegions.pdf", width = 12, height = 8) +ggexport(comb, filename = "07_FigureSIV_B.pdf", width = 12, height = 8) + diff --git a/04_PlotsManuscript/08_FigureIII_A.R b/04_PlotsManuscript/08_FigureIII_A.R new file mode 100644 index 0000000..f71384a --- /dev/null +++ b/04_PlotsManuscript/08_FigureIII_A.R @@ -0,0 +1,46 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 30.11.2021 +## Author: Nathalie +################################################## +# Numbers/Percentages of DE and hub genes in vCA1 +# including also number of unique ones + +library(ggplot2) + +# correct numbers? --> DE percentage different than before +DE_genes <- 465 +hub_genes <- 260 + +unique_DE <- 25 +unique_hub <- 24 + +# data frame for plotting +df <- data.frame("type" = c("DE", "DE", "hub", "hub"), + "unique" = c(TRUE, FALSE, TRUE, FALSE), + "number" = c(unique_DE, DE_genes - unique_DE, + unique_hub, hub_genes - unique_hub), + "text" = c(paste0(round(unique_DE/DE_genes*100,1),"%"), "", + paste0(round(unique_hub/hub_genes*100,1), "%"), "")) + +# barplot +ggplot(df, aes(x = type, y = number, fill = type)) + + geom_bar(position = "stack", stat="identity", aes(alpha = unique)) + + geom_text(aes(y = number, label = text), vjust = 1.5) + + scale_alpha_manual("", + breaks = c(FALSE, TRUE), + values = c(1.0,0.5), + labels = c("Shared genes", "Unique genes")) + + xlab("") + + ylab("# DE and hub genes in vCA1") + + scale_fill_manual("", + breaks = c("DE", "hub"), + labels = c("DE genes", "Hub genes"), + values = c("orange", "darkred")) + + scale_x_discrete(breaks = c("DE", "hub"), + labels = c("DE genes", "Hub genes")) + + theme_light() + + theme(text = element_text(size= 14)) + + guides(fill = "none") + +ggsave(filename = "08_FigureIII_A.pdf", width = 8, height = 6) diff --git a/04_PlotsManuscript/09_FigureII_DandE.R b/04_PlotsManuscript/09_FigureII_DandE.R new file mode 100644 index 0000000..2d195b9 --- /dev/null +++ b/04_PlotsManuscript/09_FigureII_DandE.R @@ -0,0 +1,104 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 30.11.2021 +## Author: Nathalie +################################################## +# Expression level plots Syn3 and Abcd1 in PFC + +library(stringr) +library(ggplot2) + +reg <- "PFC" + +ensembl_syn3 <- "ENSMUSG00000059602" +ensembl_abcd1 <- "ENSMUSG00000031378" + +# read vsd normalized data +data_exp <- read.table(paste0("~/Documents/ownCloud/DexStim_RNAseq_Mouse/tables/02_", + reg, "_deseq2_expression_vsd.txt"), + header = TRUE, sep = "\t") + + +### Syn3 ---------------- + +# subset Syn3 row +syn3 <- data_exp[ensembl_syn3,] + +# get column indices for cntrl samples +indices_cntrl <- str_detect(colnames(data_exp), "CNTRL") + +# df for cntrl samples +exp_cntrl <- data.frame("expression" = t(syn3[,indices_cntrl]), + "sample" = rownames(t(syn3[,indices_cntrl]))) %>% + rename(expression = all_of(ensembl_syn3)) %>% + mutate("treatment" = "CNTRL") + +# df for dex samples +exp_dex <- data.frame("expression" = t(syn3[,!indices_cntrl]), + "sample" = rownames(t(syn3[,!indices_cntrl]))) %>% + rename(expression = all_of(ensembl_syn3)) %>% + mutate("treatment" = "DEX") + +# combine cntrl and dex df +df <- rbind(exp_cntrl, exp_dex) +df$treatment <- factor(df$treatment) + +ggplot(df, aes(x = treatment, y = expression, fill = treatment)) + + geom_boxplot() + + geom_jitter() + + scale_fill_manual("", + breaks = c("CNTRL", "DEX"), + labels = c("Baseline", "Treatment"), + values = c("#B0BFBB", "#46866E")) + + scale_x_discrete(breaks = c("CNTRL", "DEX"), + labels = c("Baseline", "Treatment")) + + theme_light() + + theme(text = element_text(size= 16)) + + xlab("") + + ylab("Norm. Expression Level") + + guides(fill = "none") + +ggsave(filename = "09_FigureII_D.pdf", width = 7, height = 6) + + + +### Abcd1 ---------------- + +# subset Abcd1 row +abcd1 <- data_exp[ensembl_abcd1,] + +# get column indices for cntrl samples +indices_cntrl <- str_detect(colnames(data_exp), "CNTRL") + +# df for cntrl samples +exp_cntrl <- data.frame("expression" = t(abcd1[,indices_cntrl]), + "sample" = rownames(t(abcd1[,indices_cntrl]))) %>% + rename(expression = all_of(ensembl_abcd1)) %>% + mutate("treatment" = "CNTRL") + +# df for dex samples +exp_dex <- data.frame("expression" = t(abcd1[,!indices_cntrl]), + "sample" = rownames(t(abcd1[,!indices_cntrl]))) %>% + rename(expression = all_of(ensembl_abcd1)) %>% + mutate("treatment" = "DEX") + +# combine cntrl and dex df +df <- rbind(exp_cntrl, exp_dex) +df$treatment <- factor(df$treatment) + +ggplot(df, aes(x = treatment, y = expression, fill = treatment)) + + geom_boxplot() + + geom_jitter() + + scale_fill_manual("", + breaks = c("CNTRL", "DEX"), + labels = c("Baseline", "Treatment"), + values = c("#B0BFBB", "#46866E")) + + scale_x_discrete(breaks = c("CNTRL", "DEX"), + labels = c("Baseline", "Treatment")) + + theme_light() + + theme(text = element_text(size= 16)) + + xlab("") + + ylab("Norm. Expression Level") + + guides(fill = "none") + +ggsave(filename = "09_FigureII_E.pdf", width = 7, height = 6) diff --git a/04_PlotsManuscript/10_FigureSII_A.R b/04_PlotsManuscript/10_FigureSII_A.R new file mode 100644 index 0000000..9430e6f --- /dev/null +++ b/04_PlotsManuscript/10_FigureSII_A.R @@ -0,0 +1,90 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 30.11.2021 +## Author: Nathalie +################################################## +# Number of DE genes and unique percentage + +library(data.table) +library(ggplot2) +library(dplyr) +library(tidyverse) + +regions <- + c("AMY", "PFC", "PVN", "CER", "vDG", "dDG", "vCA1", "dCA1") + + +# 1. Read DE tables from all regions ---------- + +list_reg_sig <- list() +list_genes_sig <- list() + +for (reg in regions) { + res <- + fread( + file = paste0( + "~/Documents/ownCloud/DexStim_RNAseq_Mouse/tables", + "/02_", + reg, + "_deseq2_Dex_1_vs_0_lfcShrink.txt" + ), + sep = "\t" + ) + na_indices <- which(is.na(res$padj)) + res$padj[na_indices] <- 1 + res_sig <- res[res$padj <= 0.1, ] + # res_sig <- res[res$log2FoldChange >= 1] + list_reg_sig[[reg]] <- res_sig + list_genes_sig[[reg]] <- rownames(res_sig) +} + +# 2. Concatenate DE tables ----------------- + +data <- bind_rows(list_reg_sig, .id = "region") %>% + group_by(Ensembl_ID) %>% + summarise(region = list(region)) + +data_unique <- data %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + unnest(cols = c(region)) %>% + mutate("combined_id" = paste0(region, "-", Ensembl_ID)) + +data_barplot <- data_unique %>% + group_by(region, unique) %>% + count() %>% + group_by(region) %>% + mutate(sum = sum(n)) + + +# 3. Stacked barplot ------------------------- + +ggplot(data_barplot, aes(x = region, y = n, alpha = unique)) + + geom_bar(position = "stack", stat = "identity", fill = "orange") + + scale_alpha_manual( + name = "", + labels = c("DE in multiple regions", "DE unique"), + values = c(1.0, 0.5) + ) + + xlab("Brain region") + + ylab("# DE genes") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12), + # legend.title = element_blank(), + legend.position = "top" + ) + + geom_text(aes(label = paste0(round((n / sum) * 100, digits = 1 + ), "%")), + position = position_stack(vjust = 0.5), + size = 4, + show.legend = FALSE) +ggsave( + "10_FigureSII_A.pdf", + width = 8, + height = 6 +) diff --git a/04_PlotsManuscript/11_FigureSII_B.R b/04_PlotsManuscript/11_FigureSII_B.R new file mode 100644 index 0000000..3858189 --- /dev/null +++ b/04_PlotsManuscript/11_FigureSII_B.R @@ -0,0 +1,92 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 30.11.2021 +## Author: Nathalie +################################################## +# Number of hub genes and unique percentage + +library(data.table) +library(ggplot2) +library(dplyr) +library(tidyverse) + +regions <- + c("AMY", "PFC", "PVN", "CER", "vDG", "dDG", "vCA1", "dCA1") + + +# 1. Read DE tables from all regions ---------- + +list_reg_sig <- list() +list_genes_sig <- list() + +for (reg in regions) { + res <- + fread( + file = paste0( + "~/Documents/ownCloud/DexStim_RNAseq_Mouse/tables/coExpression_kimono/03_AnalysisFuncoup/", + "/04_", + reg, + "_funcoup_differential_nodebetweennessNorm_betacutoff0.01.csv" + ), + sep = "," + ) + na_indices <- which(is.na(res$nodebetweenness_norm)) + res$padj[na_indices] <- 0 + res_sig <- res[res$nodebetweenness_norm >= 1.0, ] + list_reg_sig[[reg]] <- res_sig + list_genes_sig[[reg]] <- rownames(res_sig) +} + + +# 2. Concatenate hub tables ----------------- + +data <- bind_rows(list_reg_sig, .id = "region") %>% + group_by(ensembl_id) %>% + summarise(region = list(region)) + +data_unique <- data %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + unnest(cols = c(region)) %>% + mutate("combined_id" = paste0(region, "-", ensembl_id)) + +data_barplot <- data_unique %>% + group_by(region, unique) %>% + count() %>% + group_by(region) %>% + mutate(sum = sum(n)) + + +# 3. Stacked barplot ------------------------- + +ggplot(data_barplot, aes(x = region, y = n, alpha = unique)) + + geom_bar(position = "stack", stat = "identity", fill = "darkred") + + scale_alpha_manual( + name = "", + labels = c("Hub in multiple regions", "Hub unique"), + values = c(1.0, 0.5) + ) + + xlab("Brain region") + + ylab("# Hub genes") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12), + # legend.title = element_blank(), + legend.position = "top" + ) + + geom_text(aes(label = paste0(round((n / sum) * 100, digits = 1 + ), "%")), + position = position_stack(vjust = 0.5), + size = 4, + color = "white", + show.legend = FALSE) + +ggsave( + "11_FigureSII_B.pdf", + width = 8, + height = 6 +) diff --git a/04_PlotsManuscript/12_FigureSII_C.R b/04_PlotsManuscript/12_FigureSII_C.R new file mode 100644 index 0000000..9083131 --- /dev/null +++ b/04_PlotsManuscript/12_FigureSII_C.R @@ -0,0 +1,248 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 30.11.2021 +## Author: Nathalie +################################################## +# GO enrichment for DE genes across all regions + +library(clusterProfiler) +library(org.Mm.eg.db) +library(ggplot2) +library(dplyr) +library(stringr) +library(writexl) + + +basepath <- "~/Documents/ownCloud/DexStim_RNAseq_Mouse/" + + +# 0. Read genes DE in all regions and background ----------------- +genes <- read.table(file = paste0(basepath, "tables/06_overlap_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_entrezID.txt"), + header = FALSE)[,1] + +# background are all genes in out dataset +background <- read.table(file = paste0(basepath, "tables/06_background_entrezID.txt"), + header = FALSE)[,1] + + + +# 1.1 GO enrichment for genes DE in all regions --------------------- + +# GO enrichment +min_genes <- round(length(genes)/10) +ego <- enrichGO(gene = as.character(genes), + universe = as.character(background), + OrgDb = org.Mm.eg.db, + ont = "BP", + pAdjustMethod = "BH", + minGSSize = min_genes, # min number of genes associated with GO term + maxGSSize = 10000, # max number of genes associated with GO term + readable = TRUE)@result +head(ego, n = 20) + + +# 1.2 GO enrichment for upregulated genes DE in all regions --------------------- +genes_up <- read.table(file = paste0(basepath, "tables/06_overlap_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_up_entrezID.txt"), + header = FALSE)[,1] + +# GO enrichment +min_genes <- round(length(genes_up)/10) +ego_up <- enrichGO(gene = as.character(genes_up), + universe = as.character(background), + OrgDb = org.Mm.eg.db, + ont = "BP", + pAdjustMethod = "BH", + minGSSize = min_genes, # min number of genes associated with GO term + maxGSSize = 10000, # max number of genes associated with GO term + readable = TRUE)@result +head(ego_up, n = 20) + + + + +# 1.3 GO enrichment for downregulated genes DE in all regions --------------------- +genes_down <- read.table( + file = paste0(basepath, + "tables/06_overlap_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_down_entrezID.txt"), + header = FALSE)[,1] + +# GO enrichment +min_genes <- length(genes_down)/10 +ego_down <- enrichGO(gene = as.character(genes_down), + universe = as.character(background), + OrgDb = org.Mm.eg.db, + ont = "BP", + pAdjustMethod = "BH", + minGSSize = min_genes, # min number of genes associated with GO term + maxGSSize = 10000, # max number of genes associated with GO term + readable = TRUE)@result +head(ego_down, n = 20) + + +# 1.4 Merge dataframes from all, up and downregulated genes -------------------- + +data_go <- left_join(ego, ego_down, by = c("ID", "Description"), + suffix = c(".all", ".down")) +data_go <- left_join(data_go, ego_up, by = c("ID", "Description"), + suffix = c("", ".up")) +# apparently not all entrez ids are valid --> not all taken into account in enrichment analysis +ngenes_rel <- 165 +nback_rel <- 12089 +# add columns relevant for gene ratio and odds ratio +data_go <- data_go %>% + mutate(n_genes = as.numeric(str_extract(data_go$BgRatio.all, "^\\d+"))) %>% + mutate(gr = Count.all/n_genes*100) %>% + mutate(b = ngenes_rel - Count.all, + c = n_genes - Count.all, + d = nback_rel - Count.all, + or = log2(Count.all*d)-log2(b*c)) + +data_heat <- data_go[1:30,c("Description", "p.adjust.down", "p.adjust")] %>% + tidyr::pivot_longer(cols = p.adjust.down:p.adjust) + + +# 1.5 Barplot ------------------------------- +# gene ratio +bp.1 <- + ggplot(data = data_go[1:25,], aes( + x = factor(Description, levels = rev(data_go$Description[1:25])), + y = gr + )) + + geom_bar(stat = "identity", position = "stack", + fill = "#0F5057") + + ylab("% Gene Ratio") + + xlab("") + + theme_light() + + theme( + axis.title.y = element_text(size = 14), + axis.text.x = element_text(size = 12), + axis.title.x = element_text(size =14), + axis.text.y = element_text(size = 12), + legend.position = "top", + legend.text = element_text(size = 10) + ) + + coord_flip() + +# odds ratio +bp.2 <- + ggplot(data = data_go[1:25,], aes( + x = factor(Description, levels = rev(data_go$Description[1:25])), + y = or + )) + + geom_bar(stat = "identity", position = "stack", + fill = "#FAA916") + + ylab("Odds Ratio") + + theme_light() + + theme( + axis.title.y = element_text(size = 14), + axis.text.x = element_text(size = 12), + axis.title.x = element_text(size =14), + axis.text.y = element_text(size = 12), + legend.position = "top", + legend.text = element_text(size = 10) + ) + + coord_flip() + +# p-value +bp.3 <- + ggplot(data = data_go[1:25,], aes( + x = factor(Description, levels = rev(data_go$Description[1:25])), + y = -log10(p.adjust.all) + )) + + geom_bar(stat = "identity", position = "stack", + fill = "#D45E60") + + geom_hline(yintercept = -log10(0.05),linetype="dashed", color = "red") + + ylab("-log10(FDR)") + + theme_light() + + theme( + axis.title.y = element_text(size = 14), + axis.text.x = element_text(size = 12), + axis.title.x = element_text(size =14), + axis.text.y = element_text(size = 12), + legend.position = "top", + legend.text = element_text(size = 10) + ) + + coord_flip() + +hm.1 <- + ggplot(data = data_heat, aes( + x = factor(Description, levels = rev(data_go$Description[1:25])), + y = name, + fill = value <= 0.01 + )) + + geom_tile() + + scale_y_discrete(name ="sig. GO term", + limits=c("p.adjust","p.adjust.down"), + labels=c("upreg.", "downreg.")) + + scale_fill_manual( + name = "GO term sig.", + values = c("darkgrey", "black") + ) + + ylab("sig. GO term") + + theme_light() + + theme( + axis.title.y = element_text(size = 14), + axis.text.x = element_text(size = 12), + axis.title.x = element_text(size =14), + axis.text.y = element_text(size = 12), + #legend.position = "top", + legend.text = element_text(size = 10) + ) + + coord_flip() + +# combined plot +comb <- ggarrange(bp.1, + bp.2 + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + bp.3 + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + hm.1 + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(2.5,1,1,1)) + +# Save plot +ggexport( + comb, + filename = "12_FigureSII_C.pdf", + height = 10, + width = 15 +) + + +# bring table to a format similar as other tables +tf <- data_go[1:25,] + +# add Category column +tf$Category <- "GO_bp" +# add GeneSet column +tf$GeneSet <- tf$Description %>% + stringr::str_replace_all(" ", "_") %>% + stringr::str_to_upper() +tf$GeneSet <- paste0("GO_", tf$GeneSet) +# add -log10 transformed FDR pvalue +tf$`[-log10 FDR]` <- -log10(tf$p.adjust.all) + +# subset to columns that should be printed +tf <- tf %>% + dplyr::select(Category, GeneSet, n_genes, Count, b, c, d, gr, or, pvalue.all, + p.adjust.all, `[-log10 FDR]`, geneID.all) %>% + dplyr::rename(N_genes = n_genes, + N_overlap_A = Count, + `Input that does not overlap_B` = b, + `GO term genes - overlap_C` = c, + `Not GO term genes and not in my geneset_D` = d, + `Genes ratio` = gr, + `OR[log2(a*d)-log2(b*c)]` = or, + p = pvalue.all, + adjP = p.adjust.all, + genes = geneID.all) + +# save to a excel file +write_xlsx(tf, "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure II_PFC/GOterms_DEgenesAllRegions.xlsx") diff --git a/04_PlotsManuscript/13_FigureSII_D.R b/04_PlotsManuscript/13_FigureSII_D.R new file mode 100644 index 0000000..8bc2fca --- /dev/null +++ b/04_PlotsManuscript/13_FigureSII_D.R @@ -0,0 +1,199 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 30.11.2021 +## Author: Nathalie +################################################## +# GO enrichment for hub genes across all regions + + +library(data.table) +library(ggplot2) +library(dplyr) +library(tidyverse) +library(org.Mm.eg.db) +library(clusterProfiler) +library(ggpubr) +library(writexl) + +regions <- + c("AMY", "PFC", "PVN", "CER", "vDG", "dDG", "vCA1", "dCA1") + + +# 1. Read DE tables from all regions ---------- + +list_reg_sig <- list() +list_genes_sig <- list() + +for (reg in regions) { + res <- + fread( + file = paste0( + "~/Documents/ownCloud/DexStim_RNAseq_Mouse/tables/coExpression_kimono/03_AnalysisFuncoup/", + "/04_", + reg, + "_funcoup_differential_nodebetweennessNorm_betacutoff0.01.csv" + ), + sep = "," + ) + na_indices <- which(is.na(res$nodebetweenness_norm)) + res$padj[na_indices] <- 0 + res_sig <- res[res$nodebetweenness_norm >= 1.0, ] + list_reg_sig[[reg]] <- res_sig + list_genes_sig[[reg]] <- res_sig$ensembl_id +} + +hub_shared <- Reduce(intersect, list_genes_sig) +genes <- mapIds(org.Mm.eg.db, keys = hub_shared, keytype = "ENSEMBL", column="ENTREZID") + +# background are all genes in out dataset +background <- read.table(file = paste0("~/Documents/ownCloud/DexStim_RNAseq_Mouse/tables/06_background_entrezID.txt"), + header = FALSE)[,1] + + +# 1.1 GO enrichment for genes DE in all regions --------------------- + +# GO enrichment +min_genes <- round(length(genes)/10) +ego <- enrichGO(gene = as.character(genes), + universe = as.character(background), + OrgDb = org.Mm.eg.db, + ont = "BP", + pAdjustMethod = "BH", + minGSSize = min_genes, # min number of genes associated with GO term + maxGSSize = 10000, # max number of genes associated with GO term + readable = TRUE)@result +head(ego, n = 20) + + +# 1.4 Merge dataframes from all, up and downregulated genes -------------------- + +# apparently not all entrez ids are valid --> not all taken into account in enrichment analysis +ngenes_rel <- 7 +nback_rel <- 12089 +# add columns relevant for gene ratio and odds ratio +data_go <- ego %>% + mutate(n_genes = as.numeric(str_extract(ego$BgRatio, "^\\d+"))) %>% + mutate(gr = Count/n_genes*100) %>% + mutate(b = ngenes_rel - Count, + c = n_genes - Count, + d = nback_rel - Count, + or = log2(Count*d)-log2(b*c)) + + +# 1.5 Barplot ------------------------------- +# gene ratio +bp.1 <- + ggplot(data = data_go[1:25,], aes( + x = factor(Description, levels = rev(data_go$Description[1:25])), + y = gr + )) + + geom_bar(stat = "identity", position = "stack", + fill = "#0F5057") + + ylab("% Gene Ratio") + + xlab("") + + theme_light() + + theme( + axis.title.y = element_text(size = 14), + axis.text.x = element_text(size = 12), + axis.title.x = element_text(size =14), + axis.text.y = element_text(size = 12), + legend.position = "top", + legend.text = element_text(size = 10) + ) + + coord_flip() + +# odds ratio +bp.2 <- + ggplot(data = data_go[1:25,], aes( + x = factor(Description, levels = rev(data_go$Description[1:25])), + y = or + )) + + geom_bar(stat = "identity", position = "stack", + fill = "#FAA916") + + ylab("Odds Ratio") + + theme_light() + + theme( + axis.title.y = element_text(size = 14), + axis.text.x = element_text(size = 12), + axis.title.x = element_text(size =14), + axis.text.y = element_text(size = 12), + legend.position = "top", + legend.text = element_text(size = 10) + ) + + coord_flip() + +# p-value +bp.3 <- + ggplot(data = data_go[1:25,], aes( + x = factor(Description, levels = rev(data_go$Description[1:25])), + y = -log10(p.adjust) + )) + + geom_bar(stat = "identity", position = "stack", + fill = "#D45E60") + + geom_hline(yintercept = -log10(0.05),linetype="dashed", color = "red") + + ylab("-log10(FDR)") + + theme_light() + + theme( + axis.title.y = element_text(size = 14), + axis.text.x = element_text(size = 12), + axis.title.x = element_text(size =14), + axis.text.y = element_text(size = 12), + legend.position = "top", + legend.text = element_text(size = 10) + ) + + coord_flip() + + +# combined plot +comb <- ggarrange(bp.1, + bp.2 + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + bp.3 + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(2.5,1,1)) + +# Save plot +ggexport( + comb, + filename = "13_FigureSII_D.pdf", + height = 10, + width = 13 +) + + +# bring table to a format similar as other tables +tf <- data_go[1:25,] + +# add Category column +tf$Category <- "GO_bp" +# add GeneSet column +tf$GeneSet <- tf$Description %>% + stringr::str_replace_all(" ", "_") %>% + stringr::str_to_upper() +tf$GeneSet <- paste0("GO_", tf$GeneSet) +# add -log10 transformed FDR pvalue +tf$`[-log10 FDR]` <- -log10(tf$p.adjust) + +# subset to columns that should be printed +tf <- tf %>% + dplyr::select(Category, GeneSet, n_genes, Count, b, c, d, gr, or, pvalue, + p.adjust, `[-log10 FDR]`, geneID) %>% + dplyr::rename(N_genes = n_genes, + N_overlap_A = Count, + `Input that does not overlap_B` = b, + `GO term genes - overlap_C` = c, + `Not GO term genes and not in my geneset_D` = d, + `Genes ratio` = gr, + `OR[log2(a*d)-log2(b*c)]` = or, + p = pvalue, + adjP = p.adjust, + genes = geneID) + +# save to a excel file +write_xlsx(tf, "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure II_PFC/GOterms_hubsAllRegions.xlsx") + diff --git a/04_PlotsManuscript/14_FigureSIV_gwas.R b/04_PlotsManuscript/14_FigureSIV_gwas.R new file mode 100644 index 0000000..f7fa6a2 --- /dev/null +++ b/04_PlotsManuscript/14_FigureSIV_gwas.R @@ -0,0 +1,75 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 30.11.2021 +## Author: Nathalie +################################################## +# GWAS enrichment plot for manuscript +# Tcf4 neighbours? + +library(readxl) +library(dplyr) +library(ggplot2) +library(ggpubr) + +# 1. Pathway enrichment for Abcd1 and neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure IV_GWAS/TCF4/FUMA_diffTcf4PFCnetwork_gene2func68788/FUMA_diffPFCTcf4.xlsx", + sheet = "GWAS") + +data <- arrange(data, desc(`[-log10]`)) +data$`my adj p` <- as.numeric(data$`my adj p`) +data <- data[data$`my adj p` <= 0.05,] +data <- data[!is.na(data$`my adj p`),] + +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + + +# bar plot gene ratio +gr <- ggplot(data, aes(x = GeneSet, y = `Genes ratio`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#0F5057") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("% Gene Ratio") + + labs(x = NULL) + +# bar plot odds ratio +or <- ggplot(data, aes(x = GeneSet, y = `OR[log2(a*d)-log2(b*c)]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#FAA916") + + scale_fill_manual() + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("Odds Ratio") + + labs(x = NULL) + +# barplot fdr p-value +fdr <- ggplot(data, aes(x = GeneSet, y = `[-log10]`) ) + + geom_bar(position = position_dodge2(reverse=TRUE), stat="identity", fill = "#D45E60") + + geom_hline(yintercept = -log10(0.05),linetype="dashed", color = "red") + + theme_light() + + coord_flip() + + scale_x_discrete(limits = rev) + + theme(text = element_text(size= 14)) + + ylab("-log10(FDR)") + + labs(x = NULL) + +# combined barplot +comb <- ggarrange(gr, + or + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + fdr + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(1.8,1,1)) + +ggexport(comb, filename = "14_FigureSIV_gwas.pdf", width = 14, height = 8) + diff --git a/06_Shiny/www/DiffNetworks.pdf b/06_Shiny/www/DiffNetworks.pdf deleted file mode 100644 index 6885431..0000000 Binary files a/06_Shiny/www/DiffNetworks.pdf and /dev/null differ diff --git a/07_PlotsManuscript/01_FigureII_C.pdf b/07_PlotsManuscript/01_FigureII_C.pdf deleted file mode 100644 index 52b55f0..0000000 Binary files a/07_PlotsManuscript/01_FigureII_C.pdf and /dev/null differ diff --git a/07_PlotsManuscript/02_FigureII_F.pdf b/07_PlotsManuscript/02_FigureII_F.pdf deleted file mode 100644 index 683748f..0000000 Binary files a/07_PlotsManuscript/02_FigureII_F.pdf and /dev/null differ diff --git a/07_PlotsManuscript/03_FigureIII_B.pdf b/07_PlotsManuscript/03_FigureIII_B.pdf deleted file mode 100644 index 4e79f68..0000000 Binary files a/07_PlotsManuscript/03_FigureIII_B.pdf and /dev/null differ diff --git a/07_PlotsManuscript/03_FigureIII_C.pdf b/07_PlotsManuscript/03_FigureIII_C.pdf deleted file mode 100644 index 4b9fcd9..0000000 Binary files a/07_PlotsManuscript/03_FigureIII_C.pdf and /dev/null differ diff --git a/07_PlotsManuscript/05_FigureIV_Bbase.pdf b/07_PlotsManuscript/05_FigureIV_Bbase.pdf deleted file mode 100644 index 8afde51..0000000 Binary files a/07_PlotsManuscript/05_FigureIV_Bbase.pdf and /dev/null differ diff --git a/07_PlotsManuscript/05_FigureIV_Bdiff.pdf b/07_PlotsManuscript/05_FigureIV_Bdiff.pdf deleted file mode 100644 index 04ce51c..0000000 Binary files a/07_PlotsManuscript/05_FigureIV_Bdiff.pdf and /dev/null differ diff --git a/07_PlotsManuscript/06_FigureSIII_A.pdf b/07_PlotsManuscript/06_FigureSIII_A.pdf deleted file mode 100644 index 859385e..0000000 Binary files a/07_PlotsManuscript/06_FigureSIII_A.pdf and /dev/null differ diff --git a/07_PlotsManuscript/07_FigureSIV_A.pdf b/07_PlotsManuscript/07_FigureSIV_A.pdf deleted file mode 100644 index 113c07f..0000000 Binary files a/07_PlotsManuscript/07_FigureSIV_A.pdf and /dev/null differ diff --git a/07_PlotsManuscript/07_FigureSIV_B.pdf b/07_PlotsManuscript/07_FigureSIV_B.pdf deleted file mode 100644 index 7a92588..0000000 Binary files a/07_PlotsManuscript/07_FigureSIV_B.pdf and /dev/null differ diff --git a/07_PlotsManuscript/07_FigureSIV_B_allRegions.pdf b/07_PlotsManuscript/07_FigureSIV_B_allRegions.pdf deleted file mode 100644 index 12a239d..0000000 Binary files a/07_PlotsManuscript/07_FigureSIV_B_allRegions.pdf and /dev/null differ diff --git a/07_PlotsManuscript/08_FigureIII_A.pdf b/07_PlotsManuscript/08_FigureIII_A.pdf deleted file mode 100644 index 5881099..0000000 Binary files a/07_PlotsManuscript/08_FigureIII_A.pdf and /dev/null differ diff --git a/07_PlotsManuscript/09_FigureII_D.pdf b/07_PlotsManuscript/09_FigureII_D.pdf deleted file mode 100644 index d3b6a06..0000000 Binary files a/07_PlotsManuscript/09_FigureII_D.pdf and /dev/null differ diff --git a/07_PlotsManuscript/09_FigureII_E.pdf b/07_PlotsManuscript/09_FigureII_E.pdf deleted file mode 100644 index 6b90a89..0000000 Binary files a/07_PlotsManuscript/09_FigureII_E.pdf and /dev/null differ diff --git a/07_PlotsManuscript/10_FigureSII_A.pdf b/07_PlotsManuscript/10_FigureSII_A.pdf deleted file mode 100644 index 3e55a10..0000000 Binary files a/07_PlotsManuscript/10_FigureSII_A.pdf and /dev/null differ diff --git a/07_PlotsManuscript/11_FigureSII_B.pdf b/07_PlotsManuscript/11_FigureSII_B.pdf deleted file mode 100644 index 7cf2380..0000000 Binary files a/07_PlotsManuscript/11_FigureSII_B.pdf and /dev/null differ diff --git a/07_PlotsManuscript/12_FigureSII_C.pdf b/07_PlotsManuscript/12_FigureSII_C.pdf deleted file mode 100644 index e29002d..0000000 Binary files a/07_PlotsManuscript/12_FigureSII_C.pdf and /dev/null differ diff --git a/07_PlotsManuscript/13_FigureSII_D.pdf b/07_PlotsManuscript/13_FigureSII_D.pdf deleted file mode 100644 index 2df2c6f..0000000 Binary files a/07_PlotsManuscript/13_FigureSII_D.pdf and /dev/null differ diff --git a/07_PlotsManuscript/14_FigureSIV_gwas.pdf b/07_PlotsManuscript/14_FigureSIV_gwas.pdf deleted file mode 100644 index 95f2bce..0000000 Binary files a/07_PlotsManuscript/14_FigureSIV_gwas.pdf and /dev/null differ