Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
DiffBrainNet/01_DiffExp/06b_comparison_HIP_deseq_regions_plots.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
244 lines (198 sloc)
7.34 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################## | |
## Project: DexStim Mouse Brain | |
## Date: 19.04.2021 | |
## Author: Nathalie | |
################################################## | |
# Compare deseq SV DE genes between HIP regions | |
# Plots for manuscript | |
library(RColorBrewer) | |
library(data.table) | |
library(ggplot2) | |
library(dplyr) | |
library(tidyverse) | |
library(tidyr) | |
# library(gridExtra) | |
library(pheatmap) | |
basepath <- "~/Documents/ownCloud/DexStim_RNAseq_Mouse/" | |
regions <- | |
c("vDG", "dDG", "vCA1", "dCA1") | |
# 1. Read DE tables from HIP regions ---------- | |
list_reg <- list() | |
list_reg_sig <- list() | |
list_genes_top10 <- list() | |
for (reg in regions) { | |
res <- | |
fread( | |
file = paste0( | |
basepath, | |
"tables/02_", | |
reg, | |
"_deseq2_Dex_1_vs_0_lfcShrink.txt" | |
), | |
sep = "\t" | |
) | |
na_indices <- which(is.na(res$padj)) | |
res$padj[na_indices] <- 1 | |
list_reg[[reg]] <- res | |
res_sig <- res[res$padj <= 0.1, ] | |
# res_sig <- res[res$log2FoldChange >= 1] | |
list_reg_sig[[reg]] <- res_sig | |
list_genes_top10[[reg]] <- res_sig$Ensembl_ID[1:25] | |
} | |
# 2. check uniqueness of DE genes --------------- | |
for (reg in regions){ | |
index_reg <- which(regions == reg) | |
# df <- bind_rows(list_reg[-index_reg], .id="region") %>% | |
# filter(DE0.1) | |
df <- bind_rows(list_reg_sig[-index_reg], .id="region") | |
list_reg_sig[[reg]]$regions_DE <- sapply(list_reg_sig[[reg]]$Ensembl_ID, | |
function(x) paste(df[df$Ensembl_ID == x,]$region, collapse = " ")) | |
list_reg_sig[[reg]]$unique_DE <- sapply(list_reg_sig[[reg]]$regions_DE, | |
function(x) x == "") | |
} | |
# 3. plot number of unique/not unique genes within HIP ------------------------- | |
# make dataframe to plot barplot | |
df_uni <- data.frame(region = character(), | |
uniqueness = character(), | |
genes = numeric()) | |
for (reg in regions){ | |
u <- table(list_reg_sig[[reg]]$unique_DE) | |
df_uni <- rbind(df_uni, list("region" = reg, "uniqueness" = "unique", | |
"genes" = u[2])) | |
df_uni <- rbind(df_uni, list("region" = reg, "uniqueness" = "not_unique", | |
"genes" = u[1])) | |
} | |
df_uni$region <- as.factor(df_uni$region) | |
df_uni$uniqueness <- as.factor(df_uni$uniqueness) | |
# sum DE genes per region for percentage label | |
df_uni <- df_uni %>% | |
group_by(region) %>% | |
mutate(sum = sum(genes)) | |
# stacked barplot with unique DE genes in HIP | |
ggplot(df_uni, aes(x = region, | |
y = genes, | |
fill = uniqueness)) + | |
geom_bar(stat = "identity", position = "stack") + | |
scale_fill_manual( | |
name = "", | |
labels = c("DE in multiple HIP regions", "DE unique in HIP"), | |
values = c("tan1", "royalblue") | |
) + | |
xlab("brain region") + | |
ylab("# diff. exp. genes") + | |
theme_light() + | |
theme( | |
axis.title.x = element_text(size = 15), | |
axis.title.y = element_text(size = 15), | |
axis.text.x = element_text(size = 12), | |
axis.text.y = element_text(size = 12), | |
legend.text = element_text(size = 12), | |
# legend.title = element_blank(), | |
legend.position = "top" | |
) + | |
geom_text(aes(label = paste0(round((genes / sum) * 100, digits = 1 | |
), "%")), | |
position = position_stack(vjust = 0.5), | |
size = 4) | |
ggsave(filename = paste0(basepath, "figures/06b_comparison_HIP_deseq_barplot_vertical_percentage.png"), | |
width = 8, | |
height = 6) | |
# 4. Heatmap of unique genes with highest p-value --------------------------- | |
# Function to get gene IDs of unique DE with lowest p-values | |
top_unique <- function(df){ | |
df <- df[df$unique_DE,] | |
genes <- df$Ensembl_ID[1:10] | |
return(genes) | |
} | |
# Concatenate DE tables ----------------- | |
# TODO: try also p-value | |
data <- bind_rows(list_reg, .id = "region") | |
genes <- unlist(lapply(list_reg_sig, top_unique)) | |
data <- data[data$Ensembl_ID %in% genes,] %>% | |
pivot_wider(id_cols = c(Gene_Symbol), | |
names_from = region, | |
#values_from = log2FoldChange) | |
values_from = padj) | |
data_mat <- as.matrix(data[,2:5]) | |
rownames(data_mat) <- data$Gene_Symbol | |
# Heatmap | |
# Complex heatmap | |
library(ComplexHeatmap) | |
library(circlize) | |
Heatmap(data_mat, | |
name = "log2FoldChange", #title of legend | |
column_title = "Hippocampal region", row_title = "Genes", | |
row_names_gp = gpar(fontsize = 7), # Text size for row names | |
cluster_columns = FALSE, | |
#col = colorRamp2(c(-4, 0, 4), c("blue", "#EEEEEE", "red")) | |
col = colorRamp2(c(0,1), c("red", "#EEEEEE")) | |
) | |
pheatmap(data_mat, | |
cutree_rows = 2, | |
cluster_cols = FALSE) | |
# 3. GO enrichment for the genes of each region ------------------ | |
go_enrichment_all <- function(df_reg, unique){ | |
if (unique){ | |
genes <- df_reg$Ensembl_ID[df_reg$unique_DE] | |
} else { | |
genes <- df_reg$Ensembl_ID | |
} | |
genes <- mapIds(org.Mm.eg.db, keys = genes, keytype = "ENSEMBL", | |
column = "ENTREZID") | |
background <- read.table(file = paste0(basepath, "tables/06_background_entrezID.txt"), | |
header = FALSE)$V1 | |
# enrichment | |
ego <- enrichGO(gene = as.character(genes), | |
universe = as.character(background), | |
OrgDb = org.Mm.eg.db, | |
ont = "BP", | |
pAdjustMethod = "BH", | |
#pvalueCutoff = 0.01, | |
#qvalueCutoff = 0.05, | |
minGSSize = 10, # min number of genes associated with GO term | |
maxGSSize = 10000, # max number of genes associated with GO term | |
readable = TRUE)@result | |
# ego_simple <- clusterProfiler::simplify( | |
# ego, | |
# cutoff = 0.7, | |
# by = "p.adjust", | |
# select_fun = min, | |
# measure = "Wang", | |
# semData = NULL | |
# )@result | |
return(ego) | |
} | |
list_GO <- list() | |
# GO.BPcollection = subsetCollection(GOcollection, tags = "GO.BP") | |
for (reg in regions){ | |
go_enr_unique <- go_enrichment_all(list_reg_sig[[reg]], TRUE) | |
# go_enr_all <- go_enrichment_all(list_reg[[reg]], GOcollection, FALSE) | |
list_GO[[reg]] <- go_enr_unique | |
} | |
# 4. Plot GO terms | |
df_all <- bind_rows(list_GO, .id="region") | |
for (reg in regions){ | |
df_reg <- list_GO[[reg]] %>% | |
# group_by(inGroups) %>% | |
# slice_min(order_by = p.adjust, n = 10) | |
slice_head(n = 10) | |
df <- df_all[df_all$ID %in% df_reg$ID,] | |
# df$dataSetName <- sapply(df$dataSetName, function(x) str_trunc(x, 45, "right")) | |
df$Description <- factor(df$Description, | |
levels = rev(reorder(df$Description[df$region==reg], df$p.adjust[df$region==reg]))) | |
df$region <- factor(df$region, levels = c("vDG", "dDG", "vCA1", "dCA1")) | |
#df$inGroups <- factor(df$inGroups) | |
#levels(df$inGroups) <- c("Biological Process", "Cellular Components", "Molecular Function") | |
# Plot results (plotted pvalues are not adjusted for multiple testing) | |
df %>% | |
arrange(desc(Description)) %>% | |
ggplot(aes(x=Description, y = -log10(p.adjust), fill = region)) + | |
geom_bar(position = position_dodge2(reverse=TRUE), stat="identity") + | |
geom_hline(yintercept = -log10(0.1),linetype="dashed", color = "red") + | |
coord_flip() + | |
xlab("GOterm") + | |
ggtitle(paste0("GO terms enriched for DE genes only in ",reg, " (Top 10 each)")) + | |
# facet_wrap(~inGroups, scales="free") + | |
theme(axis.text.y = element_text(size = 10)) | |
ggsave(filename = paste0(basepath, "figures", "/06b_HIP_", reg, "_GOterms_unique.png"), width = 13, height = 7) | |
} | |