diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e376fe2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.pdf +*.png +*.ai +*.csv diff --git a/01_DiffExp/13_GRexpression_plots.R b/01_DiffExp/13_GRexpression_plots.R new file mode 100644 index 0000000..37f764e --- /dev/null +++ b/01_DiffExp/13_GRexpression_plots.R @@ -0,0 +1,226 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 06.09.2022 +## Author: Nathalie +################################################## +# Plot GR expression level at control level in all brain regions +# and check if correlation with number of DE genes exists +# Suggestion of reviewer + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) + +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +mean_exp_file <- file.path(folder_table, "12_meanExp_regionDex.csv") +files <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) + +# IDs +gene_symbol_gr <- "Nr3c1" +gene_symbol_mr <- "Nr3c2" +ensembl_gr <- "ENSMUSG00000024431" +ensembl_mr <- "ENSMUSG00000031618" + + +## 1. Read mean expression data +data <- read.table(mean_exp_file, sep = ";", header = TRUE) %>% + dplyr::select(contains("CNTRL"), gene_symbol) %>% + dplyr::rename_all(funs(str_replace_all(., "_CNTRL", ""))) +gr_mr <- data[data$gene_symbol == gene_symbol_gr | data$gene_symbol == gene_symbol_mr,] %>% # subset to GR and MR + tibble::remove_rownames() %>% + tibble::column_to_rownames(var = "gene_symbol") +gr_mr <- as.data.frame(t(gr_mr)) %>% + tibble::rownames_to_column("region") + +## 2. Number of DE genes per region +# read files with DE results +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files) +expr_list <- lapply(files, function(x) fread(x)) +names(expr_list) <- regions_files +# concatenate DE results to get those for GR and MR +de_df <- bind_rows(expr_list, .id="region") %>% + dplyr::filter(V1 == ensembl_gr | V1 == ensembl_mr) %>% + dplyr::mutate(gene = case_when( + V1 == ensembl_gr ~ "Nr3c1", + V1 == ensembl_mr ~ "Nr3c2" + )) %>% + dplyr::select(region, gene, log2FoldChange, padj) %>% + pivot_wider(names_from = gene, values_from = c(log2FoldChange,padj)) +# filter each df to genes with FDR smaller 0.1 +expr_list <- lapply(expr_list, function(x) dplyr::filter(x, padj <= 0.1)) +df_degenes <- bind_rows(expr_list, .id="region") %>% + mutate(up_down = ifelse(log2FoldChange > 0, "up", "down")) +n_degenes <- df_degenes %>% + group_by(region) %>% + count() +n_updowngenes <- df_degenes %>% + group_by(region, up_down) %>% + count() + +## 3. Join mean expression values with number of DE genes +gr_mr <- inner_join(gr_mr, n_degenes, by = "region") +gr_mr <- dplyr::rename(gr_mr, nr_de = n) +gr_mr <- inner_join(gr_mr, de_df, by="region") + +gr_mr_updown <- inner_join(gr_mr, n_updowngenes, by = "region") %>% + mutate(up_down = factor(gr_mr_updown$up_down, levels = c("up", "down"))) + +## 4. Plot expression of GR and MR against number of DE genes +# GR +ggplot(gr_mr, aes(x=Nr3c1, y=nr_de, + size=log2FoldChange_Nr3c1, + color=-log10(padj_Nr3c1), + label=region)) + + geom_point() + + geom_text(hjust=-0.25, vjust=-0.25, + size=4, color="black") + + xlim(min(gr_mr$Nr3c1),max(gr_mr$Nr3c1)+0.1) + + xlab("Normalised Expression Level GR") + + ylab("Number of DE genes") + + scale_color_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,13)) + + scale_size_continuous(name="log2(FoldChange)") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_GR.pdf"), + width = 8, + height = 6) + +# MR +ggplot(gr_mr, aes(x=Nr3c2, y=nr_de, + size=log2FoldChange_Nr3c2, + color=-log10(padj_Nr3c2), + label=region)) + + geom_point() + + geom_text(hjust=-0.25, vjust=-0.25, + size=4, color="black") + + xlim(min(gr_mr$Nr3c2),max(gr_mr$Nr3c2)+0.1) + + xlab("Normalised Expression Level MR") + + ylab("Number of DE genes") + + scale_color_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,13)) + + scale_size_continuous(name="log2(FoldChange)") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_MR.pdf"), + width = 8, + height = 6) + + +## 5. Plot expression of GR and MR against number of DE genes - stratified by up and downregulation +# GR +ggplot(gr_mr_updown, aes(x=Nr3c1, y=n, + size=log2FoldChange_Nr3c1, + color=-log10(padj_Nr3c1), + label=region)) + + geom_point() + + geom_text(hjust=-0.25, vjust=-0.25, + size=4, color="black") + + xlim(min(gr_mr_updown$Nr3c1),max(gr_mr_updown$Nr3c1)+0.1) + + xlab("Normalised Expression Level GR") + + ylab("Number of DE genes") + + scale_color_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,13)) + + scale_size_continuous(name="log2(FoldChange)") + + facet_wrap(~up_down, nrow = 2, scales = "free", + labeller = labeller(up_down = + c("down" = "Downregulated genes", + "up" = "Upregulated genes") + )) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_GR_updown.pdf"), + width = 8, + height = 6) + +# MR +ggplot(gr_mr_updown, aes(x=Nr3c2, y=n, + size=log2FoldChange_Nr3c2, + color=-log10(padj_Nr3c2), + label=region)) + + geom_point() + + geom_text(hjust=-0.25, vjust=-0.25, + size=4, color="black") + + xlim(min(gr_mr_updown$Nr3c2),max(gr_mr_updown$Nr3c2)+0.1) + + xlab("Normalised Expression Level MR") + + ylab("Number of DE genes") + + scale_color_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,13)) + + scale_size_continuous(name="log2(FoldChange)") + + facet_wrap(~up_down, nrow = 2, scales = "free", + labeller = labeller(up_down = + c("down" = "Downregulated genes", + "up" = "Upregulated genes") + )) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_MR_updown.pdf"), + width = 8, + height = 6) + + +## 6. Plot GR/MR ratio against number of DE genes - stratified by up and downregulation +gr_mr_updown <- gr_mr_updown %>% + mutate(grmr_ratio = Nr3c1/Nr3c2) + +# GR/MR ratio +ggplot(gr_mr_updown, aes(x=grmr_ratio, y=n, + size=log2FoldChange_Nr3c2, + color=-log10(padj_Nr3c2), + label=region)) + + geom_point() + + geom_text(hjust=-0.25, vjust=-0.25, + size=4, color="black") + + xlim(min(gr_mr_updown$grmr_ratio),max(gr_mr_updown$grmr_ratio)+0.1) + + xlab("Normalised Expression Level MR") + + ylab("Number of DE genes") + + scale_color_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,13)) + + scale_size_continuous(name="log2(FoldChange)") + + facet_wrap(~up_down, nrow = 2, scales = "free", + labeller = labeller(up_down = + c("down" = "Downregulated genes", + "up" = "Upregulated genes") + )) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_GRMRratio_updown.pdf"), + width = 8, + height = 6) diff --git a/01_DiffExp/14_CompGWAS_DE.R b/01_DiffExp/14_CompGWAS_DE.R new file mode 100644 index 0000000..07ac274 --- /dev/null +++ b/01_DiffExp/14_CompGWAS_DE.R @@ -0,0 +1,147 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 07.09.2022 +## Author: Nathalie +################################################## +# Compare DEs and DNs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# trait of interest +traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ") +# trait <- "MDD" + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_hmagma <- paste0(basedir, "/data/reviews/H-MAGMAv1.08_Adult_brain_output.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) + +# unique DE genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(V1) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +de_uni <- list() +for (r in regions_files){ + de_uni[[r]] <- data_unique$V1[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + + +## 2. Mapping of mouse IDs to human IDs +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +de_human <- lapply(de_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +de_uni_human <- lapply(de_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + +res_list <- list() +res_list_uni <- list() +for (trait in traits){ + + ## 2. Read GWAS/H-MAGMA data + data <- read_excel(file_hmagma, sheet=paste0("H-MAGMA_",trait)) %>% + arrange(ZSTAT) %>% # rank gene list according to zscore as suggested in paper + filter(GENE %in% background_human) + ranks <- data$ZSTAT + names(ranks) <- data$GENE + + + ## 3. Run fgsea for Gene Set Enrichment Analysis + fgseaRes <- fgsea(pathways = de_human, + stats = ranks) + # sampleSize = 600) + res_list[[trait]] <- fgseaRes + + # unique DE genes + fgseaRes_uni <- fgsea(pathways = de_uni_human, + stats = ranks) + # sampleSize = 600) + res_list_uni[[trait]] <- fgseaRes_uni + + plotEnrichment(de_uni_human[["PFC"]], + ranks) + +} + + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14_Reviewer2_1a_DEall.pdf"), + width = 8, + height = 6) + + +# plot for unique DE genes +df <- bind_rows(res_list_uni, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14_Reviewer2_1a_DEunique.pdf"), + width = 8, + height = 6) diff --git a/01_DiffExp/14a_CompGWAS_DE_Enrichment.R b/01_DiffExp/14a_CompGWAS_DE_Enrichment.R new file mode 100644 index 0000000..afc7512 --- /dev/null +++ b/01_DiffExp/14a_CompGWAS_DE_Enrichment.R @@ -0,0 +1,151 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 19.09.2022 +## Author: Nathalie +################################################## +# Compare DEs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) +# --> instead of GSEA we look at normal enrichment + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# trait of interest +traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ", "MS") +# trait <- "MDD" + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_hmagma <- paste0(basedir, "/data/reviews/H-MAGMAv1.08_Adult_brain_output.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) + +# unique DE genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(V1) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +de_uni <- list() +for (r in regions_files){ + de_uni[[r]] <- data_unique$V1[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + + +## 2. Mapping of mouse IDs to human IDs +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +de_human <- lapply(de_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +de_uni_human <- lapply(de_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + +res_list <- list() +res_list_uni <- list() +for (trait in traits){ + + ## 2. Read GWAS/H-MAGMA data + data <- read_excel(file_hmagma, sheet=paste0("H-MAGMA_",trait)) %>% + arrange(ZSTAT) %>% # rank gene list according to zscore as suggested in paper + filter(GENE %in% background_human) %>% + # mutate(fdr = p.adjust(P, method = "fdr")) %>% + # filter(fdr <= 0.05) + filter(P <= 0.05) + + ## 3. Run fora for overrepresentation test + foraRes <- fora(pathways = de_human, + genes = data$GENE, + universe = background_human) + # sampleSize = 600) + res_list[[trait]] <- foraRes + + # unique DE genes + foraRes_uni <- fora(pathways = de_uni_human, + genes = data$GENE, + universe = background_human) + # sampleSize = 600) + res_list_uni[[trait]] <- foraRes_uni + + # plotEnrichment(de_human[["PFC"]], + # ranks) + +} + + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,2)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEall.pdf"), + width = 8, + height = 6) + + +# plot for unique DE genes +df_uni <- bind_rows(res_list_uni, .id="trait") +df_uni$FDR <- p.adjust(df_uni$pval, method = "fdr") # apply correction across all tests +ggplot(df_uni, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEunique.pdf"), + width = 8, + height = 6) \ No newline at end of file diff --git a/01_DiffExp/14b_CompGWAS_DE_otherTable.R b/01_DiffExp/14b_CompGWAS_DE_otherTable.R new file mode 100644 index 0000000..951a1dd --- /dev/null +++ b/01_DiffExp/14b_CompGWAS_DE_otherTable.R @@ -0,0 +1,156 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 19.09.2022 +## Author: Nathalie +################################################## +# Compare DEs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) +# --> instead of GSEA we look at normal enrichment + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# trait of interest +traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ", "MS") +# trait <- "MDD" + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_hmagma <- paste0(basedir, "/data/reviews/MAGMA published.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) + +# unique DE genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(V1) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +de_uni <- list() +for (r in regions_files){ + de_uni[[r]] <- data_unique$V1[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + + +## 2. Mapping of mouse IDs to human IDs +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +de_human <- lapply(de_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +de_uni_human <- lapply(de_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + +magma <- read_excel(file_hmagma, sheet = "Disorder-associated genes") %>% + pivot_longer(cols = ADHD:PD, + names_to = "disease", + values_to = "genes") %>% + filter(!is.na(genes)) +magma_ensembl <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), + filters = "hgnc_symbol", + values = magma$genes, mart = human) +magma_ensembl <- right_join(magma, magma_ensembl, by=c("genes"="hgnc_symbol")) + +res_list <- list() +res_list_uni <- list() +for (trait in traits){ + + # get genes for disease + dis_genes <- magma_ensembl$ensembl_gene_id[magma_ensembl$disease == trait] + + ## 3. Run fora for overrepresentation test + foraRes <- fora(pathways = de_human, + genes = dis_genes, + universe = background_human) + # sampleSize = 600) + res_list[[trait]] <- foraRes + + # unique DE genes + foraRes_uni <- fora(pathways = de_uni_human, + genes = dis_genes, + universe = background_human) + # sampleSize = 600) + res_list_uni[[trait]] <- foraRes_uni + + # plotEnrichment(de_human[["PFC"]], + # ranks) + +} + + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,3)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +# ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEall.pdf"), +# width = 8, +# height = 6) + + +# plot for unique DE genes +df_uni <- bind_rows(res_list_uni, .id="trait") +df_uni$FDR <- p.adjust(df_uni$pval, method = "fdr") # apply correction across all tests +ggplot(df_uni, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +# ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEunique.pdf"), +# width = 8, +# height = 6) \ No newline at end of file diff --git a/01_DiffExp/14c_CompGWAS_DE_CrossDisorder.R b/01_DiffExp/14c_CompGWAS_DE_CrossDisorder.R new file mode 100644 index 0000000..255a906 --- /dev/null +++ b/01_DiffExp/14c_CompGWAS_DE_CrossDisorder.R @@ -0,0 +1,140 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 19.09.2022 +## Author: Nathalie +################################################## +# Compare DEs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) +# --> instead of GSEA we look at normal enrichment + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_crossdisorder <- paste0(basedir, "/data/reviews/cross-disorder.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) + +# unique DE genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(V1) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +de_uni <- list() +for (r in regions_files){ + de_uni[[r]] <- data_unique$V1[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + + +## 2. Mapping of mouse IDs to human IDs +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +de_human <- lapply(de_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +de_uni_human <- lapply(de_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + + + +## 2. Read GWAS/H-MAGMA data +data <- read_excel(file_crossdisorder, sheet="Sheet1") +data_ens <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), # no difference between v3 and v4 + filters = "hgnc_symbol", + values = data$`Cross-disorder GWAS`, mart = human) +cd_genes <- data_ens$ensembl_gene_id[data_ens$ensembl_gene_id %in% background_human] + +## 3. Run fora for overrepresentation test +foraRes <- fora(pathways = de_human, + genes = cd_genes, + universe = background_human) +# sampleSize = 600) +res_list[[trait]] <- foraRes + +# unique DE genes +foraRes_uni <- fora(pathways = de_uni_human, + genes = data$GENE, + universe = background_human) +# sampleSize = 600) +res_list_uni[[trait]] <- foraRes_uni + + + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,2)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEall.pdf"), + width = 8, + height = 6) + + +# plot for unique DE genes +df_uni <- bind_rows(res_list_uni, .id="trait") +df_uni$FDR <- p.adjust(df_uni$pval, method = "fdr") # apply correction across all tests +ggplot(df_uni, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEunique.pdf"), + width = 8, + height = 6) \ No newline at end of file diff --git a/01_DiffExp/15_CompGWAS_DN.R b/01_DiffExp/15_CompGWAS_DN.R new file mode 100644 index 0000000..c45ed7c --- /dev/null +++ b/01_DiffExp/15_CompGWAS_DN.R @@ -0,0 +1,148 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 07.09.2022 +## Author: Nathalie +################################################## +# Compare DEs and DNs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) + +# TODO: think about background --> restrict to genes in our dataset? + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# trait of interest +traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ") +# trait <- "MDD" + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_hmagma <- paste0(basedir, "/data/reviews/H-MAGMAv1.08_Adult_brain_output.xlsx") +files_dn <- list.files(path = paste0(folder_table, "/coExpression_kimono/03_AnalysisFuncoup/"), + pattern = "_funcoup_treatment_nodebetweennessNorm_betacutoff0.01.csv$", full.names = TRUE) +regions_files <- sub(".*03a_(\\w*)_funcoup_treatment_nodebetween.*","\\1",files_dn) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_dn, function(x) fread(x) %>% filter(nodebetweenness_norm >= 1)) +names(expr_list) <- regions_files + +dn_list <- lapply(expr_list, function(x) x$ensembl_id) + +# unique DN genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(ensembl_id) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +dn_uni <- list() +for (r in regions_files){ + dn_uni[[r]] <- data_unique$ensembl_id[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +dn_human <- lapply(dn_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +dn_uni_human <- lapply(dn_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + +res_list <- list() +res_list_uni <- list() +for (trait in traits){ + + ## 2. Read GWAS/H-MAGMA data + data <- read_excel(file_hmagma, sheet=paste0("H-MAGMA_",trait)) %>% + arrange(ZSTAT) %>% # rank gene list according to zscore as suggested in paper + filter(GENE %in% background_human) + ranks <- data$ZSTAT + names(ranks) <- data$GENE + + + ## 3. Run fgsea for Gene Set Enrichment Analysis + fgseaRes <- fgsea(pathways = dn_human, + stats = ranks) + # sampleSize = 600) + res_list[[trait]] <- fgseaRes + + # unique DN genes + fgseaRes_uni <- fgsea(pathways = dn_uni_human, + stats = ranks) + # sampleSize = 600) + res_list_uni[[trait]] <- fgseaRes_uni + + # plotEnrichment(dn_human[["PFC"]], + # ranks) + +} + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/15_Reviewer2_1b_DNall.pdf"), + width = 8, + height = 6) + + +# plot for unique DN genes +df <- bind_rows(res_list_uni, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/15_Reviewer2_1b_DNunique.pdf"), + width = 8, + height = 6) diff --git a/01_DiffExp/15a_CompGWAS_DN_Enrichment.R b/01_DiffExp/15a_CompGWAS_DN_Enrichment.R new file mode 100644 index 0000000..2ae82d4 --- /dev/null +++ b/01_DiffExp/15a_CompGWAS_DN_Enrichment.R @@ -0,0 +1,150 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 07.09.2022 +## Author: Nathalie +################################################## +# Compare DEs and DNs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) + +# TODO: think about background --> restrict to genes in our dataset? + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# trait of interest +traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ", "MS") +# trait <- "MDD" + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_hmagma <- paste0(basedir, "/data/reviews/H-MAGMAv1.08_Adult_brain_output.xlsx") +files_dn <- list.files(path = paste0(folder_table, "/coExpression_kimono/03_AnalysisFuncoup/"), + pattern = "_funcoup_treatment_nodebetweennessNorm_betacutoff0.01.csv$", full.names = TRUE) +regions_files <- sub(".*03a_(\\w*)_funcoup_treatment_nodebetween.*","\\1",files_dn) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_dn, function(x) fread(x) %>% filter(nodebetweenness_norm >= 1)) +names(expr_list) <- regions_files + +dn_list <- lapply(expr_list, function(x) x$ensembl_id) + +# unique DN genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(ensembl_id) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +dn_uni <- list() +for (r in regions_files){ + dn_uni[[r]] <- data_unique$ensembl_id[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +dn_human <- lapply(dn_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +dn_uni_human <- lapply(dn_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + +res_list <- list() +res_list_uni <- list() +for (trait in traits){ + + ## 2. Read GWAS/H-MAGMA data + data <- read_excel(file_hmagma, sheet=paste0("H-MAGMA_",trait)) %>% + arrange(ZSTAT) %>% # rank gene list according to zscore as suggested in paper + filter(GENE %in% background_human) %>% + # mutate(fdr = p.adjust(P, method = "fdr")) %>% + # filter(fdr <= 0.05) + filter(P <= 0.05) + + ## 3. Run fora for overrepresentation test + foraRes <- fora(pathways = dn_human, + genes = data$GENE, + universe = background_human) + # sampleSize = 600) + res_list[[trait]] <- foraRes + + # unique DE genes + foraRes_uni <- fora(pathways = dn_uni_human, + genes = data$GENE, + universe = background_human) + # sampleSize = 600) + res_list_uni[[trait]] <- foraRes_uni + + # plotEnrichment(dn_human[["PFC"]], + # ranks) + +} + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,2)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/15a_Reviewer2_1b_DNall.pdf"), + width = 8, + height = 6) + + +# plot for unique DN genes +df <- bind_rows(res_list_uni, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/15a_Reviewer2_1b_DNunique.pdf"), + width = 8, + height = 6) diff --git a/01_DiffExp/16_CompDexHumanResults.R b/01_DiffExp/16_CompDexHumanResults.R new file mode 100644 index 0000000..f1565b7 --- /dev/null +++ b/01_DiffExp/16_CompDexHumanResults.R @@ -0,0 +1,108 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 14.09.2022 +## Author: Nathalie +################################################## +# Compare DEs and DNs with DE genes from human Dex study +# (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8669026/) + + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(biomaRt) +library(ggplot2) +library(UpSetR) +library(rlist) + + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_human <- paste0(basedir, "/data/reviews/DexHumanBlood_MooreEtAl/41398_2021_1756_MOESM1_ESM.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +print(sub(".*02_(\\w*)_deseq2.*","\\1",files_de)) +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) +file_background <- paste0(folder_table, "/06_background.txt") + +## 0. Set up BioMart +# map mouse Ensembl Ids to human Ensembl Ids --> homology mapping +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors + + +## 1. Read differentially expressed genes from human data into list +data <- read_excel(file_human, sheet="2_DEA_results") + +# get Ensembl IDs for human genes +ens_human <- getBM(attributes = c("ensembl_gene_id", "illumina_humanht_12_v3"), # no difference between v3 and v4 + filters = "illumina_humanht_12_v3", + values = data$Probe_Id, mart = human) +data <- dplyr::inner_join(data, ens_human, by = c("Probe_Id"="illumina_humanht_12_v3")) + +## 2.Read background genes +background <- read.table(file_background, + row.names = NULL) +# get human ensembl IDs for background genes +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + +## 3. Subset human DE genes +# subset human DE genes to those in background set +data <- data[data$ensembl_gene_id %in% background_human,] + +# DE genes +data_de <- data[data$padj <= 0.05,] + +## 4. Read DE tables from all regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) +de_list_human <- lapply(de_list, + function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +# unique DE genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(V1) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +de_uni <- list() +for (r in regions_files){ + tmp <- data_unique$V1[data_unique$region == r] + de_uni[[r]] <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=tmp, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 +} + + +## 5. Plot overlap between human and mouse DE genes +# unique DE genes +list_flat <- c(de_uni, list("human" = data_de$ensembl_gene_id)) +pdf(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/16_Reviewer2_0_DEGDexHuman_DEunique.pdf"), + width = 12, height = 8) +par(mar=c(6,5,4,2) + 0.1) +print(upset(fromList(list_flat), nsets = 9, order.by = "freq", + text.scale = c(1.8, 1.9, 1.8, 1.9, 1.9, 1.9), + sets.x.label = "#DE genes", + mainbar.y.label = "#DE genes in intersection")) +dev.off() + +# all DE genes +list_flat <- c(de_list_human, list("human" = data_de$ensembl_gene_id)) +pdf(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/16_Reviewer2_0_DEGDexHuman_DEall.pdf"), + width = 12, height = 8) +print(upset(fromList(list_flat), nsets = 9, order.by = "freq", + text.scale = c(1.8, 1.9, 1.8, 1.9, 1.9, 1.9), + sets.x.label = "#DE genes", + mainbar.y.label = "#DE genes in intersection")) +dev.off() diff --git a/01_DiffExp/17_CompPostMortemResults.R b/01_DiffExp/17_CompPostMortemResults.R new file mode 100644 index 0000000..f7df5bb --- /dev/null +++ b/01_DiffExp/17_CompPostMortemResults.R @@ -0,0 +1,90 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 14.09.2022 +## Author: Nathalie +################################################## +# Compare DEs and DNs with DE genes from post mortem study +# (https://pubmed.ncbi.nlm.nih.gov/30545856/) + + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(biomaRt) +library(ggplot2) +library(UpSetR) +library(rlist) + + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_pm <- paste0(basedir, "/data/reviews/PostmortemBrain/aat8127_table_s1.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +print(sub(".*02_(\\w*)_deseq2.*","\\1",files_de)) +files_dn <- list.files(path = paste0(folder_table, "/coExpression_kimono/03_AnalysisFuncoup/"), + pattern = "_funcoup_treatment_nodebetweennessNorm_betacutoff0.01.csv$", full.names = TRUE) +regions_files <- sub(".*03a_(\\w*)_funcoup_treatment_nodebetween.*","\\1",files_dn) +file_background <- paste0(folder_table, "/06_background.txt") + +## 1. Set up BioMart +# map mouse Ensembl Ids to human Ensembl Ids --> homology mapping +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors + + +## 2. Read background genes and get human background IDs +background <- read.table(file_background, + row.names = NULL) +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + +## 3. Read differentially expressed genes and transcripts from postmortem study +list_pm_genes <- list() +for (r in c("DGE", "DTE", "DTU")){ + # read file including ASD, SCZ and BD results + data <- read_excel(file_pm, sheet=r) + + list_pm_genes[[r]] <- list() + + for (d in c("ASD", "BD", "SCZ")){ + data_d <- data %>% + rename_with(~ gsub('DTU\\.', '', .x)) %>% # DTU list has prefix "DTU" in column names + dplyr::select("ensembl_gene_id", starts_with(d)) %>% + rename_with(~ gsub(paste0(d,'\\.'), '', .x)) %>% + rename_with(tolower) %>% # DTU table has FDR instead of fdr + dplyr::filter(fdr <= 0.05) + list_pm_genes[[r]][[d]] <- data_d$ensembl_gene_id[data_d$ensembl_gene_id %in% background_human] + } +} + + +## 4. Read DE tables from all regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) +de_list_human <- lapply(de_list, + function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + + +## 5. Upset plot of PFC and post mortem brain DE genes +list_flat <- list("PFC.mouse" = de_list_human[["PFC"]], + "PFC.human.ASD" = list_pm_genes[["DGE"]][["ASD"]], + "PFC.human.BD" = list_pm_genes[["DGE"]][["BD"]], + "PFC.human.SCZ" = list_pm_genes[["DGE"]][["SCZ"]]) +pdf(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/17_Reviewer2_2_DEGpostmortem.pdf"), + width = 12, height = 8) +par(mar=c(6,5,4,2) + 0.1) +print(upset(fromList(list_flat), nsets = 4, order.by = "freq", + text.scale = c(1.8, 1.9, 1.8, 1.9, 1.9, 1.9), + sets.x.label = "#DE genes", + mainbar.y.label = "#DE genes in intersection")) +dev.off() diff --git a/01_DiffExp/18_CompNetworksPostMortem.R b/01_DiffExp/18_CompNetworksPostMortem.R new file mode 100644 index 0000000..cca58a7 --- /dev/null +++ b/01_DiffExp/18_CompNetworksPostMortem.R @@ -0,0 +1,251 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 16.09.2022 +## Author: Nathalie +################################################## +# Compare hub genes with disease associated modules +# from postmortem study +# (https://pubmed.ncbi.nlm.nih.gov/30545856/) + + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(biomaRt) +library(ggplot2) +library(ggpubr) + + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_modules <- paste0(basedir, "/data/reviews/PostmortemBrain/aat8127_table_s5.xlsx") +file_de <- paste0(folder_table, "/02_PFC_deseq2_Dex_0_vs_1_lfcShrink.txt") +file_dn <- paste0(folder_table, "/coExpression_kimono/03_AnalysisFuncoup/", + "03a_PFC_funcoup_treatment_nodebetweennessNorm_betacutoff0.01.csv") +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Set up BioMart +# map mouse Ensembl Ids to human Ensembl Ids --> homology mapping +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors + +## 2. Read modules and disease associations +modules_genes <- read_excel(file_modules, sheet="geneModules") +modules_disease <- read_excel(file_modules, sheet = "Module-DiseaseAssociation") %>% + filter(Network == "gene") + + +## 3. Read DE and hub genes from mouse in PFC +# DE genes +res <- read.table(file_de, sep="\t", + header = TRUE) +na_indices <- which(is.na(res$padj)) +res$padj[na_indices] <- 1 +res_sig <- res[res$padj <= 0.1,] +de_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=rownames(res_sig), mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 +# hub genes +res_hub <- fread(file_dn) %>% filter(nodebetweenness_norm >= 1) +dn_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=res_hub$ensembl_id, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + +# DE and hub genes +dedn_human <- intersect(de_human, dn_human) +de_human <- setdiff(de_human, dedn_human) +dn_human <- setdiff(dn_human, dedn_human) + +## 4. Read background mouse genes +background <- read.table(file_background, + row.names = NULL) +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + +## 5. DE and hub genes per module +modules_genes <- modules_genes %>% + filter(ensembl_gene_id %in% background_human) %>% # subset modules to background genes + mutate(de = ifelse(ensembl_gene_id %in% de_human, TRUE, FALSE), # add column to indicate if gene is DE gene + hub = ifelse(ensembl_gene_id %in% dn_human, TRUE, FALSE), + de_hub = ifelse(ensembl_gene_id %in% dedn_human, TRUE, FALSE)) # add column to indicate if gene is hub gene + +# group by module and count +modules_count <- modules_genes %>% + count(Module) + +# count DE and hub per module +de_count <- modules_genes %>% + group_by(Module) %>% + count(de) %>% + filter(de) + +hub_count <- modules_genes %>% + group_by(Module) %>% + count(hub) %>% + filter(hub) + +dedn_count <- modules_genes %>% + group_by(Module) %>% + count(de_hub) %>% + filter(de_hub) + +# fisher's exact test for overrepresentation of DE and hub genes +# one-sided fisher's exact test with alternative "greater" is the same as +# hypergeometric test (https://stats.stackexchange.com/questions/288081/use-fishers-exact-test-or-a-hypergeometric-test) +# Hypergeometric test (phyper) is also called by fora function from fgsea package +fis_test <- function(a1,b1,c1,d1){ + fisher.test(matrix(c(a1,b1,c1,d1), + nrow = 2, ncol = 2), alternative = "greater")$p.value +} +de_test <- modules_count %>% + left_join(de_count[c("Module", "n")], by="Module") %>% + replace(is.na(.), 0) %>% + mutate(b = n.x - n.y, + c = length(de_human) - n.y, + d = nrow(modules_genes) - n.y - b - c) %>% + rowwise() %>% + mutate(fis_p = fis_test(n.y,b,c,d)) +de_test$fis_fdr <- p.adjust(de_test$fis_p, method = "fdr") +de_test <- de_test %>% + mutate( + label_de = case_when( + fis_fdr > 0.05 ~ "", + fis_fdr > 0.01 ~ "*", + fis_fdr > 0.001 ~ "**", + !is.na(fis_fdr) ~ "***", + TRUE ~ NA_character_ + ) + ) + +hub_test <- modules_count %>% + left_join(hub_count[c("Module", "n")], by="Module") %>% + replace(is.na(.), 0) %>% + mutate(b = n.x - n.y, + c = length(dn_human) - n.y, + d = nrow(modules_genes) - n.y - b - c) %>% + rowwise() %>% + mutate(fis_p = fis_test(n.y,b,c,d)) +hub_test$fis_fdr <- p.adjust(hub_test$fis_p, method = "fdr") +hub_test <- hub_test %>% + mutate( + label_hub = case_when( + fis_fdr > 0.05 ~ "", + fis_fdr > 0.01 ~ "*", + fis_fdr > 0.001 ~ "**", + !is.na(fis_fdr) ~ "***", + TRUE ~ NA_character_ + ) + ) + +# join DE and hub counts to general count +df_count <- modules_count %>% + left_join(de_count[c("Module", "n")], by="Module") %>% + left_join(hub_count[c("Module", "n")], by="Module") %>% + left_join(dedn_count[c("Module", "n")], by="Module") %>% + mutate(o = factor(as.numeric(str_replace(Module, "geneM", "")), + levels = sort(as.numeric(str_replace(Module, "geneM", "")))) + ) %>% + replace(is.na(.), 0) %>% + mutate(genes_de = n.y, + genes_hub = n.x.x, + genes_dehub = n.y.y, + genes_rest = n.x-n.y-n.x.x-n.y.y) %>% + pivot_longer(cols = starts_with("genes_"), + names_to = "colour", + values_to = "count") %>% + mutate(colour = factor(colour, + levels = c("genes_rest", "genes_hub", "genes_de", "genes_dehub"))) %>% + left_join(de_test[c("Module", "label_de")], by="Module") %>% + left_join(hub_test[c("Module", "label_hub")], by="Module") +df_count$label_de[df_count$colour != "genes_de"] <- "" +df_count$label_hub[df_count$colour != "genes_hub"] <- "" + + +# prepare disease df +df_disease <- modules_disease %>% + mutate(o = factor(as.numeric(str_replace(ModulName, "geneM", "")), + levels = levels(df_count$o))) + + +## 6. Barplot +b <- ggplot(df_count, aes(x=o, y=count, fill=colour)) + + geom_bar(stat = "identity") + + scale_fill_manual(breaks = c("genes_rest", "genes_hub", "genes_de", "genes_dehub"), + values = c("grey", "darkred", "orange", "#7E2274"), + labels = c("Module genes not DE or hub", + "Hub genes", "DE genes", "DE and hub genes")) + + guides(fill = guide_legend(title = "Gene set")) + + xlab("Module") + + ylab("Number of genes") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) + +b_perc <- ggplot(df_count, aes(x=o, y=count, fill=colour)) + + geom_bar(position = "fill", stat = "identity") + + geom_text(aes(label=label_de), + position=position_fill(vjust=0.5), colour="white", size =6) + + geom_text(aes(label=label_hub), + position=position_fill(vjust=0.5), colour="white", size =6) + + scale_fill_manual(breaks = c("genes_rest", "genes_hub", "genes_de", "genes_dehub"), + values = c("grey", "darkred", "orange", "#7E2274"), + labels = c("Module genes not DE or hub", + "Hub genes", "DE genes", "DE and hub genes")) + + xlab("Module") + + ylab("Percentage of genes") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12) + ) + +h <- ggplot(df_disease, aes(x=o, y=Group, fill=-log10(fdr))) + + geom_tile() + + scale_fill_gradient(name = "Disease association: -log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,max(-log10(df_disease$fdr)))) + + scale_x_discrete(drop=FALSE) + + ylab("Disease") + + xlab("Module") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) + + +comb <- ggarrange(b + theme(axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + axis.title.x = element_blank()), + b_perc + theme(axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + axis.title.x = element_blank()), + h , + #legend.grob = get_legend(b), + nrow = 3, + heights=c(2,2,1), + align = "v") +comb + +ggexport(comb, + filename = paste0(basedir, + "/scripts_manuscript/04_PlotsManuscript/Revision/18_Reviewer2_2b_Networks_Percentage_significanceFDR.pdf"), + width = 14, height = 8) + + diff --git a/01_DiffExp/19_EnrichmentTcf4DN_mutantMice.R b/01_DiffExp/19_EnrichmentTcf4DN_mutantMice.R new file mode 100644 index 0000000..4860f96 --- /dev/null +++ b/01_DiffExp/19_EnrichmentTcf4DN_mutantMice.R @@ -0,0 +1,75 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 16.09.2022 +## Author: Nathalie +################################################## +# Test if Tcf4 DN is enriched for genes that are actually +# dysregulated upon Tcf4 mutant mice: +# (https://pubmed.ncbi.nlm.nih.gov/32015540/) + + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse/" +folder_table <- paste0(basedir,"/tables") +file_tcf4 <- paste0(basedir, "data/reviews/TCF4 PFC DN.xlsx") +file_mutant_tcf4 <- paste0(basedir, "data/reviews/Phan et al_Nat Neuro_2020 TCF4.xls") +file_background <- paste0(folder_table, "/06_background.txt") + + +## 0. Set up BioMart for ID mapping +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors + + +## 1. Read background mouse genes from our dataset +background <- read.table(file_background, + row.names = NULL) + + +## 2. Read Tcf4 DN from PFC +tcf4_dn <- read_excel(file_tcf4, sheet = "Sheet1") +tcf4_dn <- getBM(attributes = c("ensembl_gene_id", "mgi_symbol"), # no difference between v3 and v4 + filters = "mgi_symbol", + values = tcf4_dn$`unique identifiers`, mart = mouse) + + +## 3. Read mutant Tcf4 mice DE genes +mutant <- list() +# adult mice +tcf4_mutant <- read_excel(file_mutant_tcf4, sheet = "Supp Table 2-Adult DESeq2") +tcf4_demutant <- tcf4_mutant$...1[tcf4_mutant$padj <= 0.05] +tcf4_demutant <- tcf4_demutant[tcf4_demutant %in% background$V1] +mutant[["tcf4_mutant_adult"]] <- tcf4_demutant + +# p1 mice +tcf4_mutant <- read_excel(file_mutant_tcf4, sheet = "Supp Table 2-p1 DESeq2") +tcf4_demutant <- tcf4_mutant$...1[tcf4_mutant$padj <= 0.05] +tcf4_demutant <- tcf4_demutant[tcf4_demutant %in% background$V1] +mutant[["tcf4_mutant_p1"]] <- tcf4_demutant + +# both +tcf4_mutant <- read_excel(file_mutant_tcf4, sheet = "Supp Table 2-Both DESeq2") +tcf4_demutant <- tcf4_mutant$...1[tcf4_mutant$padj_p1 <= 0.05 & tcf4_mutant$padj_Adult] +tcf4_demutant <- tcf4_demutant[tcf4_demutant %in% background$V1] +mutant[["both"]] <- tcf4_demutant + +## 4. Overrepresentation test +# Check if mutant Tcf4 DE genes are enriched in our network +foraRes <- fora(pathways = mutant, + genes = tcf4_dn$ensembl_gene_id, + universe = background$V1) + + +## 5. Write to file +write.csv(as.matrix(foraRes), + file = paste0(basedir, + "scripts_manuscript/04_PlotsManuscript/Revision/19_Reviewer2_3_Tcf4_enrichments.csv")) + diff --git a/04_PlotsManuscript/Revision/13_GRexpression_plots.R b/04_PlotsManuscript/Revision/13_GRexpression_plots.R new file mode 100644 index 0000000..37f764e --- /dev/null +++ b/04_PlotsManuscript/Revision/13_GRexpression_plots.R @@ -0,0 +1,226 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 06.09.2022 +## Author: Nathalie +################################################## +# Plot GR expression level at control level in all brain regions +# and check if correlation with number of DE genes exists +# Suggestion of reviewer + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) + +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +mean_exp_file <- file.path(folder_table, "12_meanExp_regionDex.csv") +files <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) + +# IDs +gene_symbol_gr <- "Nr3c1" +gene_symbol_mr <- "Nr3c2" +ensembl_gr <- "ENSMUSG00000024431" +ensembl_mr <- "ENSMUSG00000031618" + + +## 1. Read mean expression data +data <- read.table(mean_exp_file, sep = ";", header = TRUE) %>% + dplyr::select(contains("CNTRL"), gene_symbol) %>% + dplyr::rename_all(funs(str_replace_all(., "_CNTRL", ""))) +gr_mr <- data[data$gene_symbol == gene_symbol_gr | data$gene_symbol == gene_symbol_mr,] %>% # subset to GR and MR + tibble::remove_rownames() %>% + tibble::column_to_rownames(var = "gene_symbol") +gr_mr <- as.data.frame(t(gr_mr)) %>% + tibble::rownames_to_column("region") + +## 2. Number of DE genes per region +# read files with DE results +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files) +expr_list <- lapply(files, function(x) fread(x)) +names(expr_list) <- regions_files +# concatenate DE results to get those for GR and MR +de_df <- bind_rows(expr_list, .id="region") %>% + dplyr::filter(V1 == ensembl_gr | V1 == ensembl_mr) %>% + dplyr::mutate(gene = case_when( + V1 == ensembl_gr ~ "Nr3c1", + V1 == ensembl_mr ~ "Nr3c2" + )) %>% + dplyr::select(region, gene, log2FoldChange, padj) %>% + pivot_wider(names_from = gene, values_from = c(log2FoldChange,padj)) +# filter each df to genes with FDR smaller 0.1 +expr_list <- lapply(expr_list, function(x) dplyr::filter(x, padj <= 0.1)) +df_degenes <- bind_rows(expr_list, .id="region") %>% + mutate(up_down = ifelse(log2FoldChange > 0, "up", "down")) +n_degenes <- df_degenes %>% + group_by(region) %>% + count() +n_updowngenes <- df_degenes %>% + group_by(region, up_down) %>% + count() + +## 3. Join mean expression values with number of DE genes +gr_mr <- inner_join(gr_mr, n_degenes, by = "region") +gr_mr <- dplyr::rename(gr_mr, nr_de = n) +gr_mr <- inner_join(gr_mr, de_df, by="region") + +gr_mr_updown <- inner_join(gr_mr, n_updowngenes, by = "region") %>% + mutate(up_down = factor(gr_mr_updown$up_down, levels = c("up", "down"))) + +## 4. Plot expression of GR and MR against number of DE genes +# GR +ggplot(gr_mr, aes(x=Nr3c1, y=nr_de, + size=log2FoldChange_Nr3c1, + color=-log10(padj_Nr3c1), + label=region)) + + geom_point() + + geom_text(hjust=-0.25, vjust=-0.25, + size=4, color="black") + + xlim(min(gr_mr$Nr3c1),max(gr_mr$Nr3c1)+0.1) + + xlab("Normalised Expression Level GR") + + ylab("Number of DE genes") + + scale_color_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,13)) + + scale_size_continuous(name="log2(FoldChange)") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_GR.pdf"), + width = 8, + height = 6) + +# MR +ggplot(gr_mr, aes(x=Nr3c2, y=nr_de, + size=log2FoldChange_Nr3c2, + color=-log10(padj_Nr3c2), + label=region)) + + geom_point() + + geom_text(hjust=-0.25, vjust=-0.25, + size=4, color="black") + + xlim(min(gr_mr$Nr3c2),max(gr_mr$Nr3c2)+0.1) + + xlab("Normalised Expression Level MR") + + ylab("Number of DE genes") + + scale_color_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,13)) + + scale_size_continuous(name="log2(FoldChange)") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_MR.pdf"), + width = 8, + height = 6) + + +## 5. Plot expression of GR and MR against number of DE genes - stratified by up and downregulation +# GR +ggplot(gr_mr_updown, aes(x=Nr3c1, y=n, + size=log2FoldChange_Nr3c1, + color=-log10(padj_Nr3c1), + label=region)) + + geom_point() + + geom_text(hjust=-0.25, vjust=-0.25, + size=4, color="black") + + xlim(min(gr_mr_updown$Nr3c1),max(gr_mr_updown$Nr3c1)+0.1) + + xlab("Normalised Expression Level GR") + + ylab("Number of DE genes") + + scale_color_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,13)) + + scale_size_continuous(name="log2(FoldChange)") + + facet_wrap(~up_down, nrow = 2, scales = "free", + labeller = labeller(up_down = + c("down" = "Downregulated genes", + "up" = "Upregulated genes") + )) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_GR_updown.pdf"), + width = 8, + height = 6) + +# MR +ggplot(gr_mr_updown, aes(x=Nr3c2, y=n, + size=log2FoldChange_Nr3c2, + color=-log10(padj_Nr3c2), + label=region)) + + geom_point() + + geom_text(hjust=-0.25, vjust=-0.25, + size=4, color="black") + + xlim(min(gr_mr_updown$Nr3c2),max(gr_mr_updown$Nr3c2)+0.1) + + xlab("Normalised Expression Level MR") + + ylab("Number of DE genes") + + scale_color_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,13)) + + scale_size_continuous(name="log2(FoldChange)") + + facet_wrap(~up_down, nrow = 2, scales = "free", + labeller = labeller(up_down = + c("down" = "Downregulated genes", + "up" = "Upregulated genes") + )) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_MR_updown.pdf"), + width = 8, + height = 6) + + +## 6. Plot GR/MR ratio against number of DE genes - stratified by up and downregulation +gr_mr_updown <- gr_mr_updown %>% + mutate(grmr_ratio = Nr3c1/Nr3c2) + +# GR/MR ratio +ggplot(gr_mr_updown, aes(x=grmr_ratio, y=n, + size=log2FoldChange_Nr3c2, + color=-log10(padj_Nr3c2), + label=region)) + + geom_point() + + geom_text(hjust=-0.25, vjust=-0.25, + size=4, color="black") + + xlim(min(gr_mr_updown$grmr_ratio),max(gr_mr_updown$grmr_ratio)+0.1) + + xlab("Normalised Expression Level MR") + + ylab("Number of DE genes") + + scale_color_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,13)) + + scale_size_continuous(name="log2(FoldChange)") + + facet_wrap(~up_down, nrow = 2, scales = "free", + labeller = labeller(up_down = + c("down" = "Downregulated genes", + "up" = "Upregulated genes") + )) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_GRMRratio_updown.pdf"), + width = 8, + height = 6) diff --git a/04_PlotsManuscript/Revision/14_CompGWAS_DE.R b/04_PlotsManuscript/Revision/14_CompGWAS_DE.R new file mode 100644 index 0000000..07ac274 --- /dev/null +++ b/04_PlotsManuscript/Revision/14_CompGWAS_DE.R @@ -0,0 +1,147 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 07.09.2022 +## Author: Nathalie +################################################## +# Compare DEs and DNs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# trait of interest +traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ") +# trait <- "MDD" + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_hmagma <- paste0(basedir, "/data/reviews/H-MAGMAv1.08_Adult_brain_output.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) + +# unique DE genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(V1) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +de_uni <- list() +for (r in regions_files){ + de_uni[[r]] <- data_unique$V1[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + + +## 2. Mapping of mouse IDs to human IDs +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +de_human <- lapply(de_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +de_uni_human <- lapply(de_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + +res_list <- list() +res_list_uni <- list() +for (trait in traits){ + + ## 2. Read GWAS/H-MAGMA data + data <- read_excel(file_hmagma, sheet=paste0("H-MAGMA_",trait)) %>% + arrange(ZSTAT) %>% # rank gene list according to zscore as suggested in paper + filter(GENE %in% background_human) + ranks <- data$ZSTAT + names(ranks) <- data$GENE + + + ## 3. Run fgsea for Gene Set Enrichment Analysis + fgseaRes <- fgsea(pathways = de_human, + stats = ranks) + # sampleSize = 600) + res_list[[trait]] <- fgseaRes + + # unique DE genes + fgseaRes_uni <- fgsea(pathways = de_uni_human, + stats = ranks) + # sampleSize = 600) + res_list_uni[[trait]] <- fgseaRes_uni + + plotEnrichment(de_uni_human[["PFC"]], + ranks) + +} + + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14_Reviewer2_1a_DEall.pdf"), + width = 8, + height = 6) + + +# plot for unique DE genes +df <- bind_rows(res_list_uni, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14_Reviewer2_1a_DEunique.pdf"), + width = 8, + height = 6) diff --git a/04_PlotsManuscript/Revision/14a_CompGWAS_DE_Enrichment.R b/04_PlotsManuscript/Revision/14a_CompGWAS_DE_Enrichment.R new file mode 100644 index 0000000..afc7512 --- /dev/null +++ b/04_PlotsManuscript/Revision/14a_CompGWAS_DE_Enrichment.R @@ -0,0 +1,151 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 19.09.2022 +## Author: Nathalie +################################################## +# Compare DEs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) +# --> instead of GSEA we look at normal enrichment + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# trait of interest +traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ", "MS") +# trait <- "MDD" + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_hmagma <- paste0(basedir, "/data/reviews/H-MAGMAv1.08_Adult_brain_output.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) + +# unique DE genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(V1) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +de_uni <- list() +for (r in regions_files){ + de_uni[[r]] <- data_unique$V1[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + + +## 2. Mapping of mouse IDs to human IDs +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +de_human <- lapply(de_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +de_uni_human <- lapply(de_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + +res_list <- list() +res_list_uni <- list() +for (trait in traits){ + + ## 2. Read GWAS/H-MAGMA data + data <- read_excel(file_hmagma, sheet=paste0("H-MAGMA_",trait)) %>% + arrange(ZSTAT) %>% # rank gene list according to zscore as suggested in paper + filter(GENE %in% background_human) %>% + # mutate(fdr = p.adjust(P, method = "fdr")) %>% + # filter(fdr <= 0.05) + filter(P <= 0.05) + + ## 3. Run fora for overrepresentation test + foraRes <- fora(pathways = de_human, + genes = data$GENE, + universe = background_human) + # sampleSize = 600) + res_list[[trait]] <- foraRes + + # unique DE genes + foraRes_uni <- fora(pathways = de_uni_human, + genes = data$GENE, + universe = background_human) + # sampleSize = 600) + res_list_uni[[trait]] <- foraRes_uni + + # plotEnrichment(de_human[["PFC"]], + # ranks) + +} + + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,2)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEall.pdf"), + width = 8, + height = 6) + + +# plot for unique DE genes +df_uni <- bind_rows(res_list_uni, .id="trait") +df_uni$FDR <- p.adjust(df_uni$pval, method = "fdr") # apply correction across all tests +ggplot(df_uni, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEunique.pdf"), + width = 8, + height = 6) \ No newline at end of file diff --git a/04_PlotsManuscript/Revision/14b_CompGWAS_DE_otherTable.R b/04_PlotsManuscript/Revision/14b_CompGWAS_DE_otherTable.R new file mode 100644 index 0000000..951a1dd --- /dev/null +++ b/04_PlotsManuscript/Revision/14b_CompGWAS_DE_otherTable.R @@ -0,0 +1,156 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 19.09.2022 +## Author: Nathalie +################################################## +# Compare DEs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) +# --> instead of GSEA we look at normal enrichment + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# trait of interest +traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ", "MS") +# trait <- "MDD" + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_hmagma <- paste0(basedir, "/data/reviews/MAGMA published.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) + +# unique DE genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(V1) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +de_uni <- list() +for (r in regions_files){ + de_uni[[r]] <- data_unique$V1[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + + +## 2. Mapping of mouse IDs to human IDs +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +de_human <- lapply(de_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +de_uni_human <- lapply(de_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + +magma <- read_excel(file_hmagma, sheet = "Disorder-associated genes") %>% + pivot_longer(cols = ADHD:PD, + names_to = "disease", + values_to = "genes") %>% + filter(!is.na(genes)) +magma_ensembl <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), + filters = "hgnc_symbol", + values = magma$genes, mart = human) +magma_ensembl <- right_join(magma, magma_ensembl, by=c("genes"="hgnc_symbol")) + +res_list <- list() +res_list_uni <- list() +for (trait in traits){ + + # get genes for disease + dis_genes <- magma_ensembl$ensembl_gene_id[magma_ensembl$disease == trait] + + ## 3. Run fora for overrepresentation test + foraRes <- fora(pathways = de_human, + genes = dis_genes, + universe = background_human) + # sampleSize = 600) + res_list[[trait]] <- foraRes + + # unique DE genes + foraRes_uni <- fora(pathways = de_uni_human, + genes = dis_genes, + universe = background_human) + # sampleSize = 600) + res_list_uni[[trait]] <- foraRes_uni + + # plotEnrichment(de_human[["PFC"]], + # ranks) + +} + + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,3)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +# ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEall.pdf"), +# width = 8, +# height = 6) + + +# plot for unique DE genes +df_uni <- bind_rows(res_list_uni, .id="trait") +df_uni$FDR <- p.adjust(df_uni$pval, method = "fdr") # apply correction across all tests +ggplot(df_uni, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +# ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEunique.pdf"), +# width = 8, +# height = 6) \ No newline at end of file diff --git a/04_PlotsManuscript/Revision/14c_CompGWAS_DE_CrossDisorder.R b/04_PlotsManuscript/Revision/14c_CompGWAS_DE_CrossDisorder.R new file mode 100644 index 0000000..255a906 --- /dev/null +++ b/04_PlotsManuscript/Revision/14c_CompGWAS_DE_CrossDisorder.R @@ -0,0 +1,140 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 19.09.2022 +## Author: Nathalie +################################################## +# Compare DEs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) +# --> instead of GSEA we look at normal enrichment + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_crossdisorder <- paste0(basedir, "/data/reviews/cross-disorder.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) + +# unique DE genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(V1) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +de_uni <- list() +for (r in regions_files){ + de_uni[[r]] <- data_unique$V1[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + + +## 2. Mapping of mouse IDs to human IDs +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +de_human <- lapply(de_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +de_uni_human <- lapply(de_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + + + +## 2. Read GWAS/H-MAGMA data +data <- read_excel(file_crossdisorder, sheet="Sheet1") +data_ens <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), # no difference between v3 and v4 + filters = "hgnc_symbol", + values = data$`Cross-disorder GWAS`, mart = human) +cd_genes <- data_ens$ensembl_gene_id[data_ens$ensembl_gene_id %in% background_human] + +## 3. Run fora for overrepresentation test +foraRes <- fora(pathways = de_human, + genes = cd_genes, + universe = background_human) +# sampleSize = 600) +res_list[[trait]] <- foraRes + +# unique DE genes +foraRes_uni <- fora(pathways = de_uni_human, + genes = data$GENE, + universe = background_human) +# sampleSize = 600) +res_list_uni[[trait]] <- foraRes_uni + + + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,2)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEall.pdf"), + width = 8, + height = 6) + + +# plot for unique DE genes +df_uni <- bind_rows(res_list_uni, .id="trait") +df_uni$FDR <- p.adjust(df_uni$pval, method = "fdr") # apply correction across all tests +ggplot(df_uni, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14a_Reviewer2_1a_DEunique.pdf"), + width = 8, + height = 6) \ No newline at end of file diff --git a/04_PlotsManuscript/Revision/15_CompGWAS_DN.R b/04_PlotsManuscript/Revision/15_CompGWAS_DN.R new file mode 100644 index 0000000..c45ed7c --- /dev/null +++ b/04_PlotsManuscript/Revision/15_CompGWAS_DN.R @@ -0,0 +1,148 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 07.09.2022 +## Author: Nathalie +################################################## +# Compare DEs and DNs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) + +# TODO: think about background --> restrict to genes in our dataset? + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# trait of interest +traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ") +# trait <- "MDD" + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_hmagma <- paste0(basedir, "/data/reviews/H-MAGMAv1.08_Adult_brain_output.xlsx") +files_dn <- list.files(path = paste0(folder_table, "/coExpression_kimono/03_AnalysisFuncoup/"), + pattern = "_funcoup_treatment_nodebetweennessNorm_betacutoff0.01.csv$", full.names = TRUE) +regions_files <- sub(".*03a_(\\w*)_funcoup_treatment_nodebetween.*","\\1",files_dn) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_dn, function(x) fread(x) %>% filter(nodebetweenness_norm >= 1)) +names(expr_list) <- regions_files + +dn_list <- lapply(expr_list, function(x) x$ensembl_id) + +# unique DN genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(ensembl_id) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +dn_uni <- list() +for (r in regions_files){ + dn_uni[[r]] <- data_unique$ensembl_id[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +dn_human <- lapply(dn_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +dn_uni_human <- lapply(dn_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + +res_list <- list() +res_list_uni <- list() +for (trait in traits){ + + ## 2. Read GWAS/H-MAGMA data + data <- read_excel(file_hmagma, sheet=paste0("H-MAGMA_",trait)) %>% + arrange(ZSTAT) %>% # rank gene list according to zscore as suggested in paper + filter(GENE %in% background_human) + ranks <- data$ZSTAT + names(ranks) <- data$GENE + + + ## 3. Run fgsea for Gene Set Enrichment Analysis + fgseaRes <- fgsea(pathways = dn_human, + stats = ranks) + # sampleSize = 600) + res_list[[trait]] <- fgseaRes + + # unique DN genes + fgseaRes_uni <- fgsea(pathways = dn_uni_human, + stats = ranks) + # sampleSize = 600) + res_list_uni[[trait]] <- fgseaRes_uni + + # plotEnrichment(dn_human[["PFC"]], + # ranks) + +} + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/15_Reviewer2_1b_DNall.pdf"), + width = 8, + height = 6) + + +# plot for unique DN genes +df <- bind_rows(res_list_uni, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/15_Reviewer2_1b_DNunique.pdf"), + width = 8, + height = 6) diff --git a/04_PlotsManuscript/Revision/15a_CompGWAS_DN_Enrichment.R b/04_PlotsManuscript/Revision/15a_CompGWAS_DN_Enrichment.R new file mode 100644 index 0000000..2ae82d4 --- /dev/null +++ b/04_PlotsManuscript/Revision/15a_CompGWAS_DN_Enrichment.R @@ -0,0 +1,150 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 07.09.2022 +## Author: Nathalie +################################################## +# Compare DEs and DNs with previously identified GWAS risk genes +# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/) + +# TODO: think about background --> restrict to genes in our dataset? + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +# trait of interest +traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ", "MS") +# trait <- "MDD" + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_hmagma <- paste0(basedir, "/data/reviews/H-MAGMAv1.08_Adult_brain_output.xlsx") +files_dn <- list.files(path = paste0(folder_table, "/coExpression_kimono/03_AnalysisFuncoup/"), + pattern = "_funcoup_treatment_nodebetweennessNorm_betacutoff0.01.csv$", full.names = TRUE) +regions_files <- sub(".*03a_(\\w*)_funcoup_treatment_nodebetween.*","\\1",files_dn) +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Read DE genes from different brain regions +expr_list <- lapply(files_dn, function(x) fread(x) %>% filter(nodebetweenness_norm >= 1)) +names(expr_list) <- regions_files + +dn_list <- lapply(expr_list, function(x) x$ensembl_id) + +# unique DN genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(ensembl_id) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +dn_uni <- list() +for (r in regions_files){ + dn_uni[[r]] <- data_unique$ensembl_id[data_unique$region == r] +} + +# background genes +background <- read.table(file_background, + row.names = NULL) + +# map mouse Ensembl Ids to human Ensembl Ids +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors +# --> one mouse ID can be mapped to multiple human IDs + +dn_human <- lapply(dn_list, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +dn_uni_human <- lapply(dn_uni, function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + +res_list <- list() +res_list_uni <- list() +for (trait in traits){ + + ## 2. Read GWAS/H-MAGMA data + data <- read_excel(file_hmagma, sheet=paste0("H-MAGMA_",trait)) %>% + arrange(ZSTAT) %>% # rank gene list according to zscore as suggested in paper + filter(GENE %in% background_human) %>% + # mutate(fdr = p.adjust(P, method = "fdr")) %>% + # filter(fdr <= 0.05) + filter(P <= 0.05) + + ## 3. Run fora for overrepresentation test + foraRes <- fora(pathways = dn_human, + genes = data$GENE, + universe = background_human) + # sampleSize = 600) + res_list[[trait]] <- foraRes + + # unique DE genes + foraRes_uni <- fora(pathways = dn_uni_human, + genes = data$GENE, + universe = background_human) + # sampleSize = 600) + res_list_uni[[trait]] <- foraRes_uni + + # plotEnrichment(dn_human[["PFC"]], + # ranks) + +} + +## 4. Plot GSEA p-values as heatmap from all traits and brain regions +df <- bind_rows(res_list, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,2)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/15a_Reviewer2_1b_DNall.pdf"), + width = 8, + height = 6) + + +# plot for unique DN genes +df <- bind_rows(res_list_uni, .id="trait") +df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests +ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) + + geom_tile() + + xlab("GWAS trait") + + ylab("Brain region") + + scale_fill_gradient(name = "-log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,1)) + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) +ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/15a_Reviewer2_1b_DNunique.pdf"), + width = 8, + height = 6) diff --git a/04_PlotsManuscript/Revision/16_CompDexHumanResults.R b/04_PlotsManuscript/Revision/16_CompDexHumanResults.R new file mode 100644 index 0000000..f1565b7 --- /dev/null +++ b/04_PlotsManuscript/Revision/16_CompDexHumanResults.R @@ -0,0 +1,108 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 14.09.2022 +## Author: Nathalie +################################################## +# Compare DEs and DNs with DE genes from human Dex study +# (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8669026/) + + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(biomaRt) +library(ggplot2) +library(UpSetR) +library(rlist) + + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_human <- paste0(basedir, "/data/reviews/DexHumanBlood_MooreEtAl/41398_2021_1756_MOESM1_ESM.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +print(sub(".*02_(\\w*)_deseq2.*","\\1",files_de)) +regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de) +file_background <- paste0(folder_table, "/06_background.txt") + +## 0. Set up BioMart +# map mouse Ensembl Ids to human Ensembl Ids --> homology mapping +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors + + +## 1. Read differentially expressed genes from human data into list +data <- read_excel(file_human, sheet="2_DEA_results") + +# get Ensembl IDs for human genes +ens_human <- getBM(attributes = c("ensembl_gene_id", "illumina_humanht_12_v3"), # no difference between v3 and v4 + filters = "illumina_humanht_12_v3", + values = data$Probe_Id, mart = human) +data <- dplyr::inner_join(data, ens_human, by = c("Probe_Id"="illumina_humanht_12_v3")) + +## 2.Read background genes +background <- read.table(file_background, + row.names = NULL) +# get human ensembl IDs for background genes +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + +## 3. Subset human DE genes +# subset human DE genes to those in background set +data <- data[data$ensembl_gene_id %in% background_human,] + +# DE genes +data_de <- data[data$padj <= 0.05,] + +## 4. Read DE tables from all regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) +de_list_human <- lapply(de_list, + function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + +# unique DE genes +data_unique <- bind_rows(expr_list, .id = "region") %>% + group_by(V1) %>% + summarise(region = list(region)) %>% + mutate(nr_regions = lengths(region)) %>% + mutate(unique = (nr_regions == 1)) %>% + filter(unique) + +de_uni <- list() +for (r in regions_files){ + tmp <- data_unique$V1[data_unique$region == r] + de_uni[[r]] <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=tmp, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 +} + + +## 5. Plot overlap between human and mouse DE genes +# unique DE genes +list_flat <- c(de_uni, list("human" = data_de$ensembl_gene_id)) +pdf(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/16_Reviewer2_0_DEGDexHuman_DEunique.pdf"), + width = 12, height = 8) +par(mar=c(6,5,4,2) + 0.1) +print(upset(fromList(list_flat), nsets = 9, order.by = "freq", + text.scale = c(1.8, 1.9, 1.8, 1.9, 1.9, 1.9), + sets.x.label = "#DE genes", + mainbar.y.label = "#DE genes in intersection")) +dev.off() + +# all DE genes +list_flat <- c(de_list_human, list("human" = data_de$ensembl_gene_id)) +pdf(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/16_Reviewer2_0_DEGDexHuman_DEall.pdf"), + width = 12, height = 8) +print(upset(fromList(list_flat), nsets = 9, order.by = "freq", + text.scale = c(1.8, 1.9, 1.8, 1.9, 1.9, 1.9), + sets.x.label = "#DE genes", + mainbar.y.label = "#DE genes in intersection")) +dev.off() diff --git a/04_PlotsManuscript/Revision/17_CompPostMortemResults.R b/04_PlotsManuscript/Revision/17_CompPostMortemResults.R new file mode 100644 index 0000000..f7df5bb --- /dev/null +++ b/04_PlotsManuscript/Revision/17_CompPostMortemResults.R @@ -0,0 +1,90 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 14.09.2022 +## Author: Nathalie +################################################## +# Compare DEs and DNs with DE genes from post mortem study +# (https://pubmed.ncbi.nlm.nih.gov/30545856/) + + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(biomaRt) +library(ggplot2) +library(UpSetR) +library(rlist) + + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_pm <- paste0(basedir, "/data/reviews/PostmortemBrain/aat8127_table_s1.xlsx") +files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE) +print(sub(".*02_(\\w*)_deseq2.*","\\1",files_de)) +files_dn <- list.files(path = paste0(folder_table, "/coExpression_kimono/03_AnalysisFuncoup/"), + pattern = "_funcoup_treatment_nodebetweennessNorm_betacutoff0.01.csv$", full.names = TRUE) +regions_files <- sub(".*03a_(\\w*)_funcoup_treatment_nodebetween.*","\\1",files_dn) +file_background <- paste0(folder_table, "/06_background.txt") + +## 1. Set up BioMart +# map mouse Ensembl Ids to human Ensembl Ids --> homology mapping +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors + + +## 2. Read background genes and get human background IDs +background <- read.table(file_background, + row.names = NULL) +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + + +## 3. Read differentially expressed genes and transcripts from postmortem study +list_pm_genes <- list() +for (r in c("DGE", "DTE", "DTU")){ + # read file including ASD, SCZ and BD results + data <- read_excel(file_pm, sheet=r) + + list_pm_genes[[r]] <- list() + + for (d in c("ASD", "BD", "SCZ")){ + data_d <- data %>% + rename_with(~ gsub('DTU\\.', '', .x)) %>% # DTU list has prefix "DTU" in column names + dplyr::select("ensembl_gene_id", starts_with(d)) %>% + rename_with(~ gsub(paste0(d,'\\.'), '', .x)) %>% + rename_with(tolower) %>% # DTU table has FDR instead of fdr + dplyr::filter(fdr <= 0.05) + list_pm_genes[[r]][[d]] <- data_d$ensembl_gene_id[data_d$ensembl_gene_id %in% background_human] + } +} + + +## 4. Read DE tables from all regions +expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1)) +names(expr_list) <- regions_files + +de_list <- lapply(expr_list, function(x) x$V1) +de_list_human <- lapply(de_list, + function(x) getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=x, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1) + + +## 5. Upset plot of PFC and post mortem brain DE genes +list_flat <- list("PFC.mouse" = de_list_human[["PFC"]], + "PFC.human.ASD" = list_pm_genes[["DGE"]][["ASD"]], + "PFC.human.BD" = list_pm_genes[["DGE"]][["BD"]], + "PFC.human.SCZ" = list_pm_genes[["DGE"]][["SCZ"]]) +pdf(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/17_Reviewer2_2_DEGpostmortem.pdf"), + width = 12, height = 8) +par(mar=c(6,5,4,2) + 0.1) +print(upset(fromList(list_flat), nsets = 4, order.by = "freq", + text.scale = c(1.8, 1.9, 1.8, 1.9, 1.9, 1.9), + sets.x.label = "#DE genes", + mainbar.y.label = "#DE genes in intersection")) +dev.off() diff --git a/04_PlotsManuscript/Revision/18_CompNetworksPostMortem.R b/04_PlotsManuscript/Revision/18_CompNetworksPostMortem.R new file mode 100644 index 0000000..cca58a7 --- /dev/null +++ b/04_PlotsManuscript/Revision/18_CompNetworksPostMortem.R @@ -0,0 +1,251 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 16.09.2022 +## Author: Nathalie +################################################## +# Compare hub genes with disease associated modules +# from postmortem study +# (https://pubmed.ncbi.nlm.nih.gov/30545856/) + + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(biomaRt) +library(ggplot2) +library(ggpubr) + + +# define pathes +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse" +folder_table <- paste0(basedir,"/tables") +file_modules <- paste0(basedir, "/data/reviews/PostmortemBrain/aat8127_table_s5.xlsx") +file_de <- paste0(folder_table, "/02_PFC_deseq2_Dex_0_vs_1_lfcShrink.txt") +file_dn <- paste0(folder_table, "/coExpression_kimono/03_AnalysisFuncoup/", + "03a_PFC_funcoup_treatment_nodebetweennessNorm_betacutoff0.01.csv") +file_background <- paste0(folder_table, "/06_background.txt") + + +## 1. Set up BioMart +# map mouse Ensembl Ids to human Ensembl Ids --> homology mapping +human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors + +## 2. Read modules and disease associations +modules_genes <- read_excel(file_modules, sheet="geneModules") +modules_disease <- read_excel(file_modules, sheet = "Module-DiseaseAssociation") %>% + filter(Network == "gene") + + +## 3. Read DE and hub genes from mouse in PFC +# DE genes +res <- read.table(file_de, sep="\t", + header = TRUE) +na_indices <- which(is.na(res$padj)) +res$padj[na_indices] <- 1 +res_sig <- res[res$padj <= 0.1,] +de_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=rownames(res_sig), mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 +# hub genes +res_hub <- fread(file_dn) %>% filter(nodebetweenness_norm >= 1) +dn_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=res_hub$ensembl_id, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + +# DE and hub genes +dedn_human <- intersect(de_human, dn_human) +de_human <- setdiff(de_human, dedn_human) +dn_human <- setdiff(dn_human, dedn_human) + +## 4. Read background mouse genes +background <- read.table(file_background, + row.names = NULL) +background_human <- getLDS(attributes=c("ensembl_gene_id"), + filters="ensembl_gene_id", values=background$V1, mart=mouse, + attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1 + +## 5. DE and hub genes per module +modules_genes <- modules_genes %>% + filter(ensembl_gene_id %in% background_human) %>% # subset modules to background genes + mutate(de = ifelse(ensembl_gene_id %in% de_human, TRUE, FALSE), # add column to indicate if gene is DE gene + hub = ifelse(ensembl_gene_id %in% dn_human, TRUE, FALSE), + de_hub = ifelse(ensembl_gene_id %in% dedn_human, TRUE, FALSE)) # add column to indicate if gene is hub gene + +# group by module and count +modules_count <- modules_genes %>% + count(Module) + +# count DE and hub per module +de_count <- modules_genes %>% + group_by(Module) %>% + count(de) %>% + filter(de) + +hub_count <- modules_genes %>% + group_by(Module) %>% + count(hub) %>% + filter(hub) + +dedn_count <- modules_genes %>% + group_by(Module) %>% + count(de_hub) %>% + filter(de_hub) + +# fisher's exact test for overrepresentation of DE and hub genes +# one-sided fisher's exact test with alternative "greater" is the same as +# hypergeometric test (https://stats.stackexchange.com/questions/288081/use-fishers-exact-test-or-a-hypergeometric-test) +# Hypergeometric test (phyper) is also called by fora function from fgsea package +fis_test <- function(a1,b1,c1,d1){ + fisher.test(matrix(c(a1,b1,c1,d1), + nrow = 2, ncol = 2), alternative = "greater")$p.value +} +de_test <- modules_count %>% + left_join(de_count[c("Module", "n")], by="Module") %>% + replace(is.na(.), 0) %>% + mutate(b = n.x - n.y, + c = length(de_human) - n.y, + d = nrow(modules_genes) - n.y - b - c) %>% + rowwise() %>% + mutate(fis_p = fis_test(n.y,b,c,d)) +de_test$fis_fdr <- p.adjust(de_test$fis_p, method = "fdr") +de_test <- de_test %>% + mutate( + label_de = case_when( + fis_fdr > 0.05 ~ "", + fis_fdr > 0.01 ~ "*", + fis_fdr > 0.001 ~ "**", + !is.na(fis_fdr) ~ "***", + TRUE ~ NA_character_ + ) + ) + +hub_test <- modules_count %>% + left_join(hub_count[c("Module", "n")], by="Module") %>% + replace(is.na(.), 0) %>% + mutate(b = n.x - n.y, + c = length(dn_human) - n.y, + d = nrow(modules_genes) - n.y - b - c) %>% + rowwise() %>% + mutate(fis_p = fis_test(n.y,b,c,d)) +hub_test$fis_fdr <- p.adjust(hub_test$fis_p, method = "fdr") +hub_test <- hub_test %>% + mutate( + label_hub = case_when( + fis_fdr > 0.05 ~ "", + fis_fdr > 0.01 ~ "*", + fis_fdr > 0.001 ~ "**", + !is.na(fis_fdr) ~ "***", + TRUE ~ NA_character_ + ) + ) + +# join DE and hub counts to general count +df_count <- modules_count %>% + left_join(de_count[c("Module", "n")], by="Module") %>% + left_join(hub_count[c("Module", "n")], by="Module") %>% + left_join(dedn_count[c("Module", "n")], by="Module") %>% + mutate(o = factor(as.numeric(str_replace(Module, "geneM", "")), + levels = sort(as.numeric(str_replace(Module, "geneM", "")))) + ) %>% + replace(is.na(.), 0) %>% + mutate(genes_de = n.y, + genes_hub = n.x.x, + genes_dehub = n.y.y, + genes_rest = n.x-n.y-n.x.x-n.y.y) %>% + pivot_longer(cols = starts_with("genes_"), + names_to = "colour", + values_to = "count") %>% + mutate(colour = factor(colour, + levels = c("genes_rest", "genes_hub", "genes_de", "genes_dehub"))) %>% + left_join(de_test[c("Module", "label_de")], by="Module") %>% + left_join(hub_test[c("Module", "label_hub")], by="Module") +df_count$label_de[df_count$colour != "genes_de"] <- "" +df_count$label_hub[df_count$colour != "genes_hub"] <- "" + + +# prepare disease df +df_disease <- modules_disease %>% + mutate(o = factor(as.numeric(str_replace(ModulName, "geneM", "")), + levels = levels(df_count$o))) + + +## 6. Barplot +b <- ggplot(df_count, aes(x=o, y=count, fill=colour)) + + geom_bar(stat = "identity") + + scale_fill_manual(breaks = c("genes_rest", "genes_hub", "genes_de", "genes_dehub"), + values = c("grey", "darkred", "orange", "#7E2274"), + labels = c("Module genes not DE or hub", + "Hub genes", "DE genes", "DE and hub genes")) + + guides(fill = guide_legend(title = "Gene set")) + + xlab("Module") + + ylab("Number of genes") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) + +b_perc <- ggplot(df_count, aes(x=o, y=count, fill=colour)) + + geom_bar(position = "fill", stat = "identity") + + geom_text(aes(label=label_de), + position=position_fill(vjust=0.5), colour="white", size =6) + + geom_text(aes(label=label_hub), + position=position_fill(vjust=0.5), colour="white", size =6) + + scale_fill_manual(breaks = c("genes_rest", "genes_hub", "genes_de", "genes_dehub"), + values = c("grey", "darkred", "orange", "#7E2274"), + labels = c("Module genes not DE or hub", + "Hub genes", "DE genes", "DE and hub genes")) + + xlab("Module") + + ylab("Percentage of genes") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12) + ) + +h <- ggplot(df_disease, aes(x=o, y=Group, fill=-log10(fdr))) + + geom_tile() + + scale_fill_gradient(name = "Disease association: -log10(FDR)", + low="lightgrey", high="#D45E60", + limits = c(0,max(-log10(df_disease$fdr)))) + + scale_x_discrete(drop=FALSE) + + ylab("Disease") + + xlab("Module") + + theme_light() + + theme( + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15), + axis.text.x = element_text(size = 12), + axis.text.y = element_text(size = 12), + legend.text = element_text(size = 12) + ) + + +comb <- ggarrange(b + theme(axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + axis.title.x = element_blank()), + b_perc + theme(axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + axis.title.x = element_blank()), + h , + #legend.grob = get_legend(b), + nrow = 3, + heights=c(2,2,1), + align = "v") +comb + +ggexport(comb, + filename = paste0(basedir, + "/scripts_manuscript/04_PlotsManuscript/Revision/18_Reviewer2_2b_Networks_Percentage_significanceFDR.pdf"), + width = 14, height = 8) + + diff --git a/04_PlotsManuscript/Revision/19_EnrichmentTcf4DN_mutantMice.R b/04_PlotsManuscript/Revision/19_EnrichmentTcf4DN_mutantMice.R new file mode 100644 index 0000000..4860f96 --- /dev/null +++ b/04_PlotsManuscript/Revision/19_EnrichmentTcf4DN_mutantMice.R @@ -0,0 +1,75 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 16.09.2022 +## Author: Nathalie +################################################## +# Test if Tcf4 DN is enriched for genes that are actually +# dysregulated upon Tcf4 mutant mice: +# (https://pubmed.ncbi.nlm.nih.gov/32015540/) + + +library(data.table) +library(tidyr) +library(dplyr) +library(stringr) +library(readxl) +library(fgsea) +library(biomaRt) +library(ggplot2) + +basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse/" +folder_table <- paste0(basedir,"/tables") +file_tcf4 <- paste0(basedir, "data/reviews/TCF4 PFC DN.xlsx") +file_mutant_tcf4 <- paste0(basedir, "data/reviews/Phan et al_Nat Neuro_2020 TCF4.xls") +file_background <- paste0(folder_table, "/06_background.txt") + + +## 0. Set up BioMart for ID mapping +mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") +# --> this is a workaround as the normal commands lead to errors + + +## 1. Read background mouse genes from our dataset +background <- read.table(file_background, + row.names = NULL) + + +## 2. Read Tcf4 DN from PFC +tcf4_dn <- read_excel(file_tcf4, sheet = "Sheet1") +tcf4_dn <- getBM(attributes = c("ensembl_gene_id", "mgi_symbol"), # no difference between v3 and v4 + filters = "mgi_symbol", + values = tcf4_dn$`unique identifiers`, mart = mouse) + + +## 3. Read mutant Tcf4 mice DE genes +mutant <- list() +# adult mice +tcf4_mutant <- read_excel(file_mutant_tcf4, sheet = "Supp Table 2-Adult DESeq2") +tcf4_demutant <- tcf4_mutant$...1[tcf4_mutant$padj <= 0.05] +tcf4_demutant <- tcf4_demutant[tcf4_demutant %in% background$V1] +mutant[["tcf4_mutant_adult"]] <- tcf4_demutant + +# p1 mice +tcf4_mutant <- read_excel(file_mutant_tcf4, sheet = "Supp Table 2-p1 DESeq2") +tcf4_demutant <- tcf4_mutant$...1[tcf4_mutant$padj <= 0.05] +tcf4_demutant <- tcf4_demutant[tcf4_demutant %in% background$V1] +mutant[["tcf4_mutant_p1"]] <- tcf4_demutant + +# both +tcf4_mutant <- read_excel(file_mutant_tcf4, sheet = "Supp Table 2-Both DESeq2") +tcf4_demutant <- tcf4_mutant$...1[tcf4_mutant$padj_p1 <= 0.05 & tcf4_mutant$padj_Adult] +tcf4_demutant <- tcf4_demutant[tcf4_demutant %in% background$V1] +mutant[["both"]] <- tcf4_demutant + +## 4. Overrepresentation test +# Check if mutant Tcf4 DE genes are enriched in our network +foraRes <- fora(pathways = mutant, + genes = tcf4_dn$ensembl_gene_id, + universe = background$V1) + + +## 5. Write to file +write.csv(as.matrix(foraRes), + file = paste0(basedir, + "scripts_manuscript/04_PlotsManuscript/Revision/19_Reviewer2_3_Tcf4_enrichments.csv")) + diff --git a/04_PlotsManuscript/plots_v2/01_Figure2_dotplots.R b/04_PlotsManuscript/plots_v2/01_Figure2_dotplots.R new file mode 100644 index 0000000..66f3499 --- /dev/null +++ b/04_PlotsManuscript/plots_v2/01_Figure2_dotplots.R @@ -0,0 +1,110 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 02.03.2022 +## Author: Nathalie +################################################## +# GO Enrichment plot for manuscript +# Unique DE and hub genes in PFC + +library(readxl) +library(dplyr) +library(stringr) +library(ggplot2) + + +# 1. GO enrichment unique DE and hub genes in PFC + +data_hub <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure II_PFC/networks/FUMA_diff unique hubgenes_gene2func67664/PFC diff unique hubgenes_FUMA.xlsx", + sheet = "GO bp") + +data_DE <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure II_PFC/DE PFC/FUMA_gene2func66314/PFC unique DE genes _FUMA.xlsx", + sheet = "GO bp") + + +# data for left panel with top 14 terms for DE +top_DE <- data_DE %>% + dplyr::filter(N_overlap_A > 24) +top_DE <- top_DE[order(top_DE$`my adj p BH`, decreasing = FALSE),][1:14,] + +top_DE_hub <- data_hub %>% + filter(GeneSet %in% top_DE$GeneSet) + +top_DE_comb <- bind_rows("DE genes" = top_DE, "hub genes" = top_DE_hub, .id = "group") + + +# data for right panel with top 14 terms for hubs +top_hub <- data_hub %>% + dplyr::filter(N_overlap_A > 2) +top_hub <- top_hub[order(top_hub$`my adj p BH`, decreasing = FALSE),][1:14,] + +top_hub_DE <- data_DE %>% + filter(GeneSet %in% top_hub$GeneSet) + +missing_term <- setdiff(top_hub$GeneSet, top_hub_DE$GeneSet) +add_entry <- data.frame("GeneSet" = missing_term) +top_hub_DE <- bind_rows(top_hub_DE, add_entry) + +top_hub_comb <- bind_rows("hub genes" = top_hub, "DE genes" = top_hub_DE, .id = "group") + + +clean <- function(top_data) { + # remove the "GO_" from GO terms + top_data$GeneSet <- str_replace_all(top_data$GeneSet, '^GO_', ' ') + # remove the "_" from GO terms + top_data$GeneSet <- str_replace_all(top_data$GeneSet, "_", " ") + # only capitalize first letter per word + top_data$GeneSet <- str_to_title(top_data$GeneSet) + # of and to should start with lower case + top_data$GeneSet <- str_replace_all(top_data$GeneSet, "Of", "of") + top_data$GeneSet <- str_replace_all(top_data$GeneSet, "To", "to") + + return(top_data) +} + +top_DE_comb <- clean(top_DE_comb) +top_hub_comb <- clean(top_hub_comb) + +# order of terms +top_DE_comb$GeneSet <- factor(top_DE_comb$GeneSet, levels = top_DE_comb$GeneSet[14:1]) + +# dotplot terms DE +g <- ggplot(top_DE_comb) + + geom_point(aes(#y = reorder(GeneSet,-`my adj p BH`), + y = GeneSet, + x = group, + color = `[-log10 fdr`, + size = `Genes ratio`)) + + scale_color_gradient(name = "-log10(FDR)", + low = "#D45E60", high = "#0F5057") + + scale_size_continuous(name = "GeneRatio") + + xlab("") + + ylab("GOterm") + + #ggtitle("GO enrichment for unique DE genes in PFC") + + # facet_wrap(~panel, scales="free", labeller = as_labeller(facet_names)) + + theme_light() + + theme(text = element_text(size= 14)) +print(g) +ggsave("01_Figure2_GO_DE.pdf", g, width = 8, height = 8) + + +# order of terms +top_hub_comb$GeneSet <- factor(top_hub_comb$GeneSet, levels = top_hub_comb$GeneSet[14:1]) + +# dotplot terms hub +g <- ggplot(top_hub_comb) + + geom_point(aes(y = reorder(GeneSet,-`my adj p BH`), + x = group, + color = `[-log10 fdr`, + size = `Genes ratio`)) + + scale_color_gradient(name = "-log10(FDR)", + low = "#D45E60", high = "#0F5057") + + scale_size_continuous(name = "GeneRatio") + + xlab("") + + ylab("GOterm") + + #ggtitle("GO enrichment for unique hub genes in PFC") + + # facet_wrap(~panel, scales="free", labeller = as_labeller(facet_names)) + + theme_light() + + theme(text = element_text(size= 14)) +print(g) +ggsave("01_Figure2_GO_hub.pdf", g, width = 8, height = 8) + diff --git a/04_PlotsManuscript/plots_v2/02_Figure3_boxplotAbcd1.R b/04_PlotsManuscript/plots_v2/02_Figure3_boxplotAbcd1.R new file mode 100644 index 0000000..7b238e6 --- /dev/null +++ b/04_PlotsManuscript/plots_v2/02_Figure3_boxplotAbcd1.R @@ -0,0 +1,64 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 02.03.2022 +## Author: Nathalie +################################################## +# Expression level plot Abcd1 in all brain regions + + +library(stringr) +library(ggplot2) + +regions <- c("AMY", "CER", "dCA1", "dDG", "PFC", "PVN", "vCA1", "vDG") + +ensembl_abcd1 <- "ENSMUSG00000031378" + +# Panel A: Expression values +df <- data.frame("region" = character(), + "treatment" = character(), + "sample" = character(), + "expression" = numeric()) + +for (reg in regions){ + + # read vsd normalized data --> better raw data? + data_exp <- read.table(paste0("~/Documents/ownCloud/DexStim_RNAseq_Mouse/tables/02_", + reg, "_deseq2_expression_vsd.txt"), + header = TRUE, sep = "\t") + # subset Abcd1 row + abcd1 <- data_exp[ensembl_abcd1,] + + # get column indices for cntrl samples + indices_cntrl <- str_detect(colnames(data_exp), "CNTRL") + + # df for cntrl samples + exp_cntrl <- data.frame("expression" = t(abcd1[,indices_cntrl]), + "sample" = rownames(t(abcd1[,indices_cntrl]))) %>% + rename(expression = ENSMUSG00000031378) %>% + mutate("treatment" = "CNTRL", "region" = reg) + df <- rbind(df, exp_cntrl) + + # df for dex samples + exp_dex <- data.frame("expression" = t(abcd1[,!indices_cntrl]), + "sample" = rownames(t(abcd1[,!indices_cntrl]))) %>% + rename(expression = ENSMUSG00000031378) %>% + mutate("treatment" = "DEX", "region" = reg) + df <- rbind(df, exp_dex) + +} + +df$treatment <- factor(df$treatment) +df$region <- factor(df$region) + +ggplot(df, aes(x = region, y = expression, fill = treatment)) + + geom_boxplot() + + scale_fill_manual("", + breaks = c("CNTRL", "DEX"), + labels = c("Baseline", "Treatment"), + values = c("#B0BFBB", "#46866E")) + + theme_light() + + theme(text = element_text(size= 14)) + + xlab("Brain Region") + + ylab("Norm. Expression Level") + +ggsave(filename = "02_Figure3_boxplotAbcd1.pdf", width = 12, height = 8) diff --git a/04_PlotsManuscript/plots_v2/03_Figure3_enrichmentAbcd1.R b/04_PlotsManuscript/plots_v2/03_Figure3_enrichmentAbcd1.R new file mode 100644 index 0000000..073bfbf --- /dev/null +++ b/04_PlotsManuscript/plots_v2/03_Figure3_enrichmentAbcd1.R @@ -0,0 +1,52 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 02.03.2022 +## Author: Nathalie +################################################## +# Pathway enrichment dotplot for manuscript +# Abcd1 and neighbourhood in diff network of PFC + +library(readxl) + +# 1. Pathway enrichment for Abcd1 and neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure II_PFC/Abcd1/Abcd1 gene neighbourhood_FUMA_gene2func67210/Abcd1_diffnetwork_FUMA.xlsx", + sheet = "KEGG AND REACTOME") + +data <- arrange(data, desc(`[-log10FDR]`)) +data <- data[1:20,] + + +# remove the "_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, "_", " ") +# only capitalize first letter per word +data$GeneSet <- str_to_title(data$GeneSet) +# of and to should start with lower case +data$GeneSet <- str_replace_all(data$GeneSet, "Of", "of") +data$GeneSet <- str_replace_all(data$GeneSet, "To", "to") +data$GeneSet <- str_replace_all(data$GeneSet, " In ", " in ") +data$GeneSet <- str_replace_all(data$GeneSet, " Into ", " into ") +# Ii from "Polymerase II" back to upper case +data$GeneSet <- str_replace_all(data$GeneSet, "Reactome", "Reactome:") +data$GeneSet <- str_replace_all(data$GeneSet, "Kegg", "KEGG:") +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + +# color palette +colfunc <- colorRampPalette(c("#D45E60", "#0F5057")) + +# dotplot enrichment +g <- ggplot(data) + + geom_point(aes(y = reorder(GeneSet,`Genes ratio`), + x = `Genes ratio`, + color = `[-log10FDR]`, + size = `OR[log2(a*d)-log2(b*c)]`)) + + scale_color_gradient(name = "-log10(FDR)", + low = "#D45E60", high = "#0F5057") + + scale_size_continuous(name = "OddsRatio") + + xlab("% GeneRatio") + + ylab("GOterm") + + theme_light() + + theme(text = element_text(size= 14)) +print(g) +ggsave("03_Figure3_enrichmentAbcd1.pdf", g, width = 8, height = 8) diff --git a/04_PlotsManuscript/plots_v2/04_Figure4_enrichmentDE.R b/04_PlotsManuscript/plots_v2/04_Figure4_enrichmentDE.R new file mode 100644 index 0000000..206bc65 --- /dev/null +++ b/04_PlotsManuscript/plots_v2/04_Figure4_enrichmentDE.R @@ -0,0 +1,54 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 02.03.2022 +## Author: Nathalie +################################################## +# GO enrichment plot for manuscript +# vCA1 unique DE genes enrichments + +library(readxl) +library(dplyr) +library(stringr) +library(ggpubr) + +# 1. GO enrichment for vCA unique DE genes + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure III_vCA1/DE vCA1/FUMA_gene2func67310 (2)/vCA1 Unique DE genes_FUMA enrichments.xlsx", + sheet = "GO bp") + +data <- arrange(data, desc(`[-log10 of FDR`)) +data <- data[1:20,] + + +# remove the "GO_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, '^GO_', ' ') +# remove the "_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, "_", " ") +# only capitalize first letter per word +data$GeneSet <- str_to_title(data$GeneSet) +# of and to should start with lower case +data$GeneSet <- str_replace_all(data$GeneSet, "Of", "of") +data$GeneSet <- str_replace_all(data$GeneSet, "To", "to") +data$GeneSet <- str_replace_all(data$GeneSet, " In ", " in ") +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + + +# color palette +colfunc <- colorRampPalette(c("#D45E60", "#0F5057")) + +# dotplot enrichment +g <- ggplot(data) + + geom_point(aes(y = reorder(GeneSet,`Genes ratio`), + x = `Genes ratio`, + color = `[-log10 of FDR`, + size = `OR[log2(a*d)-log2(b*c)]`)) + + scale_color_gradient(name = "-log10(FDR)", + low = "#D45E60", high = "#0F5057") + + scale_size_continuous(name = "OddsRatio") + + xlab("% GeneRatio") + + ylab("GOterm") + + theme_light() + + theme(text = element_text(size= 14)) +print(g) +ggsave("04_Figure4_enrichmentDE.pdf", g, width = 8, height = 8) diff --git a/04_PlotsManuscript/plots_v2/05_Figure4_enrichmentDENeighbours.R b/04_PlotsManuscript/plots_v2/05_Figure4_enrichmentDENeighbours.R new file mode 100644 index 0000000..93a2a1d --- /dev/null +++ b/04_PlotsManuscript/plots_v2/05_Figure4_enrichmentDENeighbours.R @@ -0,0 +1,56 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 02.03.2022 +## Author: Nathalie +################################################## +# GO enrichment plot for manuscript +# vCA1 unique DE genes with neighbours enrichments + + +library(readxl) +library(dplyr) +library(stringr) +library(ggpubr) + +# 1. GO enrichment for vCA unique DE genes and their diff neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure III_vCA1/network vCA1/Unique DEs and their diff neighbours_FUMA_gene2func67313/vCA1 Unique DE genes+diff neighbours_FUMA enrichments.xlsx", + sheet = "GO bp") + +data <- arrange(data, desc(`[-LOG10 of FDR`)) +data <- data[1:20,] + +# remove the "GO_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, '^GO_', ' ') +# remove the "_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, "_", " ") +# only capitalize first letter per word +data$GeneSet <- str_to_title(data$GeneSet) +# of and to should start with lower case +data$GeneSet <- str_replace_all(data$GeneSet, "Of", "of") +data$GeneSet <- str_replace_all(data$GeneSet, "To", "to") +data$GeneSet <- str_replace_all(data$GeneSet, " In ", " in ") +data$GeneSet <- str_replace_all(data$GeneSet, " Into ", " into ") +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + + +# color palette +colfunc <- colorRampPalette(c("#D45E60", "#0F5057")) + +# dotplot enrichment +g <- ggplot(data) + + geom_point(aes(y = reorder(GeneSet,`Genes ratio`), + x = `Genes ratio`, + color = `[-LOG10 of FDR`, + #color = `my adj p BH`, + size = `OR[log2(a*d)-log2(b*c)]`)) + + scale_color_gradient(name = "-log10(FDR)", + low = "#D45E60", high = "#0F5057") + + scale_size_continuous(name = "OddsRatio") + + xlab("% GeneRatio") + + ylab("GOterm") + + theme_light() + + theme(text = element_text(size= 14)) +print(g) +ggsave("05_Figure4_enrichmentDENeighbours.pdf", g, width = 8, height = 8) diff --git a/04_PlotsManuscript/plots_v2/06_Figure4_GWASenrichment.R b/04_PlotsManuscript/plots_v2/06_Figure4_GWASenrichment.R new file mode 100644 index 0000000..77e1004 --- /dev/null +++ b/04_PlotsManuscript/plots_v2/06_Figure4_GWASenrichment.R @@ -0,0 +1,43 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 02.03.2022 +## Author: Nathalie +################################################## +# GWAS enrichment plot for manuscript +# vCA1 + +library(readxl) +library(dplyr) +library(ggplot2) +library(ggpubr) + +# 1. Pathway enrichment for Abcd1 and neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure III_vCA1/network vCA1/Unique DEs and their diff neighbours_FUMA_gene2func67313/vCA1 Unique DE genes+diff neighbours_FUMA enrichments.xlsx", + sheet = "GWAS") + +data <- arrange(data, desc(`[-log10 of FDR`)) +data <- data[data$`my adj p BH` <= 0.05,] +data <- data[!is.na(data$`my adj p BH`),] + +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + +# color palette +colfunc <- colorRampPalette(c("#D45E60", "#0F5057")) + +# dotplot enrichment +g <- ggplot(data) + + geom_point(aes(y = reorder(GeneSet,`Genes ratio`), + x = `Genes ratio`, + color = `[-log10 of FDR`, + size = `OR[log2(a*d)-log2(b*c)]`)) + + scale_color_gradient(name = "-log10(FDR)", + low = "#D45E60", high = "#0F5057") + + scale_size_continuous(name = "OddsRatio") + + xlab("% GeneRatio") + + ylab("GOterm") + + theme_light() + + theme(text = element_text(size= 14)) +print(g) +ggsave("06_Figure4_GWASenrichment.pdf", g, width = 8, height = 5) diff --git a/04_PlotsManuscript/plots_v2/07_Figure5_GWASenrichmentTcf4.R b/04_PlotsManuscript/plots_v2/07_Figure5_GWASenrichmentTcf4.R new file mode 100644 index 0000000..5c2c644 --- /dev/null +++ b/04_PlotsManuscript/plots_v2/07_Figure5_GWASenrichmentTcf4.R @@ -0,0 +1,45 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 02.03.2022 +## Author: Nathalie +################################################## +# GWAS enrichment plot for manuscript +# Tcf4 neighbours? + +library(readxl) +library(dplyr) +library(ggplot2) +library(ggpubr) + +# 1. Pathway enrichment for Abcd1 and neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure IV_GWAS/TCF4/FUMA_diffTcf4PFCnetwork_gene2func68788/FUMA_diffPFCTcf4.xlsx", + sheet = "GWAS") + +data <- arrange(data, desc(`[-log10]`)) +data$`my adj p` <- as.numeric(data$`my adj p`) +data <- data[data$`my adj p` <= 0.05,] +data <- data[!is.na(data$`my adj p`),] + +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + + +# color palette +colfunc <- colorRampPalette(c("#D45E60", "#0F5057")) + +# dotplot enrichment +g <- ggplot(data) + + geom_point(aes(y = reorder(GeneSet,`Genes ratio`), + x = `Genes ratio`, + color = `[-log10]`, + size = `OR[log2(a*d)-log2(b*c)]`)) + + scale_color_gradient(name = "-log10(FDR)", + low = "#D45E60", high = "#0F5057") + + scale_size_continuous(name = "OddsRatio") + + xlab("% GeneRatio") + + ylab("GOterm") + + theme_light() + + theme(text = element_text(size= 14)) +print(g) +ggsave("07_Figure5_GWASenrichmentTcf4.pdf", g, width = 8, height = 6) diff --git a/04_PlotsManuscript/plots_v2/08_Figure5_enrichmentDiffNet.R b/04_PlotsManuscript/plots_v2/08_Figure5_enrichmentDiffNet.R new file mode 100644 index 0000000..e27d9ff --- /dev/null +++ b/04_PlotsManuscript/plots_v2/08_Figure5_enrichmentDiffNet.R @@ -0,0 +1,60 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 02.03.2022 +## Author: Nathalie +################################################## +# GO enrichment plot for manuscript +# Tcf4 differential network in PFC + +library(readxl) +library(dplyr) +library(stringr) +library(ggpubr) + +# 1. GO enrichment for vCA unique DE genes and their diff neighbours + +data <- read_xlsx(path = "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure IV_GWAS/TCF4/FUMA_diffTcf4PFCnetwork_gene2func68788/FUMA_diffPFCTcf4.xlsx", + sheet = "GO bp") + +data <- arrange(data, desc(`[-log10]`)) +data <- data[1:25,] + +# remove the "GO_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, '^GO_', ' ') +# remove the "_" from GO terms +data$GeneSet <- str_replace_all(data$GeneSet, "_", " ") +# only capitalize first letter per word +data$GeneSet <- str_to_title(data$GeneSet) +# of and to should start with lower case +data$GeneSet <- str_replace_all(data$GeneSet, "Of", "of") +data$GeneSet <- str_replace_all(data$GeneSet, "To", "to") +data$GeneSet <- str_replace_all(data$GeneSet, " In ", " in ") +data$GeneSet <- str_replace_all(data$GeneSet, " Into ", " into ") +# make some terms shorter with abbreviations +data$GeneSet <- str_replace_all(data$GeneSet, "Positive Regulation", "Pos. Reg.") +data$GeneSet <- str_replace_all(data$GeneSet, "Negative Regulation", "Neg. Reg.") +# Ii from "Polymerase II" back to upper case +data$GeneSet <- str_replace_all(data$GeneSet, "Ii", "II") +# GeneSet as factor +data$GeneSet <- factor(data$GeneSet, levels = data$GeneSet) + + +# color palette +colfunc <- colorRampPalette(c("#D45E60", "#0F5057")) + +# dotplot enrichment +g <- ggplot(data) + + geom_point(aes(y = reorder(GeneSet,`Genes ratio`), + x = `Genes ratio`, + color = `[-log10]`, + #color = `my adj p BH`, + size = `OR[log2(a*d)-log2(b*c)]`)) + + scale_color_gradient(name = "-log10(FDR)", + low = "#D45E60", high = "#0F5057") + + scale_size_continuous(name = "OddsRatio") + + xlab("% GeneRatio") + + ylab("GOterm") + + theme_light() + + theme(text = element_text(size= 14)) +print(g) +ggsave("08_Figure5_enrichmentDiffNet.pdf", g, width = 8, height = 8) diff --git a/04_PlotsManuscript/plots_v2/09_FigureS2_enrichmentDE.R b/04_PlotsManuscript/plots_v2/09_FigureS2_enrichmentDE.R new file mode 100644 index 0000000..f60134e --- /dev/null +++ b/04_PlotsManuscript/plots_v2/09_FigureS2_enrichmentDE.R @@ -0,0 +1,196 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 02.03.2022 +## Author: Nathalie +################################################## +# GO enrichment for DE genes across all regions + +library(clusterProfiler) +library(org.Mm.eg.db) +library(ggplot2) +library(dplyr) +library(stringr) +library(writexl) + + +basepath <- "~/Documents/ownCloud/DexStim_RNAseq_Mouse/" + + +# 0. Read genes DE in all regions and background ----------------- +genes <- read.table(file = paste0(basepath, "tables/06_overlap_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_entrezID.txt"), + header = FALSE)[,1] + +# background are all genes in out dataset +background <- read.table(file = paste0(basepath, "tables/06_background_entrezID.txt"), + header = FALSE)[,1] + + + +# 1.1 GO enrichment for genes DE in all regions --------------------- + +# GO enrichment +min_genes <- round(length(genes)/10) +ego <- enrichGO(gene = as.character(genes), + universe = as.character(background), + OrgDb = org.Mm.eg.db, + ont = "BP", + pAdjustMethod = "BH", + minGSSize = min_genes, # min number of genes associated with GO term + maxGSSize = 10000, # max number of genes associated with GO term + readable = TRUE)@result +head(ego, n = 20) + + +# 1.2 GO enrichment for upregulated genes DE in all regions --------------------- +genes_up <- read.table(file = paste0(basepath, "tables/06_overlap_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_up_entrezID.txt"), + header = FALSE)[,1] + +# GO enrichment +min_genes <- round(length(genes_up)/10) +ego_up <- enrichGO(gene = as.character(genes_up), + universe = as.character(background), + OrgDb = org.Mm.eg.db, + ont = "BP", + pAdjustMethod = "BH", + minGSSize = min_genes, # min number of genes associated with GO term + maxGSSize = 10000, # max number of genes associated with GO term + readable = TRUE)@result +head(ego_up, n = 20) + + + + +# 1.3 GO enrichment for downregulated genes DE in all regions --------------------- +genes_down <- read.table( + file = paste0(basepath, + "tables/06_overlap_AMY-CER-PFC-PVN-dDG-vDG-dCA1-vCA1_down_entrezID.txt"), + header = FALSE)[,1] + +# GO enrichment +min_genes <- length(genes_down)/10 +ego_down <- enrichGO(gene = as.character(genes_down), + universe = as.character(background), + OrgDb = org.Mm.eg.db, + ont = "BP", + pAdjustMethod = "BH", + minGSSize = min_genes, # min number of genes associated with GO term + maxGSSize = 10000, # max number of genes associated with GO term + readable = TRUE)@result +head(ego_down, n = 20) + + +# 1.4 Merge dataframes from all, up and downregulated genes -------------------- + +data_go <- left_join(ego, ego_down, by = c("ID", "Description"), + suffix = c(".all", ".down")) +data_go <- left_join(data_go, ego_up, by = c("ID", "Description"), + suffix = c("", ".up")) +# apparently not all entrez ids are valid --> not all taken into account in enrichment analysis +ngenes_rel <- 165 +nback_rel <- 12089 +# add columns relevant for gene ratio and odds ratio +data_go <- data_go %>% + mutate(n_genes = as.numeric(str_extract(data_go$BgRatio.all, "^\\d+"))) %>% + mutate(gr = Count.all/n_genes*100) %>% + mutate(b = ngenes_rel - Count.all, + c = n_genes - Count.all, + d = nback_rel - Count.all, + or = log2(Count.all*d)-log2(b*c)) + +data_heat <- data_go[1:25,c("Description", "p.adjust.down", "p.adjust")] %>% + tidyr::pivot_longer(cols = p.adjust.down:p.adjust) +data_heat$Description <- factor(data_heat$Description, + levels = levels(reorder(data_go[1:25,]$Description, data_go[1:25,]$gr))) + +# 1.5 Plot ------------------------------- + +#data_go[1:25,]$Description[order(data_go[1:25,]$Description)] + +g <- ggplot(data_go[1:25,]) + + geom_point(aes(y = reorder(Description,gr), + #y = Description, + x = gr, + color = -log10(p.adjust.all), + size = or)) + + scale_color_gradient(name = "-log10(FDR)", + low = "#D45E60", high = "#0F5057") + + scale_size_continuous(name = "OddsRatio") + + xlab("% GeneRatio") + + ylab("GOterm") + + theme_light() + + theme(text = element_text(size= 14)) + +hm.1 <- + ggplot(data = data_heat, aes( + x = Description, + y = name, + fill = value <= 0.01 + )) + + geom_tile() + + scale_y_discrete(name ="sig. GO term", + limits=c("p.adjust","p.adjust.down"), + labels=c("upreg.", "downreg.")) + + scale_fill_manual( + name = "GO term sig.", + values = c("darkgrey", "black") + ) + + ylab("sig. GO term") + + theme_light() + + theme( + axis.title.y = element_text(size = 14), + axis.text.x = element_text(size = 12), + axis.title.x = element_text(size =14), + axis.text.y = element_text(size = 12), + #legend.position = "top", + legend.text = element_text(size = 10) + ) + + coord_flip() + +# combined plot +comb <- ggarrange(g, + hm.1 + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank() ), + nrow = 1, + widths = c(2.5,1)) + +# Save plot +ggexport( + comb, + filename = "09_FigureS2_enrichmentDE.pdf", + height = 10, + width = 13 +) + + +# bring table to a format similar as other tables +tf <- data_go[1:25,] + +# add Category column +tf$Category <- "GO_bp" +# add GeneSet column +tf$GeneSet <- tf$Description %>% + stringr::str_replace_all(" ", "_") %>% + stringr::str_to_upper() +tf$GeneSet <- paste0("GO_", tf$GeneSet) +# add -log10 transformed FDR pvalue +tf$`[-log10 FDR]` <- -log10(tf$p.adjust.all) + +# subset to columns that should be printed +tf <- tf %>% + dplyr::select(Category, GeneSet, n_genes, Count, b, c, d, gr, or, pvalue.all, + p.adjust.all, `[-log10 FDR]`, geneID.all) %>% + dplyr::rename(N_genes = n_genes, + N_overlap_A = Count, + `Input that does not overlap_B` = b, + `GO term genes - overlap_C` = c, + `Not GO term genes and not in my geneset_D` = d, + `Genes ratio` = gr, + `OR[log2(a*d)-log2(b*c)]` = or, + p = pvalue.all, + adjP = p.adjust.all, + genes = geneID.all) + +# save to a excel file +write_xlsx(tf, "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure II_PFC/GOterms_DEgenesAllRegions.xlsx") diff --git a/04_PlotsManuscript/plots_v2/10_FigureS2_enrichmentHub.R b/04_PlotsManuscript/plots_v2/10_FigureS2_enrichmentHub.R new file mode 100644 index 0000000..82f0c7a --- /dev/null +++ b/04_PlotsManuscript/plots_v2/10_FigureS2_enrichmentHub.R @@ -0,0 +1,139 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 02.03.2022 +## Author: Nathalie +################################################## +# GO enrichment for hub genes across all regions + + +library(data.table) +library(ggplot2) +library(dplyr) +library(tidyverse) +library(org.Mm.eg.db) +library(clusterProfiler) +library(ggpubr) +library(writexl) + +regions <- + c("AMY", "PFC", "PVN", "CER", "vDG", "dDG", "vCA1", "dCA1") + + +# 1. Read DE tables from all regions ---------- + +list_reg_sig <- list() +list_genes_sig <- list() + +for (reg in regions) { + res <- + fread( + file = paste0( + "~/Documents/ownCloud/DexStim_RNAseq_Mouse/tables/coExpression_kimono/03_AnalysisFuncoup/", + "/04_", + reg, + "_funcoup_differential_nodebetweennessNorm_betacutoff0.01.csv" + ), + sep = "," + ) + na_indices <- which(is.na(res$nodebetweenness_norm)) + res$padj[na_indices] <- 0 + res_sig <- res[res$nodebetweenness_norm >= 1.0, ] + list_reg_sig[[reg]] <- res_sig + list_genes_sig[[reg]] <- res_sig$ensembl_id +} + +hub_shared <- Reduce(intersect, list_genes_sig) +genes <- mapIds(org.Mm.eg.db, keys = hub_shared, keytype = "ENSEMBL", column="ENTREZID") + +# background are all genes in out dataset +background <- read.table(file = paste0("~/Documents/ownCloud/DexStim_RNAseq_Mouse/tables/06_background_entrezID.txt"), + header = FALSE)[,1] + + +# 1.1 GO enrichment for genes DE in all regions --------------------- + +# GO enrichment +min_genes <- round(length(genes)/10) +ego <- enrichGO(gene = as.character(genes), + universe = as.character(background), + OrgDb = org.Mm.eg.db, + ont = "BP", + pAdjustMethod = "BH", + minGSSize = min_genes, # min number of genes associated with GO term + maxGSSize = 10000, # max number of genes associated with GO term + readable = TRUE)@result +head(ego, n = 20) + + +# 1.4 Merge dataframes from all, up and downregulated genes -------------------- + +# apparently not all entrez ids are valid --> not all taken into account in enrichment analysis +ngenes_rel <- 7 +nback_rel <- 12089 +# add columns relevant for gene ratio and odds ratio +data_go <- ego %>% + mutate(n_genes = as.numeric(str_extract(ego$BgRatio, "^\\d+"))) %>% + mutate(gr = Count/n_genes*100) %>% + mutate(b = ngenes_rel - Count, + c = n_genes - Count, + d = nback_rel - Count, + or = log2(Count*d)-log2(b*c)) + +data_go$or[data_go$or == Inf] <-12 + +# 1.5 Plot ------------------------------- + +g <- ggplot(data_go[1:25,]) + + geom_point(aes(y = reorder(Description,gr), + #y = Description, + x = gr, + color = -log10(p.adjust), + size = or)) + + scale_color_gradient(name = "-log10(FDR)", + low = "#D45E60", high = "#0F5057") + + scale_size_continuous(name = "OddsRatio") + + xlab("% GeneRatio") + + ylab("GOterm") + + theme_light() + + theme(text = element_text(size= 14)) +print(g) +# Save plot +ggsave( + "10_FigureS2_enrichmentHub.pdf", + g, + height = 10, + width = 10 +) + + +# bring table to a format similar as other tables +tf <- data_go[1:25,] + +# add Category column +tf$Category <- "GO_bp" +# add GeneSet column +tf$GeneSet <- tf$Description %>% + stringr::str_replace_all(" ", "_") %>% + stringr::str_to_upper() +tf$GeneSet <- paste0("GO_", tf$GeneSet) +# add -log10 transformed FDR pvalue +tf$`[-log10 FDR]` <- -log10(tf$p.adjust) + +# subset to columns that should be printed +tf <- tf %>% + dplyr::select(Category, GeneSet, n_genes, Count, b, c, d, gr, or, pvalue, + p.adjust, `[-log10 FDR]`, geneID) %>% + dplyr::rename(N_genes = n_genes, + N_overlap_A = Count, + `Input that does not overlap_B` = b, + `GO term genes - overlap_C` = c, + `Not GO term genes and not in my geneset_D` = d, + `Genes ratio` = gr, + `OR[log2(a*d)-log2(b*c)]` = or, + p = pvalue, + adjP = p.adjust, + genes = geneID) + +# save to a excel file +write_xlsx(tf, "~/Documents/ownCloud/DexStim_RNAseq_Mouse/manuscript/Figures/Figure II_PFC/GOterms_hubsAllRegions.xlsx") + diff --git a/04_PlotsManuscript/plots_v2/11_Figure5_networkTcf4.R b/04_PlotsManuscript/plots_v2/11_Figure5_networkTcf4.R new file mode 100644 index 0000000..afaf7e1 --- /dev/null +++ b/04_PlotsManuscript/plots_v2/11_Figure5_networkTcf4.R @@ -0,0 +1,348 @@ +################################################## +## Project: DexStim Mouse Brain +## Date: 03.03.2022 +## Author: Nathalie +################################################## +# Tcf4 network in vDG and vCA1 with igraph to pdf + + +library(igraph) +library(data.table) +library(dplyr) +library(org.Mm.eg.db) +library(RColorBrewer) + + +### FUNCTIONS #################################### + +network_data <- function(regions, network_type = "diff"){ + + # Read data tables from all required regions + list_network <- list() + + for (reg in regions){ + file_path <- paste0( + basepath, + "tables/coExpression_kimono/03_AnalysisFuncoup/", + "04_singleRegion_", + reg, + "_filtered_", + network_type, + "Network.csv" + ) + # Read data + if (network_type == "diff") { + data <- + fread(file = file_path) %>% + dplyr::select(target, predictor, beta_mean.base, beta_mean.dex, z, p_adj) + #dplyr::select(target, predictor, z) %>% + #dplyr::rename_with(.fn = ~ reg, .cols = z) + + cols <- colnames(data)[3:6] + data <- data %>% + dplyr::rename_with(.fn = ~paste0(., ".", reg), .cols = all_of(cols) ) + } else { + data <- + fread(file = file_path) %>% + dplyr::select(target, predictor, beta_mean) + # cols <- colnames(data)[3] + data <- data %>% + dplyr::rename_with(.fn = ~ reg, .cols = beta_mean) + # dplyr::rename_with(.fn = ~paste0(., ".", reg), .cols = all_of(cols) ) + } + list_network[[reg]] <- data + } + + data_joined <- list_network %>% + purrr::reduce(full_join, by = c("target","predictor")) + + return(data_joined) +} + + +# DE genes of required brain regions +de_genes <- function(regions){ + + list_de <- list() + # Read and subset DE genes for coloring + for (reg in regions){ + df_de <- fread(paste0(basepath, "tables/02_", + reg,"_deseq2_Dex_1_vs_0_lfcShrink.txt")) + na_indices <- which(is.na(df_de$padj)) + df_de$padj[na_indices] <- 1 + df_de <- df_de[df_de$padj <= 0.1,] + + df_de <- df_de %>% + dplyr::select(Ensembl_ID, padj) %>% + dplyr::rename_with(.fn = ~ reg, .cols = padj) + + list_de[[reg]] <- df_de + } + + de_joined <- list_de %>% + purrr::reduce(full_join, by = c("Ensembl_ID")) + + return(de_joined) +} + + +# hub genes of required brain regions +hub_genes <- function(regions){ + + list_hub <- list() + # Read and subset hub genes for coloring + for (reg in regions) { + df_hub <- + fread( + paste0( + basepath, + "tables/coExpression_kimono/03_AnalysisFuncoup/04_", + reg, + "_funcoup_differential", + "_nodebetweennessNorm_betacutoff0.01.csv" + ) + ) %>% + filter(nodebetweenness_norm >= 1) %>% + dplyr::select(ensembl_id, nodebetweenness_norm) %>% + dplyr::rename_with(.fn = ~ reg, .cols = nodebetweenness_norm) + + list_hub[[reg]] <- df_hub + } + + hub_joined <- list_hub %>% + purrr::reduce(full_join, by = c("ensembl_id")) + + return(hub_joined) +} + + +# bring input genes to correct format +reformat_genes <- function(list_genes){ + if (!startsWith(list_genes[1], "ENSMUSG")){ + format_gene <- sapply(list_genes, stringr::str_to_title) + format_gene <- mapIds(org.Mm.eg.db, keys = format_gene, column = "ENSEMBL", keytype = "SYMBOL") + ids_na <- names(format_gene)[which(is.na(format_gene))] + #print(ids_na) + if (length(ids_na) > 0) { + showNotification(paste("No Ensembl ID found for following genes: ", + ids_na), type = "message", duration = 5) + } + format_gene <- format_gene[which(!is.na(format_gene))] + } else { + format_gene <- list_genes + } + return(format_gene) +} + +# subset network to gene of interest and if required also +# neighbours of genes +subset_network <- function(network_dt, subset_gene, neighbours){ + + if (neighbours) { + e_interest <- network_dt %>% + filter(from %in% subset_gene | to %in% subset_gene) + e_interest <- unique(c(e_interest$from, e_interest$to)) + network_subset <- network_dt %>% + filter(from %in% e_interest & to %in% e_interest) + } else { + network_subset <- network_dt %>% + filter(from %in% subset_gene & to %in% subset_gene) + } + + return(network_subset) +} + + +# create network function +network <- function(data, de_genes, hub_genes, input_genes, mode = "diff", + id_type = "Gene Symbol", neighbours = TRUE){ + + # input genes + print(input_genes) + + # filter data + data <- data %>% + dplyr::rename(from = predictor, to = target) %>% + dplyr::filter(from != to) + data <- subset_network(data, input_genes, neighbours) + + # color palettes for edges and nodes + # edge palette --> assumes that data stores only target, predictor and z scores + # edge_colors <- brewer.pal(ncol(data)-1, "Blues")[2:(ncol(data)-1)] + + # Edges + if(mode == "baseline" | mode == "treatment"){ + # color palettes for edges and nodes + edge_colors <- brewer.pal(ncol(data)-1, "Blues")[2:(ncol(data)-1)] + + # count regions per diff edge that are not na + data$edge_reg <- apply(data[,3:ncol(data)], 1, function(x) names(which(!is.na(x))) ) + data$count_reg <- sapply(data$edge_reg, length) + data$title <- paste0("
Connection in: ", sapply(data$edge_reg, paste, collapse = ", "), + "
") + # assign color to edges according to number of regions + data$c <- sapply(data$count_reg, function(x) edge_colors[x]) + + print(head(data)) + # df for edges + relations <- data.table(from=data$from, + to=data$to, + #beta = data$beta_mean, + color = c, + title = data$title) + # df for edge legend + ledges <- data.frame(color = edge_colors, + label = 1:(ncol(data)-6), + font.align = "top") + } else { + # remove columns + data <- data %>% dplyr::select(-contains("beta"), -contains("p_adj")) + + # color palettes for edges and nodes + # edge palette --> assumes that data stores only target, predictor and z scores + edge_colors <- brewer.pal(ncol(data)-1, "Blues")[2:(ncol(data)-1)] + + # count regions per diff edge that are not na + data$edge_reg <- apply(data[,3:ncol(data)], 1, function(x) names(which(!is.na(x))) ) + data$count_reg <- sapply(data$edge_reg, length) + data$title <- paste0("Diff. co-expressed in: ", sapply(data$edge_reg, paste, collapse = ", "), + "
") + # assign color to edges according to number of regions + data$c <- sapply(data$count_reg, function(x) edge_colors[x]) + + print(head(data)) + # df for edges + relations <- data.table(from=data$from, + to=data$to, + #z = data$z, + #p_adj = data$p_adj, + color = data$c, + title = data$title) + # df for edge legend + ledges <- data.frame(color = edge_colors, + label = 1:(ncol(data)-6), + font.align = "top") + } + print(nrow(relations)) + + + # Nodes + # get unique nodes with correct id + nodes <- data.frame("id" = unique(union( + c(relations$from, relations$to), + input_genes + )), stringsAsFactors = FALSE) + if (id_type == "Gene Symbol"){ + print(nodes$id) + nodes$label <- mapIds(org.Mm.eg.db, keys = nodes$id, + column = "SYMBOL", keytype = "ENSEMBL") + } else { + nodes$label <- nodes$id + } + + # count regions where gene is DE or/and hub + nodes_de <- left_join(x = nodes, y = de_genes, + by = c("id" = "Ensembl_ID")) + nodes_hub <- left_join(x = nodes, y = hub_genes, + by = c("id" = "ensembl_id")) + + nodes$de_reg <- apply(nodes_de[,3:ncol(nodes_de)], 1, + function(x) names(which(!is.na(x))) ) + nodes$de_count <- sapply(nodes$de_reg, length) + + nodes$hub_reg <- apply(nodes_hub[,3:ncol(nodes_hub)], 1, + function(x) names(which(!is.na(x))) ) + nodes$hub_count <- sapply(nodes$hub_reg, length) + + # set color according to DE/hub status + nodes$color <- rep("darkblue", nrow(nodes_de)) + nodes$color[nodes$de_count > 0] <- "orange" + nodes$color[nodes$hub_count > 0] <- "darkred" + nodes$color[nodes$de_count > 0 & nodes$hub_count > 0] <- "purple" + + #nodes$opacity <- nodes$de_count + nodes$hub_count + #nodes$opacity <- nodes$opacity/max(nodes$opacity) + + nodes$title <- paste0("",nodes$label,"",
+ "
DE in regions: ", sapply(nodes$de_reg, paste, collapse = ", "),
+ "
Hub in regions: ", sapply(nodes$hub_reg, paste, collapse = ", "),"