Skip to content

Commit

Permalink
revisions_Sep2022
Browse files Browse the repository at this point in the history
  • Loading branch information
ngerstner committed Sep 21, 2022
1 parent 96e9c09 commit b6af093
Showing 44 changed files with 5,561 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
*.pdf
*.png
*.ai
*.csv
226 changes: 226 additions & 0 deletions 01_DiffExp/13_GRexpression_plots.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
##################################################
## Project: DexStim Mouse Brain
## Date: 06.09.2022
## Author: Nathalie
##################################################
# Plot GR expression level at control level in all brain regions
# and check if correlation with number of DE genes exists
# Suggestion of reviewer

library(data.table)
library(tidyr)
library(dplyr)
library(stringr)

basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse"
folder_table <- paste0(basedir,"/tables")
mean_exp_file <- file.path(folder_table, "12_meanExp_regionDex.csv")
files <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE)

# IDs
gene_symbol_gr <- "Nr3c1"
gene_symbol_mr <- "Nr3c2"
ensembl_gr <- "ENSMUSG00000024431"
ensembl_mr <- "ENSMUSG00000031618"


## 1. Read mean expression data
data <- read.table(mean_exp_file, sep = ";", header = TRUE) %>%
dplyr::select(contains("CNTRL"), gene_symbol) %>%
dplyr::rename_all(funs(str_replace_all(., "_CNTRL", "")))
gr_mr <- data[data$gene_symbol == gene_symbol_gr | data$gene_symbol == gene_symbol_mr,] %>% # subset to GR and MR
tibble::remove_rownames() %>%
tibble::column_to_rownames(var = "gene_symbol")
gr_mr <- as.data.frame(t(gr_mr)) %>%
tibble::rownames_to_column("region")

## 2. Number of DE genes per region
# read files with DE results
regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files)
expr_list <- lapply(files, function(x) fread(x))
names(expr_list) <- regions_files
# concatenate DE results to get those for GR and MR
de_df <- bind_rows(expr_list, .id="region") %>%
dplyr::filter(V1 == ensembl_gr | V1 == ensembl_mr) %>%
dplyr::mutate(gene = case_when(
V1 == ensembl_gr ~ "Nr3c1",
V1 == ensembl_mr ~ "Nr3c2"
)) %>%
dplyr::select(region, gene, log2FoldChange, padj) %>%
pivot_wider(names_from = gene, values_from = c(log2FoldChange,padj))
# filter each df to genes with FDR smaller 0.1
expr_list <- lapply(expr_list, function(x) dplyr::filter(x, padj <= 0.1))
df_degenes <- bind_rows(expr_list, .id="region") %>%
mutate(up_down = ifelse(log2FoldChange > 0, "up", "down"))
n_degenes <- df_degenes %>%
group_by(region) %>%
count()
n_updowngenes <- df_degenes %>%
group_by(region, up_down) %>%
count()

## 3. Join mean expression values with number of DE genes
gr_mr <- inner_join(gr_mr, n_degenes, by = "region")
gr_mr <- dplyr::rename(gr_mr, nr_de = n)
gr_mr <- inner_join(gr_mr, de_df, by="region")

gr_mr_updown <- inner_join(gr_mr, n_updowngenes, by = "region") %>%
mutate(up_down = factor(gr_mr_updown$up_down, levels = c("up", "down")))

## 4. Plot expression of GR and MR against number of DE genes
# GR
ggplot(gr_mr, aes(x=Nr3c1, y=nr_de,
size=log2FoldChange_Nr3c1,
color=-log10(padj_Nr3c1),
label=region)) +
geom_point() +
geom_text(hjust=-0.25, vjust=-0.25,
size=4, color="black") +
xlim(min(gr_mr$Nr3c1),max(gr_mr$Nr3c1)+0.1) +
xlab("Normalised Expression Level GR") +
ylab("Number of DE genes") +
scale_color_gradient(name = "-log10(FDR)",
low="lightgrey", high="#D45E60",
limits = c(0,13)) +
scale_size_continuous(name="log2(FoldChange)") +
theme_light() +
theme(
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12)
)
ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_GR.pdf"),
width = 8,
height = 6)

# MR
ggplot(gr_mr, aes(x=Nr3c2, y=nr_de,
size=log2FoldChange_Nr3c2,
color=-log10(padj_Nr3c2),
label=region)) +
geom_point() +
geom_text(hjust=-0.25, vjust=-0.25,
size=4, color="black") +
xlim(min(gr_mr$Nr3c2),max(gr_mr$Nr3c2)+0.1) +
xlab("Normalised Expression Level MR") +
ylab("Number of DE genes") +
scale_color_gradient(name = "-log10(FDR)",
low="lightgrey", high="#D45E60",
limits = c(0,13)) +
scale_size_continuous(name="log2(FoldChange)") +
theme_light() +
theme(
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12)
)
ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_MR.pdf"),
width = 8,
height = 6)


## 5. Plot expression of GR and MR against number of DE genes - stratified by up and downregulation
# GR
ggplot(gr_mr_updown, aes(x=Nr3c1, y=n,
size=log2FoldChange_Nr3c1,
color=-log10(padj_Nr3c1),
label=region)) +
geom_point() +
geom_text(hjust=-0.25, vjust=-0.25,
size=4, color="black") +
xlim(min(gr_mr_updown$Nr3c1),max(gr_mr_updown$Nr3c1)+0.1) +
xlab("Normalised Expression Level GR") +
ylab("Number of DE genes") +
scale_color_gradient(name = "-log10(FDR)",
low="lightgrey", high="#D45E60",
limits = c(0,13)) +
scale_size_continuous(name="log2(FoldChange)") +
facet_wrap(~up_down, nrow = 2, scales = "free",
labeller = labeller(up_down =
c("down" = "Downregulated genes",
"up" = "Upregulated genes")
)) +
theme_light() +
theme(
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12)
)
ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_GR_updown.pdf"),
width = 8,
height = 6)

# MR
ggplot(gr_mr_updown, aes(x=Nr3c2, y=n,
size=log2FoldChange_Nr3c2,
color=-log10(padj_Nr3c2),
label=region)) +
geom_point() +
geom_text(hjust=-0.25, vjust=-0.25,
size=4, color="black") +
xlim(min(gr_mr_updown$Nr3c2),max(gr_mr_updown$Nr3c2)+0.1) +
xlab("Normalised Expression Level MR") +
ylab("Number of DE genes") +
scale_color_gradient(name = "-log10(FDR)",
low="lightgrey", high="#D45E60",
limits = c(0,13)) +
scale_size_continuous(name="log2(FoldChange)") +
facet_wrap(~up_down, nrow = 2, scales = "free",
labeller = labeller(up_down =
c("down" = "Downregulated genes",
"up" = "Upregulated genes")
)) +
theme_light() +
theme(
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12)
)
ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_MR_updown.pdf"),
width = 8,
height = 6)


## 6. Plot GR/MR ratio against number of DE genes - stratified by up and downregulation
gr_mr_updown <- gr_mr_updown %>%
mutate(grmr_ratio = Nr3c1/Nr3c2)

# GR/MR ratio
ggplot(gr_mr_updown, aes(x=grmr_ratio, y=n,
size=log2FoldChange_Nr3c2,
color=-log10(padj_Nr3c2),
label=region)) +
geom_point() +
geom_text(hjust=-0.25, vjust=-0.25,
size=4, color="black") +
xlim(min(gr_mr_updown$grmr_ratio),max(gr_mr_updown$grmr_ratio)+0.1) +
xlab("Normalised Expression Level MR") +
ylab("Number of DE genes") +
scale_color_gradient(name = "-log10(FDR)",
low="lightgrey", high="#D45E60",
limits = c(0,13)) +
scale_size_continuous(name="log2(FoldChange)") +
facet_wrap(~up_down, nrow = 2, scales = "free",
labeller = labeller(up_down =
c("down" = "Downregulated genes",
"up" = "Upregulated genes")
)) +
theme_light() +
theme(
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12)
)
ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/13_Reviewer1_GRMRratio_updown.pdf"),
width = 8,
height = 6)
147 changes: 147 additions & 0 deletions 01_DiffExp/14_CompGWAS_DE.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
##################################################
## Project: DexStim Mouse Brain
## Date: 07.09.2022
## Author: Nathalie
##################################################
# Compare DEs and DNs with previously identified GWAS risk genes
# of 5 psychiatric disorders (https://pubmed.ncbi.nlm.nih.gov/32152537/)

library(data.table)
library(tidyr)
library(dplyr)
library(stringr)
library(readxl)
library(fgsea)
library(biomaRt)
library(ggplot2)

# trait of interest
traits <- c("ADHD", "ASD", "BD", "MDD", "SCZ")
# trait <- "MDD"

# define pathes
basedir <- "/Users/nathalie_gerstner/Documents/ownCloud/DexStim_RNAseq_Mouse"
folder_table <- paste0(basedir,"/tables")
file_hmagma <- paste0(basedir, "/data/reviews/H-MAGMAv1.08_Adult_brain_output.xlsx")
files_de <- list.files(path = folder_table, pattern = "deseq2_Dex_0_vs_1_lfcShrink.txt$", full.names = TRUE)
regions_files <- sub(".*02_(\\w*)_deseq2.*","\\1",files_de)
file_background <- paste0(folder_table, "/06_background.txt")


## 1. Read DE genes from different brain regions
expr_list <- lapply(files_de, function(x) fread(x) %>% filter(padj <= 0.1))
names(expr_list) <- regions_files

de_list <- lapply(expr_list, function(x) x$V1)

# unique DE genes
data_unique <- bind_rows(expr_list, .id = "region") %>%
group_by(V1) %>%
summarise(region = list(region)) %>%
mutate(nr_regions = lengths(region)) %>%
mutate(unique = (nr_regions == 1)) %>%
filter(unique)

de_uni <- list()
for (r in regions_files){
de_uni[[r]] <- data_unique$V1[data_unique$region == r]
}

# background genes
background <- read.table(file_background,
row.names = NULL)


## 2. Mapping of mouse IDs to human IDs
# map mouse Ensembl Ids to human Ensembl Ids
human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
# --> this is a workaround as the normal commands lead to errors
# --> one mouse ID can be mapped to multiple human IDs

de_human <- lapply(de_list, function(x) getLDS(attributes=c("ensembl_gene_id"),
filters="ensembl_gene_id", values=x, mart=mouse,
attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1)

de_uni_human <- lapply(de_uni, function(x) getLDS(attributes=c("ensembl_gene_id"),
filters="ensembl_gene_id", values=x, mart=mouse,
attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1)

background_human <- getLDS(attributes=c("ensembl_gene_id"),
filters="ensembl_gene_id", values=background$V1, mart=mouse,
attributesL=c("ensembl_gene_id"), martL=human)$Gene.stable.ID.1

res_list <- list()
res_list_uni <- list()
for (trait in traits){

## 2. Read GWAS/H-MAGMA data
data <- read_excel(file_hmagma, sheet=paste0("H-MAGMA_",trait)) %>%
arrange(ZSTAT) %>% # rank gene list according to zscore as suggested in paper
filter(GENE %in% background_human)
ranks <- data$ZSTAT
names(ranks) <- data$GENE


## 3. Run fgsea for Gene Set Enrichment Analysis
fgseaRes <- fgsea(pathways = de_human,
stats = ranks)
# sampleSize = 600)
res_list[[trait]] <- fgseaRes

# unique DE genes
fgseaRes_uni <- fgsea(pathways = de_uni_human,
stats = ranks)
# sampleSize = 600)
res_list_uni[[trait]] <- fgseaRes_uni

plotEnrichment(de_uni_human[["PFC"]],
ranks)

}


## 4. Plot GSEA p-values as heatmap from all traits and brain regions
df <- bind_rows(res_list, .id="trait")
df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests
ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) +
geom_tile() +
xlab("GWAS trait") +
ylab("Brain region") +
scale_fill_gradient(name = "-log10(FDR)",
low="lightgrey", high="#D45E60",
limits = c(0,1)) +
theme_light() +
theme(
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12)
)
ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14_Reviewer2_1a_DEall.pdf"),
width = 8,
height = 6)


# plot for unique DE genes
df <- bind_rows(res_list_uni, .id="trait")
df$FDR <- p.adjust(df$pval, method = "fdr") # apply correction across all tests
ggplot(df, aes(x = trait, y = pathway, fill = -log10(FDR))) +
geom_tile() +
xlab("GWAS trait") +
ylab("Brain region") +
scale_fill_gradient(name = "-log10(FDR)",
low="lightgrey", high="#D45E60",
limits = c(0,1)) +
theme_light() +
theme(
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12)
)
ggsave(paste0(basedir, "/scripts_manuscript/04_PlotsManuscript/Revision/14_Reviewer2_1a_DEunique.pdf"),
width = 8,
height = 6)
Loading

0 comments on commit b6af093

Please sign in to comment.