-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit ca70146
Showing
16 changed files
with
2,652 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Schizophrenia risk variants modulate transcription factor binding and gene expression in cortical cell types | ||
|
||
Schizophrenia is a complex neuropsychiatric disorder with a strong genetic component. Genome-wide association studies (GWAS) have identified numerous risk variants, but their functional impact on gene regulation remains largely unknown. We investigate the disruption and enhancement of transcription factor (TF) binding motifs by schizophrenia-associated GWAS SNPs in 15 cortical cell types of the human brain. By integrating single-nucleus ATAC-seq and RNA-seq data from 71 donors (36 affected by schizophrenia) with GWAS summary statistics, we identify specific TF motifs whose binding affinities are altered by risk and protective SNPs. We find that risk SNPs predominantly disrupt TF binding, while protective SNPs show a more balanced effect. Furthermore, we demonstrate that disrupted TF motifs can lead to altered expression of target genes, including NAGA in excitatory neurons and HLA-B in oligodendrocyte precursor cells. These genes have been previously implicated in schizophrenia and our study provides a mechanism for their dysregulation through altered TF binding. Our findings highlight the importance of considering cell type-specific effects and provide a genome-wide map of TF motif disruptions in schizophrenia, offering insights into the regulatory mechanisms underlying disease risk. | ||
|
||
This repository contains all scripts used for the analysis of TF binding disruption and enhancement through schizophrenia GWAS SNPs and the differential expression analysis of respective target genes between carriers and non-carriers of the respective alternative allele. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
## Identify marker features based on the peak matrix | ||
## only on SCZ cases and controls | ||
|
||
# run with conda environment sc-atac | ||
|
||
library(ArchR) | ||
library(dplyr) | ||
library(tibble) | ||
library(tidyr) | ||
library(RColorBrewer) | ||
library(BSgenome.Hsapiens.UCSC.hg38) | ||
|
||
# folder of ArchR project | ||
archr_folder <- "../ArchRSubset_SCZ" | ||
|
||
# 1. Read data | ||
|
||
projPM <- loadArchRProject(path = paste0(archr_folder,"/")) | ||
|
||
# 2. Check out available matrices, peak matrix was added to ArchR project in 14_1_markerFeatures.R, but marker features were not saved to object or file | ||
#projPM_peak <- addPeakMatrix(projPM) | ||
getAvailableMatrices(projPM) | ||
# [1] "GeneIntegrationMatrix" "GeneScoreMatrix" "MotifMatrix" | ||
# [4] "MotifMatrix_SCZ" "PeakMatrix" "TileMatrix" | ||
peak_mat <- getMatrixFromProject(projPM, useMatrix="PeakMatrix") | ||
min(peak_mat@colData$ReadsInPeaks) # 482 | ||
|
||
# 3. Add marker features for each cell type compared to all others | ||
|
||
markersPeaks <- getMarkerFeatures( | ||
ArchRProj = projPM, | ||
useMatrix = "PeakMatrix", | ||
groupBy = "celltypes_refined", | ||
bias = c("TSSEnrichment", "log10(nFrags)"), | ||
testMethod = "wilcoxon" | ||
) | ||
|
||
saveRDS(markersPeaks, "data/markersPeaks.rds") | ||
markersPeaks <- readRDS("data/markersPeaks.rds") | ||
|
||
markerList <- getMarkers(markersPeaks, | ||
cutOff = "FDR <= 0.05 & Log2FC >= 1") | ||
markerList | ||
markerList_GRanges <- getMarkers(markersPeaks, | ||
cutOff = "FDR <= 0.05 & Log2FC >= 1", | ||
returnGR = TRUE) | ||
markerList_GRanges | ||
|
||
# 3a. Plot number of marker peaks per cell type | ||
n_markers <- sapply(markerList, nrow) | ||
df_n_markers <- data.frame(celltype=names(n_markers), | ||
value=n_markers, row.names=NULL) | ||
write.csv(df_n_markers, "data/01_Number-MarkerPeaks.csv") | ||
|
||
# Barplot with number of marker peaks per celltype | ||
ggplot(df_n_markers, aes(x=celltype, y=value)) + | ||
geom_bar(stat="identity", fill="steelblue")+ | ||
geom_text(aes(label=value), vjust=1.6, color="white", size=3.5)+ | ||
xlab("") + | ||
ylab("Number of marker peaks") + | ||
scale_x_discrete(guide = guide_axis(angle = 45)) + | ||
theme_minimal() + | ||
theme( | ||
strip.text.x = element_text(size = 15), | ||
axis.title.x = element_text(size = 15), | ||
axis.title.y = element_text(size = 15), | ||
axis.text.x = element_text(size = 12), | ||
axis.text.y = element_text(size = 12), | ||
legend.text = element_text(size = 12), | ||
legend.title = element_text(size = 12) | ||
) | ||
ggsave(paste0(projPM@projectMetadata$outputDirectory, "/Plots/TFbinding/01_Barplot_MarkerPeaks.pdf"), | ||
height = 8, | ||
width = 10) | ||
|
||
# 4. Add motif annotations to each peak | ||
|
||
projPM <- addMotifAnnotations(ArchRProj = projPM, | ||
motifSet = "JASPAR2020", | ||
#collection = "CORE", | ||
annoName = "Motif") | ||
#force = TRUE) # force was necessary after subsetting to SCZ samples | ||
|
||
|
||
# 5. Motif Enrichment in Marker Peaks | ||
|
||
enrichMotifs <- peakAnnoEnrichment( | ||
seMarker = markersPeaks, | ||
ArchRProj = projPM, | ||
peakAnnotation = "Motif", | ||
cutOff = "FDR <= 0.05 & Log2FC >= 1" | ||
) | ||
enrichMotifs | ||
head(enrichMotifs@assays@data$mlog10p) | ||
|
||
saveRDS(enrichMotifs, "data/enrichMotifs.rds") | ||
enrichMotifs <- readRDS("data/enrichMotifs.rds") | ||
|
||
# 5.1 Plot heatmap of enriched motifs per cell type | ||
|
||
heatmapEM <- plotEnrichHeatmap(enrichMotifs, | ||
n = 7, # limit of motifs shown per cell type | ||
transpose = TRUE) | ||
ComplexHeatmap::draw(heatmapEM, | ||
heatmap_legend_side = "bot", | ||
annotation_legend_side = "bot") | ||
plotPDF(heatmapEM, | ||
name = "TFbinding/01_Motifs-Enriched-Marker-Heatmap", | ||
width = 8, | ||
height = 6, | ||
ArchRProj = projPM, | ||
addDOC = FALSE) | ||
|
||
# 5.2 TF binding enrichments with P < 1*10^-10 | ||
|
||
# extract motif-celltype pairs with respective P value | ||
tfenr <- as.data.frame(enrichMotifs@assays@data$mlog10p) %>% | ||
rownames_to_column(var = "motif") %>% | ||
pivot_longer(!motif, names_to="celltype", values_to="mlog10p") %>% | ||
filter(mlog10p >= 10) | ||
write.csv(tfenr, "data/01_MotifEnrichments_threshold.csv") | ||
|
||
# count enriched motifs per celltype | ||
tf_enr_count <- tfenr %>% | ||
group_by(celltype) %>% | ||
count(celltype) | ||
write.csv(tf_enr_count, "data/01_MotifEnrichments_count.csv") | ||
|
||
# # A tibble: 15 × 2 | ||
# # Groups: celltype [15] | ||
# celltype n | ||
# <chr> <int> | ||
# 1 Astro_FB 127 | ||
# 2 Astro_PP 128 | ||
# 3 Endothelial 86 | ||
# 4 Exc_L2-3 111 | ||
# 5 Exc_L3-5 143 | ||
# 6 Exc_L4-6_1 127 | ||
# 7 Exc_L4-6_2 98 | ||
# 8 In_LAMP5 57 | ||
# 9 In_PVALB_Ba 78 | ||
# 10 In_PVALB_Ch 150 | ||
# 11 In_SST 79 | ||
# 12 In_VIP 122 | ||
# 13 Microglia 153 | ||
# 14 OPC 113 | ||
# 15 Oligodendrocyte 111 | ||
|
||
# Barplot with number of motif enrichments with P < 1*10^-10 per celltype | ||
ggplot(tf_enr_count, aes(x=celltype, y=n)) + | ||
geom_bar(stat="identity", fill="steelblue")+ | ||
geom_text(aes(label=n), vjust=1.6, color="white", size=3.5)+ | ||
xlab("") + | ||
ylab("Number of enriched motifs") + | ||
scale_x_discrete(guide = guide_axis(angle = 45)) + | ||
theme_minimal() + | ||
theme( | ||
strip.text.x = element_text(size = 15), | ||
axis.title.x = element_text(size = 15), | ||
axis.title.y = element_text(size = 15), | ||
axis.text.x = element_text(size = 12), | ||
axis.text.y = element_text(size = 12), | ||
legend.text = element_text(size = 12), | ||
legend.title = element_text(size = 12) | ||
) | ||
ggsave(paste0(projPM@projectMetadata$outputDirectory, "/Plots/TFbinding/01_Barplot_Enriched_Motifs.pdf"), | ||
height = 8, | ||
width = 10) | ||
|
||
# 6. Save ArchR project with motif annotations | ||
saveArchRProject(ArchRProj = projPM) |
Oops, something went wrong.