Skip to content

Commit

Permalink
add TF binding code
Browse files Browse the repository at this point in the history
  • Loading branch information
nathalie committed Feb 3, 2025
0 parents commit ca70146
Show file tree
Hide file tree
Showing 16 changed files with 2,652 additions and 0 deletions.
Binary file added .DS_Store
Binary file not shown.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Schizophrenia risk variants modulate transcription factor binding and gene expression in cortical cell types

Schizophrenia is a complex neuropsychiatric disorder with a strong genetic component. Genome-wide association studies (GWAS) have identified numerous risk variants, but their functional impact on gene regulation remains largely unknown. We investigate the disruption and enhancement of transcription factor (TF) binding motifs by schizophrenia-associated GWAS SNPs in 15 cortical cell types of the human brain. By integrating single-nucleus ATAC-seq and RNA-seq data from 71 donors (36 affected by schizophrenia) with GWAS summary statistics, we identify specific TF motifs whose binding affinities are altered by risk and protective SNPs. We find that risk SNPs predominantly disrupt TF binding, while protective SNPs show a more balanced effect. Furthermore, we demonstrate that disrupted TF motifs can lead to altered expression of target genes, including NAGA in excitatory neurons and HLA-B in oligodendrocyte precursor cells. These genes have been previously implicated in schizophrenia and our study provides a mechanism for their dysregulation through altered TF binding. Our findings highlight the importance of considering cell type-specific effects and provide a genome-wide map of TF motif disruptions in schizophrenia, offering insights into the regulatory mechanisms underlying disease risk.

This repository contains all scripts used for the analysis of TF binding disruption and enhancement through schizophrenia GWAS SNPs and the differential expression analysis of respective target genes between carriers and non-carriers of the respective alternative allele.
171 changes: 171 additions & 0 deletions TF_binding/01_CellTypeSpecificPeaks_SCZ.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
## Identify marker features based on the peak matrix
## only on SCZ cases and controls

# run with conda environment sc-atac

library(ArchR)
library(dplyr)
library(tibble)
library(tidyr)
library(RColorBrewer)
library(BSgenome.Hsapiens.UCSC.hg38)

# folder of ArchR project
archr_folder <- "../ArchRSubset_SCZ"

# 1. Read data

projPM <- loadArchRProject(path = paste0(archr_folder,"/"))

# 2. Check out available matrices, peak matrix was added to ArchR project in 14_1_markerFeatures.R, but marker features were not saved to object or file
#projPM_peak <- addPeakMatrix(projPM)
getAvailableMatrices(projPM)
# [1] "GeneIntegrationMatrix" "GeneScoreMatrix" "MotifMatrix"
# [4] "MotifMatrix_SCZ" "PeakMatrix" "TileMatrix"
peak_mat <- getMatrixFromProject(projPM, useMatrix="PeakMatrix")
min(peak_mat@colData$ReadsInPeaks) # 482

# 3. Add marker features for each cell type compared to all others

markersPeaks <- getMarkerFeatures(
ArchRProj = projPM,
useMatrix = "PeakMatrix",
groupBy = "celltypes_refined",
bias = c("TSSEnrichment", "log10(nFrags)"),
testMethod = "wilcoxon"
)

saveRDS(markersPeaks, "data/markersPeaks.rds")
markersPeaks <- readRDS("data/markersPeaks.rds")

markerList <- getMarkers(markersPeaks,
cutOff = "FDR <= 0.05 & Log2FC >= 1")
markerList
markerList_GRanges <- getMarkers(markersPeaks,
cutOff = "FDR <= 0.05 & Log2FC >= 1",
returnGR = TRUE)
markerList_GRanges

# 3a. Plot number of marker peaks per cell type
n_markers <- sapply(markerList, nrow)
df_n_markers <- data.frame(celltype=names(n_markers),
value=n_markers, row.names=NULL)
write.csv(df_n_markers, "data/01_Number-MarkerPeaks.csv")

# Barplot with number of marker peaks per celltype
ggplot(df_n_markers, aes(x=celltype, y=value)) +
geom_bar(stat="identity", fill="steelblue")+
geom_text(aes(label=value), vjust=1.6, color="white", size=3.5)+
xlab("") +
ylab("Number of marker peaks") +
scale_x_discrete(guide = guide_axis(angle = 45)) +
theme_minimal() +
theme(
strip.text.x = element_text(size = 15),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12),
legend.title = element_text(size = 12)
)
ggsave(paste0(projPM@projectMetadata$outputDirectory, "/Plots/TFbinding/01_Barplot_MarkerPeaks.pdf"),
height = 8,
width = 10)

# 4. Add motif annotations to each peak

projPM <- addMotifAnnotations(ArchRProj = projPM,
motifSet = "JASPAR2020",
#collection = "CORE",
annoName = "Motif")
#force = TRUE) # force was necessary after subsetting to SCZ samples


# 5. Motif Enrichment in Marker Peaks

enrichMotifs <- peakAnnoEnrichment(
seMarker = markersPeaks,
ArchRProj = projPM,
peakAnnotation = "Motif",
cutOff = "FDR <= 0.05 & Log2FC >= 1"
)
enrichMotifs
head(enrichMotifs@assays@data$mlog10p)

saveRDS(enrichMotifs, "data/enrichMotifs.rds")
enrichMotifs <- readRDS("data/enrichMotifs.rds")

# 5.1 Plot heatmap of enriched motifs per cell type

heatmapEM <- plotEnrichHeatmap(enrichMotifs,
n = 7, # limit of motifs shown per cell type
transpose = TRUE)
ComplexHeatmap::draw(heatmapEM,
heatmap_legend_side = "bot",
annotation_legend_side = "bot")
plotPDF(heatmapEM,
name = "TFbinding/01_Motifs-Enriched-Marker-Heatmap",
width = 8,
height = 6,
ArchRProj = projPM,
addDOC = FALSE)

# 5.2 TF binding enrichments with P < 1*10^-10

# extract motif-celltype pairs with respective P value
tfenr <- as.data.frame(enrichMotifs@assays@data$mlog10p) %>%
rownames_to_column(var = "motif") %>%
pivot_longer(!motif, names_to="celltype", values_to="mlog10p") %>%
filter(mlog10p >= 10)
write.csv(tfenr, "data/01_MotifEnrichments_threshold.csv")

# count enriched motifs per celltype
tf_enr_count <- tfenr %>%
group_by(celltype) %>%
count(celltype)
write.csv(tf_enr_count, "data/01_MotifEnrichments_count.csv")

# # A tibble: 15 × 2
# # Groups: celltype [15]
# celltype n
# <chr> <int>
# 1 Astro_FB 127
# 2 Astro_PP 128
# 3 Endothelial 86
# 4 Exc_L2-3 111
# 5 Exc_L3-5 143
# 6 Exc_L4-6_1 127
# 7 Exc_L4-6_2 98
# 8 In_LAMP5 57
# 9 In_PVALB_Ba 78
# 10 In_PVALB_Ch 150
# 11 In_SST 79
# 12 In_VIP 122
# 13 Microglia 153
# 14 OPC 113
# 15 Oligodendrocyte 111

# Barplot with number of motif enrichments with P < 1*10^-10 per celltype
ggplot(tf_enr_count, aes(x=celltype, y=n)) +
geom_bar(stat="identity", fill="steelblue")+
geom_text(aes(label=n), vjust=1.6, color="white", size=3.5)+
xlab("") +
ylab("Number of enriched motifs") +
scale_x_discrete(guide = guide_axis(angle = 45)) +
theme_minimal() +
theme(
strip.text.x = element_text(size = 15),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12),
legend.title = element_text(size = 12)
)
ggsave(paste0(projPM@projectMetadata$outputDirectory, "/Plots/TFbinding/01_Barplot_Enriched_Motifs.pdf"),
height = 8,
width = 10)

# 6. Save ArchR project with motif annotations
saveArchRProject(ArchRProj = projPM)
Loading

0 comments on commit ca70146

Please sign in to comment.