Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
##################################################
## Project: scATACseq label transfer
## Date: 22.07.2020
## Author: Nathalie
##################################################
setwd("~/Documents/PostmortemBrain/analysis/markerGeneDefinition")
#setwd("/net/PE1/raid1/LAURA/SC_ANALYSIS/markerGeneDefinition")
library(Seurat)
#library(Signac)
library(dplyr)
library(reshape2)
library(RColorBrewer)
library(ggplot2)
library(org.Hs.eg.db)
library(Matrix)
library(readr)
### DATA LAURA ###################################
dataset <- "dataLaura"
#Read ATAC data -----------------------
#genome regions
mat<- readMM(paste0(dataset,"/raw_atac/matrix.mtx"))
peaks<- read_tsv(paste0(dataset,"/raw_atac/peaks.bed"), col_names = F)
peaks<- peaks %>%
mutate(id1 = paste(X2, X3, sep = "-")) %>%
mutate(id = paste(X1, id1, sep = ":"))
barcodes<- read_tsv(paste0(dataset,"/raw_atac/barcodes.tsv"), col_names =F)
rownames(mat)<- peaks$id1
colnames(mat)<- barcodes$X1
data_atac <- CreateSeuratObject(counts = mat,
assay = "ATAC",
project = dataset,
min.features = 500)
data_atac <- subset(data_atac, subset = nCount_ATAC > 500 & nCount_ATAC < 10000)
rm(mat)
rm(peaks)
rm(barcodes)
#Read binarized object and add meta data to raw data
data_binarized <- readRDS(paste0(dataset, "/scATAC_PMaggr_clusterd_annotated.rds"))
data_atac <- AddMetaData(object = data_atac,
metadata = data_binarized@meta.data)
rm(data_binarized)
#Read Cicero gene activity matrix
gene_atac <- readRDS(paste0(dataset,"/PM_geneActivities_dgMat.rds"))
#Get gene symbols for ENSEMBL IDs
symbols <- unname(mapIds(org.Hs.eg.db, keys = dimnames(gene_atac)[[1]], keytype = "ENSEMBL", column="SYMBOL"))
dimnames(gene_atac)[[1]] <- symbols
gene_atac <- as(gene_atac, "dgTMatrix")
#Remove duplicated gene symbols from matrix
OP2 <- function(x) {
nms <- rownames(x)
uniquenms <- unique(nms)
# build the sparseMatrix again: x's with same index values are automatically
# added together, keeping in mind that indexes stored from 0 but built from 1
sparseMatrix(i = match(nms, uniquenms)[x@i + 1],
j = x@j + 1,
x = x@x,
dimnames = list(uniquenms, colnames(x)),
giveCsparse = FALSE)
}
gene_atac <- OP2(gene_atac)
gene_atac <- gene_atac[-which(is.na(rownames(gene_atac))),]
#Use only cells that are present in both (gene activity matrix and feature matrix)
cells_intersect <- intersect(colnames(data_atac), colnames(gene_atac))
data_atac <- subset(data_atac, cells = cells_intersect)
data_atac[["ACTIVITY"]] <- CreateAssayObject(counts = gene_atac[, cells_intersect])
rm(gene_atac)
rm(cells_intersect)
rm(symbols)
data_atac$tech <- "atac"
#Preprocessing of ATAC data -------------------
DefaultAssay(data_atac) <- "ACTIVITY"
data_atac <- FindVariableFeatures(data_atac)
#Normalization already done with Cicero by Michael
#data_atac <- NormalizeData(data_atac)
#data_atac <- ScaleData(data_atac)
DefaultAssay(data_atac) <- "ATAC"
VariableFeatures(data_atac) <- names(which(Matrix::rowSums(data_atac) > 100))
data_atac <- RunLSI(data_atac, n = 50, scale.max = NULL)
#Sequencing depth correlation, remove 1st component of downstream analysis if strong correlation
DepthCor(data_atac) # --> 1st component can remain
DepthCor(data_atac, n = 50)
#Non-linear dimension reduction and clustering
data_atac <- RunUMAP(data_atac, reduction = "lsi", dims = 1:50)
data_atac <- FindNeighbors(data_atac, reduction = "lsi", dims = 1:50)
data_atac$seurat_clusters_michael <- data_atac$seurat_clusters #copy clusters, otherwise overwritten
data_atac <- FindClusters(data_atac, resolution = 0.1)
DimPlot(object = data_atac, group = "seurat_clusters", label = TRUE) + NoLegend()
#Read RNA data with cell type labels
data_rna <- readRDS(paste0(dataset,"/data_object_sct_integrated.rds"))
data_rna$tech <- "rna"
#Plot ATAC and RNA data next to each other (with Michael's labels)
p1 <- DimPlot(data_atac, reduction = "umap", group.by = "jointId", label = TRUE, repel = TRUE) + NoLegend() + ggtitle("scATAC-seq")
p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + NoLegend() + ggtitle("scRNA-seq")
p1 + p2
#Label transfer ------------------
#Remove genes from RNA data that are not in ATAC data (for transfer)
intersect_features <- intersect(dimnames(data_rna@assays$RNA)[[1]],
dimnames(data_atac@assays$ACTIVITY)[[1]])
data_rna <- subset(x = data_rna, features = intersect_features)
#Find Transfer Anchors and Transfer labels
transfer.anchors <- FindTransferAnchors(reference = data_rna, query = data_atac,
features = VariableFeatures(object = data_rna),
reference.assay = "RNA", query.assay = "ACTIVITY",
reduction = "cca")
save.image(paste0(dataset,"/preprocessed_atac_data_precalculated_geneactivity.RData"))
load(paste0(dataset,"/preprocessed_atac_data_precalculated_geneactivity.RData"))
celltype.predictions <- TransferData(anchorset = transfer.anchors, refdata = data_rna$sub_cluster,
weight.reduction = data_atac[["lsi"]])
data_atac <- AddMetaData(data_atac, metadata = celltype.predictions)
#Prediction scores (quality of prediction)
hist(data_atac$prediction.score.max)
abline(v = 0.5, col = "red")
#How many cell have prediction score > 0.5 --> about 50%
table(data_atac$prediction.score.max > 0.5)
#Plot UMAPs with transferred cell type labels (ATAC and RNA)
data_atac$predicted.id <- factor(data_atac$predicted.id, levels = levels(as.factor(data_rna$sub_cluster))) # to make the colors match
p1 <- DimPlot(data_atac, group.by = "predicted.id", label = TRUE, repel = TRUE) + ggtitle("scATAC-seq cells") +
NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + ggtitle("scRNA-seq cells") +
NoLegend()
png(paste0(dataset,"/atac_UMAP_labeltransfer.png"), width = 900, height = 600)
p1 + p2
dev.off()
#Plot UMAPs with transferred labels and seurat clusters
p1 <- DimPlot(data_atac, group.by = "predicted.id", label = TRUE, repel = TRUE) + ggtitle("Transferred Labels") +
NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(data_atac, group.by = "seurat_clusters", label = TRUE, repel = TRUE) + ggtitle("Seurat Clusters") +
NoLegend()
png(paste0(dataset,"/atac_UMAP_labeltransfer_seuratClusters.png"), width = 900, height = 600)
p1 + p2
dev.off()
### Assign cell type clusters to labels ------------
x <- as.numeric(data_atac$seurat_clusters)
y <- as.character(data_atac$predicted.id)
overlap <- table(x, y)
# Plot relative values from each louvain cluster
rel_ol <- function(m) m / rep(colSums(m), each = nrow(m))
overlap_perc <- rel_ol(t(overlap))
overlap_perc_long <- melt(overlap_perc)
overlap_perc_long$x <- as.factor(overlap_perc_long$x)
ggplot(overlap_perc_long, aes(x,y))+
geom_tile(aes(fill=value))+
geom_text(aes(label=round(x = value, digits = 2)))+
scale_fill_gradient(low="white",
high="darkred")+
theme(panel.grid.major.x=element_blank(), #no gridlines
panel.grid.minor.x=element_blank(),
panel.grid.major.y=element_blank(),
panel.grid.minor.y=element_blank(),
panel.background=element_rect(fill="white"), # background=white
axis.text.x = element_text(angle=90, hjust = 1,vjust=1,size = 12,face = "bold"),
plot.title = element_text(size=15,face="bold"),
axis.text.y = element_text(size = 12,face = "bold")) +
ggtitle("Overlap Louvain Cluster and Transferred Labels")+
theme(legend.title=element_text(face="bold", size=14)) +
scale_x_discrete(name="Louvain cluster")+
scale_y_discrete(name="Transferred Label")+
labs(fill="Overlap")
ggsave(paste0(dataset,"/atac_overlap_louvain_transferlabel.png"))
# Assign labels to louvain clusters according to highest overlap
ass_cluster <- apply(overlap_perc, 2, function(t)
rownames(overlap_perc)[which.max(t)])
ass_vec <- ass_cluster[as.character(as.numeric(data_atac$seurat_clusters))]
data_atac <- AddMetaData(
object = data_atac,
metadata = ass_vec,
col.name = "seurat_transfer_label"
)
p1 <- DimPlot(data_atac, group.by = "seurat_transfer_label", label = TRUE, repel = TRUE) + ggtitle("scATACseq") +
NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + ggtitle("scRNAseq") +
NoLegend()
png(paste0(dataset,"/atac_UMAP_labeltransfer_clusters.png"), width = 900, height = 600)
p1 + p2
dev.off()
#Subset cells from seurat cluster 0 and refine assignment
data_0 <- subset(data_atac, subset = seurat_clusters == 0)
data_0 <- FindNeighbors(data_0, reduction = "lsi", dims = 1:50)
data_0 <- FindClusters(data_0, resolution = 0.1)
#Overlap
x <- as.numeric(data_0$seurat_clusters)
y <- as.character(data_0$predicted.id)
overlap <- table(x, y)
overlap_perc <- rel_ol(t(overlap))
overlap_perc_long <- melt(overlap_perc)
overlap_perc_long$x <- as.factor(overlap_perc_long$x)
ggplot(overlap_perc_long, aes(x,y))+
geom_tile(aes(fill=value))+
geom_text(aes(label=round(x = value, digits = 2)))+
scale_fill_gradient(low="white",
high="darkred")+
theme(panel.grid.major.x=element_blank(), #no gridlines
panel.grid.minor.x=element_blank(),
panel.grid.major.y=element_blank(),
panel.grid.minor.y=element_blank(),
panel.background=element_rect(fill="white"), # background=white
axis.text.x = element_text(angle=90, hjust = 1,vjust=1,size = 12,face = "bold"),
plot.title = element_text(size=15,face="bold"),
axis.text.y = element_text(size = 12,face = "bold")) +
ggtitle("Overlap Louvain Subcluster (Cluster 0) and Transferred Labels")+
theme(legend.title=element_text(face="bold", size=14)) +
scale_x_discrete(name="Louvain cluster")+
scale_y_discrete(name="Transferred Label")+
labs(fill="Overlap")
ggsave(paste0(dataset,"/atac_overlap_louvain_cluster0.png"), width = 12, height = 18)
#Assign labels to louvain clusters according to highest overlap
ass_cluster <- apply(overlap_perc, 2, function(t)
rownames(overlap_perc)[which.max(t)])
ass_vec <- ass_cluster[as.character(as.numeric(data_0$seurat_clusters))]
data_0 <- AddMetaData(
object = data_0,
metadata = ass_vec,
col.name = "seurat_transfer_cluster0"
)
#Merge cluster assignments of Cluster 0 to data object
data_atac$seurat_transfer_label[Cells(data_0)] <- data_0$seurat_transfer_cluster0
#Set Cluster 1 to "Unknown" since these cells are most likely not excitatory neurons
data_atac$seurat_transfer_label[data_atac$seurat_clusters == 1] <- "Unknown"
#Plot UMAP with refined assignment
p1 <- DimPlot(data_atac, group.by = "seurat_transfer_label", label = TRUE, repel = TRUE) + ggtitle("scATACseq") +
NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + ggtitle("scRNAseq") +
NoLegend()
png(paste0(dataset,"/atac_UMAP_labeltransfer_clustersRefined.png"), width = 900, height = 600)
p1 + p2
dev.off()
### Overlap between transferred labels and Michael's annotations ------------
x <- as.character(data_atac$jointId)
y <- as.character(data_atac$predicted.id)
overlap <- table(x, y)
# Plot relative values from each louvain cluster
rel_ol <- function(m) m / rep(colSums(m), each = nrow(m))
overlap_perc <- rel_ol(t(overlap))
overlap_perc_long <- melt(overlap_perc)
overlap_perc_long$x <- as.factor(overlap_perc_long$x)
ggplot(overlap_perc_long, aes(x,y))+
geom_tile(aes(fill=value))+
geom_text(aes(label=round(x = value, digits = 2)))+
scale_fill_gradient(low="white",
high="darkred")+
theme(panel.grid.major.x=element_blank(), #no gridlines
panel.grid.minor.x=element_blank(),
panel.grid.major.y=element_blank(),
panel.grid.minor.y=element_blank(),
panel.background=element_rect(fill="white"), # background=white
axis.text.x = element_text(angle=90, hjust = 1,vjust=1,size = 12,face = "bold"),
plot.title = element_text(size=15,face="bold"),
axis.text.y = element_text(size = 12,face = "bold")) +
ggtitle("Overlap Michael's annotations and Transferred Labels")+
theme(legend.title=element_text(face="bold", size=14)) +
scale_x_discrete(name="Michael's annotations")+
scale_y_discrete(name="Transferred Label")+
labs(fill="Overlap")
ggsave(paste0(dataset,"/atac_overlap_michaelsannotations.png"))
p1 <- DimPlot(data_atac, group.by = "jointId", label = TRUE, repel = TRUE) + ggtitle("Michael's annotations") +
NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(data_atac, group.by = "seurat_transfer_label", label = TRUE, repel = TRUE) + ggtitle("Transferred Labels") +
NoLegend()
png(paste0(dataset,"/atac_UMAP_labeltransfer_comparisonMichaelsAnnotations.png"), width = 900, height = 600)
p1 + p2
dev.off()
#Save Seurat object
saveRDS(data_atac, file = paste0(dataset,"/scATAC_labeltransfer.rds"))
# # COEMBEDDING
# # restrict the imputation to variable genes from scRNA-seq
# genes.use <- VariableFeatures(data_rna)
# refdata <- GetAssayData(data_rna, assay = "RNA", slot = "data")[genes.use, ]
#
# # refdata (input) contains a scRNA-seq expression matrix for the scRNA-seq cells. imputation
# # (output) will contain an imputed scRNA-seq matrix for each of the ATAC cells
# imputation <- TransferData(anchorset = transfer.anchors, refdata = refdata, weight.reduction = data_atac[["lsi"]])
#
# # add the imputed data matrix to the atac object
# data_atac[["ATAC"]] <- imputation
# coembed <- merge(x = data_rna, y = data_atac)
#
# # run PCA and UMAP on combined object, to visualize the co-embedding of both datasets
# coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE)
# coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE)
# coembed <- RunUMAP(coembed, dims = 1:30)
# coembed$celltype <- ifelse(!is.na(coembed$sub_cluster), coembed$sub_cluster, coembed$predicted.id)
#
# # plot co-embedding
# p1 <- DimPlot(coembed, group.by = "tech")
# p2 <- DimPlot(coembed, group.by = "celltype", label = TRUE, repel = TRUE)
# png(paste0(dataset,"/atac_UMAP_coembedding.png"), width = 900, height = 600)
# p1 + p2
# dev.off()
#