05_labeltransfer_atac_cicero_geneactivity.R

##################################################
## Project: scATACseq label transfer
## Date: 22.07.2020
## Author: Nathalie
##################################################

setwd("~/Documents/PostmortemBrain/analysis/markerGeneDefinition")
#setwd("/net/PE1/raid1/LAURA/SC_ANALYSIS/markerGeneDefinition")

library(Seurat)
#library(Signac)
library(dplyr)
library(reshape2)
library(RColorBrewer)
library(ggplot2)
library(org.Hs.eg.db)
library(Matrix)
library(readr)

### DATA LAURA ###################################

dataset <- "dataLaura"

#Read ATAC data -----------------------
#genome regions
mat<- readMM(paste0(dataset,"/raw_atac/matrix.mtx"))
peaks<- read_tsv(paste0(dataset,"/raw_atac/peaks.bed"), col_names = F)
peaks<- peaks %>%
  mutate(id1 = paste(X2, X3, sep = "-")) %>%
  mutate(id = paste(X1, id1, sep = ":"))
barcodes<- read_tsv(paste0(dataset,"/raw_atac/barcodes.tsv"), col_names =F)
rownames(mat)<- peaks$id1
colnames(mat)<- barcodes$X1
data_atac <- CreateSeuratObject(counts = mat,
                                assay = "ATAC",
                                project = dataset,
                                min.features = 500)
data_atac <- subset(data_atac, subset = nCount_ATAC > 500 & nCount_ATAC < 10000)
rm(mat)
rm(peaks)
rm(barcodes)
#Read binarized object and add meta data to raw data
data_binarized <- readRDS(paste0(dataset, "/scATAC_PMaggr_clusterd_annotated.rds"))
data_atac <- AddMetaData(object = data_atac,
                         metadata = data_binarized@meta.data)
rm(data_binarized)
#Read Cicero gene activity matrix
gene_atac <- readRDS(paste0(dataset,"/PM_geneActivities_dgMat.rds"))


#Get gene symbols for ENSEMBL IDs
symbols <- unname(mapIds(org.Hs.eg.db, keys = dimnames(gene_atac)[[1]], keytype = "ENSEMBL", column="SYMBOL"))
dimnames(gene_atac)[[1]] <- symbols
gene_atac <- as(gene_atac, "dgTMatrix")
#Remove duplicated gene symbols from matrix
OP2 <- function(x) {
  nms <- rownames(x)
  uniquenms <- unique(nms)
  # build the sparseMatrix again: x's with same index values are automatically
  # added together, keeping in mind that indexes stored from 0 but built from 1
  sparseMatrix(i = match(nms, uniquenms)[x@i + 1],
               j = x@j + 1,
               x = x@x,
               dimnames = list(uniquenms, colnames(x)),
               giveCsparse = FALSE)
}
gene_atac <- OP2(gene_atac)
gene_atac <- gene_atac[-which(is.na(rownames(gene_atac))),]
#Use only cells that are present in both (gene activity matrix and feature matrix)
cells_intersect <- intersect(colnames(data_atac), colnames(gene_atac))
data_atac <- subset(data_atac, cells = cells_intersect)
data_atac[["ACTIVITY"]] <- CreateAssayObject(counts = gene_atac[, cells_intersect])
rm(gene_atac)
rm(cells_intersect)
rm(symbols)

data_atac$tech <- "atac"


#Preprocessing of ATAC data -------------------
DefaultAssay(data_atac) <- "ACTIVITY"
data_atac <- FindVariableFeatures(data_atac)
#Normalization already done with Cicero by Michael
#data_atac <- NormalizeData(data_atac)
#data_atac <- ScaleData(data_atac)

DefaultAssay(data_atac) <- "ATAC"
VariableFeatures(data_atac) <- names(which(Matrix::rowSums(data_atac) > 100))
data_atac <- RunLSI(data_atac, n = 50, scale.max = NULL)
#Sequencing depth correlation, remove 1st component of downstream analysis if strong correlation
DepthCor(data_atac) # --> 1st component can remain
DepthCor(data_atac, n = 50)
#Non-linear dimension reduction and clustering
data_atac <- RunUMAP(data_atac, reduction = "lsi", dims = 1:50)
data_atac <- FindNeighbors(data_atac, reduction = "lsi", dims = 1:50)
data_atac$seurat_clusters_michael <- data_atac$seurat_clusters #copy clusters, otherwise overwritten
data_atac <- FindClusters(data_atac, resolution = 0.1)
DimPlot(object = data_atac, group = "seurat_clusters", label = TRUE) + NoLegend()

#Read RNA data with cell type labels
data_rna <- readRDS(paste0(dataset,"/data_object_sct_integrated.rds"))
data_rna$tech <- "rna"

#Plot ATAC and RNA data next to each other (with Michael's labels)
p1 <- DimPlot(data_atac, reduction = "umap", group.by = "jointId", label = TRUE, repel = TRUE) + NoLegend() + ggtitle("scATAC-seq")
p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + NoLegend() + ggtitle("scRNA-seq")
p1 + p2


#Label transfer ------------------
#Remove genes from RNA data that are not in ATAC data (for transfer)
intersect_features <- intersect(dimnames(data_rna@assays$RNA)[[1]],
                                dimnames(data_atac@assays$ACTIVITY)[[1]])
data_rna <- subset(x = data_rna, features = intersect_features)

#Find Transfer Anchors and Transfer labels
transfer.anchors <- FindTransferAnchors(reference = data_rna, query = data_atac,
                                        features = VariableFeatures(object = data_rna),
                                        reference.assay = "RNA", query.assay = "ACTIVITY",
                                        reduction = "cca")
save.image(paste0(dataset,"/preprocessed_atac_data_precalculated_geneactivity.RData"))
load(paste0(dataset,"/preprocessed_atac_data_precalculated_geneactivity.RData"))
celltype.predictions <- TransferData(anchorset = transfer.anchors, refdata = data_rna$sub_cluster,
                                     weight.reduction = data_atac[["lsi"]])
data_atac <- AddMetaData(data_atac, metadata = celltype.predictions)

#Prediction scores (quality of prediction)
hist(data_atac$prediction.score.max)
abline(v = 0.5, col = "red")

#How many cell have prediction score > 0.5 --> about 50%
table(data_atac$prediction.score.max > 0.5)

#Plot UMAPs with transferred cell type labels (ATAC and RNA)
data_atac$predicted.id <- factor(data_atac$predicted.id, levels = levels(as.factor(data_rna$sub_cluster)))  # to make the colors match
p1 <- DimPlot(data_atac, group.by = "predicted.id", label = TRUE, repel = TRUE) + ggtitle("scATAC-seq cells") +
  NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + ggtitle("scRNA-seq cells") +
  NoLegend()
png(paste0(dataset,"/atac_UMAP_labeltransfer.png"), width = 900, height = 600)
p1 + p2
dev.off()

#Plot UMAPs with transferred labels and seurat clusters
p1 <- DimPlot(data_atac, group.by = "predicted.id", label = TRUE, repel = TRUE) + ggtitle("Transferred Labels") +
  NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(data_atac, group.by = "seurat_clusters", label = TRUE, repel = TRUE) + ggtitle("Seurat Clusters") +
  NoLegend()
png(paste0(dataset,"/atac_UMAP_labeltransfer_seuratClusters.png"), width = 900, height = 600)
p1 + p2
dev.off()


### Assign cell type clusters to labels ------------
x <- as.numeric(data_atac$seurat_clusters)
y <- as.character(data_atac$predicted.id)
overlap <- table(x, y)

# Plot relative values from each louvain cluster
rel_ol <- function(m) m / rep(colSums(m), each = nrow(m))
overlap_perc <- rel_ol(t(overlap))

overlap_perc_long <- melt(overlap_perc)
overlap_perc_long$x <- as.factor(overlap_perc_long$x)

ggplot(overlap_perc_long, aes(x,y))+
  geom_tile(aes(fill=value))+
  geom_text(aes(label=round(x = value, digits = 2)))+
  scale_fill_gradient(low="white",
                      high="darkred")+
  theme(panel.grid.major.x=element_blank(), #no gridlines
        panel.grid.minor.x=element_blank(),
        panel.grid.major.y=element_blank(),
        panel.grid.minor.y=element_blank(),
        panel.background=element_rect(fill="white"), # background=white
        axis.text.x = element_text(angle=90, hjust = 1,vjust=1,size = 12,face = "bold"),
        plot.title = element_text(size=15,face="bold"),
        axis.text.y = element_text(size = 12,face = "bold")) +
  ggtitle("Overlap Louvain Cluster and Transferred Labels")+
  theme(legend.title=element_text(face="bold", size=14)) +
  scale_x_discrete(name="Louvain cluster")+
  scale_y_discrete(name="Transferred Label")+
  labs(fill="Overlap")
ggsave(paste0(dataset,"/atac_overlap_louvain_transferlabel.png"))

# Assign labels to louvain clusters according to highest overlap
ass_cluster <- apply(overlap_perc, 2, function(t)
  rownames(overlap_perc)[which.max(t)])
ass_vec <- ass_cluster[as.character(as.numeric(data_atac$seurat_clusters))]

data_atac <- AddMetaData(
  object = data_atac,
  metadata = ass_vec,
  col.name = "seurat_transfer_label"
)

p1 <- DimPlot(data_atac, group.by = "seurat_transfer_label", label = TRUE, repel = TRUE) + ggtitle("scATACseq") +
  NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + ggtitle("scRNAseq") +
  NoLegend()
png(paste0(dataset,"/atac_UMAP_labeltransfer_clusters.png"), width = 900, height = 600)
p1 + p2
dev.off()


#Subset cells from seurat cluster 0 and refine assignment
data_0 <- subset(data_atac, subset = seurat_clusters == 0)
data_0 <- FindNeighbors(data_0, reduction = "lsi", dims = 1:50)
data_0 <- FindClusters(data_0, resolution = 0.1)
#Overlap
x <- as.numeric(data_0$seurat_clusters)
y <- as.character(data_0$predicted.id)
overlap <- table(x, y)

overlap_perc <- rel_ol(t(overlap))
overlap_perc_long <- melt(overlap_perc)
overlap_perc_long$x <- as.factor(overlap_perc_long$x)

ggplot(overlap_perc_long, aes(x,y))+
  geom_tile(aes(fill=value))+
  geom_text(aes(label=round(x = value, digits = 2)))+
  scale_fill_gradient(low="white",
                      high="darkred")+
  theme(panel.grid.major.x=element_blank(), #no gridlines
        panel.grid.minor.x=element_blank(),
        panel.grid.major.y=element_blank(),
        panel.grid.minor.y=element_blank(),
        panel.background=element_rect(fill="white"), # background=white
        axis.text.x = element_text(angle=90, hjust = 1,vjust=1,size = 12,face = "bold"),
        plot.title = element_text(size=15,face="bold"),
        axis.text.y = element_text(size = 12,face = "bold")) +
  ggtitle("Overlap Louvain Subcluster (Cluster 0) and Transferred Labels")+
  theme(legend.title=element_text(face="bold", size=14)) +
  scale_x_discrete(name="Louvain cluster")+
  scale_y_discrete(name="Transferred Label")+
  labs(fill="Overlap")
ggsave(paste0(dataset,"/atac_overlap_louvain_cluster0.png"), width = 12, height = 18)

#Assign labels to louvain clusters according to highest overlap
ass_cluster <- apply(overlap_perc, 2, function(t)
  rownames(overlap_perc)[which.max(t)])
ass_vec <- ass_cluster[as.character(as.numeric(data_0$seurat_clusters))]

data_0 <- AddMetaData(
  object = data_0,
  metadata = ass_vec,
  col.name = "seurat_transfer_cluster0"
)

#Merge cluster assignments of Cluster 0 to data object
data_atac$seurat_transfer_label[Cells(data_0)] <- data_0$seurat_transfer_cluster0


#Set Cluster 1 to "Unknown" since these cells are most likely not excitatory neurons
data_atac$seurat_transfer_label[data_atac$seurat_clusters == 1] <- "Unknown"


#Plot UMAP with refined assignment
p1 <- DimPlot(data_atac, group.by = "seurat_transfer_label", label = TRUE, repel = TRUE) + ggtitle("scATACseq") +
  NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + ggtitle("scRNAseq") +
  NoLegend()
png(paste0(dataset,"/atac_UMAP_labeltransfer_clustersRefined.png"), width = 900, height = 600)
p1 + p2
dev.off()


### Overlap between transferred labels and Michael's annotations ------------
x <- as.character(data_atac$jointId)
y <- as.character(data_atac$predicted.id)
overlap <- table(x, y)

# Plot relative values from each louvain cluster
rel_ol <- function(m) m / rep(colSums(m), each = nrow(m))
overlap_perc <- rel_ol(t(overlap))

overlap_perc_long <- melt(overlap_perc)
overlap_perc_long$x <- as.factor(overlap_perc_long$x)

ggplot(overlap_perc_long, aes(x,y))+
  geom_tile(aes(fill=value))+
  geom_text(aes(label=round(x = value, digits = 2)))+
  scale_fill_gradient(low="white",
                      high="darkred")+
  theme(panel.grid.major.x=element_blank(), #no gridlines
        panel.grid.minor.x=element_blank(),
        panel.grid.major.y=element_blank(),
        panel.grid.minor.y=element_blank(),
        panel.background=element_rect(fill="white"), # background=white
        axis.text.x = element_text(angle=90, hjust = 1,vjust=1,size = 12,face = "bold"),
        plot.title = element_text(size=15,face="bold"),
        axis.text.y = element_text(size = 12,face = "bold")) +
  ggtitle("Overlap Michael's annotations and Transferred Labels")+
  theme(legend.title=element_text(face="bold", size=14)) +
  scale_x_discrete(name="Michael's annotations")+
  scale_y_discrete(name="Transferred Label")+
  labs(fill="Overlap")
ggsave(paste0(dataset,"/atac_overlap_michaelsannotations.png"))

p1 <- DimPlot(data_atac, group.by = "jointId", label = TRUE, repel = TRUE) + ggtitle("Michael's annotations") +
  NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(data_atac, group.by = "seurat_transfer_label", label = TRUE, repel = TRUE) + ggtitle("Transferred Labels") +
  NoLegend()
png(paste0(dataset,"/atac_UMAP_labeltransfer_comparisonMichaelsAnnotations.png"), width = 900, height = 600)
p1 + p2
dev.off()

#Save Seurat object
saveRDS(data_atac, file = paste0(dataset,"/scATAC_labeltransfer.rds"))


# # COEMBEDDING
# # restrict the imputation to variable genes from scRNA-seq
# genes.use <- VariableFeatures(data_rna)
# refdata <- GetAssayData(data_rna, assay = "RNA", slot = "data")[genes.use, ]
#
# # refdata (input) contains a scRNA-seq expression matrix for the scRNA-seq cells.  imputation
# # (output) will contain an imputed scRNA-seq matrix for each of the ATAC cells
# imputation <- TransferData(anchorset = transfer.anchors, refdata = refdata, weight.reduction = data_atac[["lsi"]])
#
# # add the imputed data matrix to the atac object
# data_atac[["ATAC"]] <- imputation
# coembed <- merge(x = data_rna, y = data_atac)
#
# # run PCA and UMAP on combined object, to visualize the co-embedding of both datasets
# coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE)
# coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE)
# coembed <- RunUMAP(coembed, dims = 1:30)
# coembed$celltype <- ifelse(!is.na(coembed$sub_cluster), coembed$sub_cluster, coembed$predicted.id)
#
# # plot co-embedding
# p1 <- DimPlot(coembed, group.by = "tech")
# p2 <- DimPlot(coembed, group.by = "celltype", label = TRUE, repel = TRUE)
# png(paste0(dataset,"/atac_UMAP_coembedding.png"), width = 900, height = 600)
# p1 + p2
# dev.off()
#
	##################################################
	## Project: scATACseq label transfer
	## Date: 22.07.2020
	## Author: Nathalie
	##################################################

	setwd("~/Documents/PostmortemBrain/analysis/markerGeneDefinition")
	#setwd("/net/PE1/raid1/LAURA/SC_ANALYSIS/markerGeneDefinition")

	library(Seurat)
	#library(Signac)
	library(dplyr)
	library(reshape2)
	library(RColorBrewer)
	library(ggplot2)
	library(org.Hs.eg.db)
	library(Matrix)
	library(readr)

	### DATA LAURA ###################################

	dataset <- "dataLaura"

	#Read ATAC data -----------------------
	#genome regions
	mat<- readMM(paste0(dataset,"/raw_atac/matrix.mtx"))
	peaks<- read_tsv(paste0(dataset,"/raw_atac/peaks.bed"), col_names = F)
	peaks<- peaks %>%
	mutate(id1 = paste(X2, X3, sep = "-")) %>%
	mutate(id = paste(X1, id1, sep = ":"))
	barcodes<- read_tsv(paste0(dataset,"/raw_atac/barcodes.tsv"), col_names =F)
	rownames(mat)<- peaks$id1
	colnames(mat)<- barcodes$X1
	data_atac <- CreateSeuratObject(counts = mat,
	assay = "ATAC",
	project = dataset,
	min.features = 500)
	data_atac <- subset(data_atac, subset = nCount_ATAC > 500 & nCount_ATAC < 10000)
	rm(mat)
	rm(peaks)
	rm(barcodes)
	#Read binarized object and add meta data to raw data
	data_binarized <- readRDS(paste0(dataset, "/scATAC_PMaggr_clusterd_annotated.rds"))
	data_atac <- AddMetaData(object = data_atac,
	metadata = data_binarized@meta.data)
	rm(data_binarized)
	#Read Cicero gene activity matrix
	gene_atac <- readRDS(paste0(dataset,"/PM_geneActivities_dgMat.rds"))


	#Get gene symbols for ENSEMBL IDs
	symbols <- unname(mapIds(org.Hs.eg.db, keys = dimnames(gene_atac)[[1]], keytype = "ENSEMBL", column="SYMBOL"))
	dimnames(gene_atac)[[1]] <- symbols
	gene_atac <- as(gene_atac, "dgTMatrix")
	#Remove duplicated gene symbols from matrix
	OP2 <- function(x) {
	nms <- rownames(x)
	uniquenms <- unique(nms)
	# build the sparseMatrix again: x's with same index values are automatically
	# added together, keeping in mind that indexes stored from 0 but built from 1
	sparseMatrix(i = match(nms, uniquenms)[x@i + 1],
	j = x@j + 1,
	x = x@x,
	dimnames = list(uniquenms, colnames(x)),
	giveCsparse = FALSE)
	}
	gene_atac <- OP2(gene_atac)
	gene_atac <- gene_atac[-which(is.na(rownames(gene_atac))),]
	#Use only cells that are present in both (gene activity matrix and feature matrix)
	cells_intersect <- intersect(colnames(data_atac), colnames(gene_atac))
	data_atac <- subset(data_atac, cells = cells_intersect)
	data_atac[["ACTIVITY"]] <- CreateAssayObject(counts = gene_atac[, cells_intersect])
	rm(gene_atac)
	rm(cells_intersect)
	rm(symbols)

	data_atac$tech <- "atac"


	#Preprocessing of ATAC data -------------------
	DefaultAssay(data_atac) <- "ACTIVITY"
	data_atac <- FindVariableFeatures(data_atac)
	#Normalization already done with Cicero by Michael
	#data_atac <- NormalizeData(data_atac)
	#data_atac <- ScaleData(data_atac)

	DefaultAssay(data_atac) <- "ATAC"
	VariableFeatures(data_atac) <- names(which(Matrix::rowSums(data_atac) > 100))
	data_atac <- RunLSI(data_atac, n = 50, scale.max = NULL)
	#Sequencing depth correlation, remove 1st component of downstream analysis if strong correlation
	DepthCor(data_atac) # --> 1st component can remain
	DepthCor(data_atac, n = 50)
	#Non-linear dimension reduction and clustering
	data_atac <- RunUMAP(data_atac, reduction = "lsi", dims = 1:50)
	data_atac <- FindNeighbors(data_atac, reduction = "lsi", dims = 1:50)
	data_atac$seurat_clusters_michael <- data_atac$seurat_clusters #copy clusters, otherwise overwritten
	data_atac <- FindClusters(data_atac, resolution = 0.1)
	DimPlot(object = data_atac, group = "seurat_clusters", label = TRUE) + NoLegend()

	#Read RNA data with cell type labels
	data_rna <- readRDS(paste0(dataset,"/data_object_sct_integrated.rds"))
	data_rna$tech <- "rna"

	#Plot ATAC and RNA data next to each other (with Michael's labels)
	p1 <- DimPlot(data_atac, reduction = "umap", group.by = "jointId", label = TRUE, repel = TRUE) + NoLegend() + ggtitle("scATAC-seq")
	p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + NoLegend() + ggtitle("scRNA-seq")
	p1 + p2


	#Label transfer ------------------
	#Remove genes from RNA data that are not in ATAC data (for transfer)
	intersect_features <- intersect(dimnames(data_rna@assays$RNA)[[1]],
	dimnames(data_atac@assays$ACTIVITY)[[1]])
	data_rna <- subset(x = data_rna, features = intersect_features)

	#Find Transfer Anchors and Transfer labels
	transfer.anchors <- FindTransferAnchors(reference = data_rna, query = data_atac,
	features = VariableFeatures(object = data_rna),
	reference.assay = "RNA", query.assay = "ACTIVITY",
	reduction = "cca")
	save.image(paste0(dataset,"/preprocessed_atac_data_precalculated_geneactivity.RData"))
	load(paste0(dataset,"/preprocessed_atac_data_precalculated_geneactivity.RData"))
	celltype.predictions <- TransferData(anchorset = transfer.anchors, refdata = data_rna$sub_cluster,
	weight.reduction = data_atac[["lsi"]])
	data_atac <- AddMetaData(data_atac, metadata = celltype.predictions)

	#Prediction scores (quality of prediction)
	hist(data_atac$prediction.score.max)
	abline(v = 0.5, col = "red")

	#How many cell have prediction score > 0.5 --> about 50%
	table(data_atac$prediction.score.max > 0.5)

	#Plot UMAPs with transferred cell type labels (ATAC and RNA)
	data_atac$predicted.id <- factor(data_atac$predicted.id, levels = levels(as.factor(data_rna$sub_cluster))) # to make the colors match
	p1 <- DimPlot(data_atac, group.by = "predicted.id", label = TRUE, repel = TRUE) + ggtitle("scATAC-seq cells") +
	NoLegend() + scale_colour_hue(drop = FALSE)
	p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + ggtitle("scRNA-seq cells") +
	NoLegend()
	png(paste0(dataset,"/atac_UMAP_labeltransfer.png"), width = 900, height = 600)
	p1 + p2
	dev.off()

	#Plot UMAPs with transferred labels and seurat clusters
	p1 <- DimPlot(data_atac, group.by = "predicted.id", label = TRUE, repel = TRUE) + ggtitle("Transferred Labels") +
	NoLegend() + scale_colour_hue(drop = FALSE)
	p2 <- DimPlot(data_atac, group.by = "seurat_clusters", label = TRUE, repel = TRUE) + ggtitle("Seurat Clusters") +
	NoLegend()
	png(paste0(dataset,"/atac_UMAP_labeltransfer_seuratClusters.png"), width = 900, height = 600)
	p1 + p2
	dev.off()




	### Assign cell type clusters to labels ------------
	x <- as.numeric(data_atac$seurat_clusters)
	y <- as.character(data_atac$predicted.id)
	overlap <- table(x, y)

	# Plot relative values from each louvain cluster
	rel_ol <- function(m) m / rep(colSums(m), each = nrow(m))
	overlap_perc <- rel_ol(t(overlap))

	overlap_perc_long <- melt(overlap_perc)
	overlap_perc_long$x <- as.factor(overlap_perc_long$x)

	ggplot(overlap_perc_long, aes(x,y))+
	geom_tile(aes(fill=value))+
	geom_text(aes(label=round(x = value, digits = 2)))+
	scale_fill_gradient(low="white",
	high="darkred")+
	theme(panel.grid.major.x=element_blank(), #no gridlines
	panel.grid.minor.x=element_blank(),
	panel.grid.major.y=element_blank(),
	panel.grid.minor.y=element_blank(),
	panel.background=element_rect(fill="white"), # background=white
	axis.text.x = element_text(angle=90, hjust = 1,vjust=1,size = 12,face = "bold"),
	plot.title = element_text(size=15,face="bold"),
	axis.text.y = element_text(size = 12,face = "bold")) +
	ggtitle("Overlap Louvain Cluster and Transferred Labels")+
	theme(legend.title=element_text(face="bold", size=14)) +
	scale_x_discrete(name="Louvain cluster")+
	scale_y_discrete(name="Transferred Label")+
	labs(fill="Overlap")
	ggsave(paste0(dataset,"/atac_overlap_louvain_transferlabel.png"))

	# Assign labels to louvain clusters according to highest overlap
	ass_cluster <- apply(overlap_perc, 2, function(t)
	rownames(overlap_perc)[which.max(t)])
	ass_vec <- ass_cluster[as.character(as.numeric(data_atac$seurat_clusters))]

	data_atac <- AddMetaData(
	object = data_atac,
	metadata = ass_vec,
	col.name = "seurat_transfer_label"
	)

	p1 <- DimPlot(data_atac, group.by = "seurat_transfer_label", label = TRUE, repel = TRUE) + ggtitle("scATACseq") +
	NoLegend() + scale_colour_hue(drop = FALSE)
	p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + ggtitle("scRNAseq") +
	NoLegend()
	png(paste0(dataset,"/atac_UMAP_labeltransfer_clusters.png"), width = 900, height = 600)
	p1 + p2
	dev.off()


	#Subset cells from seurat cluster 0 and refine assignment
	data_0 <- subset(data_atac, subset = seurat_clusters == 0)
	data_0 <- FindNeighbors(data_0, reduction = "lsi", dims = 1:50)
	data_0 <- FindClusters(data_0, resolution = 0.1)
	#Overlap
	x <- as.numeric(data_0$seurat_clusters)
	y <- as.character(data_0$predicted.id)
	overlap <- table(x, y)

	overlap_perc <- rel_ol(t(overlap))
	overlap_perc_long <- melt(overlap_perc)
	overlap_perc_long$x <- as.factor(overlap_perc_long$x)

	ggplot(overlap_perc_long, aes(x,y))+
	geom_tile(aes(fill=value))+
	geom_text(aes(label=round(x = value, digits = 2)))+
	scale_fill_gradient(low="white",
	high="darkred")+
	theme(panel.grid.major.x=element_blank(), #no gridlines
	panel.grid.minor.x=element_blank(),
	panel.grid.major.y=element_blank(),
	panel.grid.minor.y=element_blank(),
	panel.background=element_rect(fill="white"), # background=white
	axis.text.x = element_text(angle=90, hjust = 1,vjust=1,size = 12,face = "bold"),
	plot.title = element_text(size=15,face="bold"),
	axis.text.y = element_text(size = 12,face = "bold")) +
	ggtitle("Overlap Louvain Subcluster (Cluster 0) and Transferred Labels")+
	theme(legend.title=element_text(face="bold", size=14)) +
	scale_x_discrete(name="Louvain cluster")+
	scale_y_discrete(name="Transferred Label")+
	labs(fill="Overlap")
	ggsave(paste0(dataset,"/atac_overlap_louvain_cluster0.png"), width = 12, height = 18)

	#Assign labels to louvain clusters according to highest overlap
	ass_cluster <- apply(overlap_perc, 2, function(t)
	rownames(overlap_perc)[which.max(t)])
	ass_vec <- ass_cluster[as.character(as.numeric(data_0$seurat_clusters))]

	data_0 <- AddMetaData(
	object = data_0,
	metadata = ass_vec,
	col.name = "seurat_transfer_cluster0"
	)

	#Merge cluster assignments of Cluster 0 to data object
	data_atac$seurat_transfer_label[Cells(data_0)] <- data_0$seurat_transfer_cluster0


	#Set Cluster 1 to "Unknown" since these cells are most likely not excitatory neurons
	data_atac$seurat_transfer_label[data_atac$seurat_clusters == 1] <- "Unknown"


	#Plot UMAP with refined assignment
	p1 <- DimPlot(data_atac, group.by = "seurat_transfer_label", label = TRUE, repel = TRUE) + ggtitle("scATACseq") +
	NoLegend() + scale_colour_hue(drop = FALSE)
	p2 <- DimPlot(data_rna, group.by = "sub_cluster", label = TRUE, repel = TRUE) + ggtitle("scRNAseq") +
	NoLegend()
	png(paste0(dataset,"/atac_UMAP_labeltransfer_clustersRefined.png"), width = 900, height = 600)
	p1 + p2
	dev.off()



	### Overlap between transferred labels and Michael's annotations ------------
	x <- as.character(data_atac$jointId)
	y <- as.character(data_atac$predicted.id)
	overlap <- table(x, y)

	# Plot relative values from each louvain cluster
	rel_ol <- function(m) m / rep(colSums(m), each = nrow(m))
	overlap_perc <- rel_ol(t(overlap))

	overlap_perc_long <- melt(overlap_perc)
	overlap_perc_long$x <- as.factor(overlap_perc_long$x)

	ggplot(overlap_perc_long, aes(x,y))+
	geom_tile(aes(fill=value))+
	geom_text(aes(label=round(x = value, digits = 2)))+
	scale_fill_gradient(low="white",
	high="darkred")+
	theme(panel.grid.major.x=element_blank(), #no gridlines
	panel.grid.minor.x=element_blank(),
	panel.grid.major.y=element_blank(),
	panel.grid.minor.y=element_blank(),
	panel.background=element_rect(fill="white"), # background=white
	axis.text.x = element_text(angle=90, hjust = 1,vjust=1,size = 12,face = "bold"),
	plot.title = element_text(size=15,face="bold"),
	axis.text.y = element_text(size = 12,face = "bold")) +
	ggtitle("Overlap Michael's annotations and Transferred Labels")+
	theme(legend.title=element_text(face="bold", size=14)) +
	scale_x_discrete(name="Michael's annotations")+
	scale_y_discrete(name="Transferred Label")+
	labs(fill="Overlap")
	ggsave(paste0(dataset,"/atac_overlap_michaelsannotations.png"))

	p1 <- DimPlot(data_atac, group.by = "jointId", label = TRUE, repel = TRUE) + ggtitle("Michael's annotations") +
	NoLegend() + scale_colour_hue(drop = FALSE)
	p2 <- DimPlot(data_atac, group.by = "seurat_transfer_label", label = TRUE, repel = TRUE) + ggtitle("Transferred Labels") +
	NoLegend()
	png(paste0(dataset,"/atac_UMAP_labeltransfer_comparisonMichaelsAnnotations.png"), width = 900, height = 600)
	p1 + p2
	dev.off()

	#Save Seurat object
	saveRDS(data_atac, file = paste0(dataset,"/scATAC_labeltransfer.rds"))


	# # COEMBEDDING
	# # restrict the imputation to variable genes from scRNA-seq
	# genes.use <- VariableFeatures(data_rna)
	# refdata <- GetAssayData(data_rna, assay = "RNA", slot = "data")[genes.use, ]
	#
	# # refdata (input) contains a scRNA-seq expression matrix for the scRNA-seq cells. imputation
	# # (output) will contain an imputed scRNA-seq matrix for each of the ATAC cells
	# imputation <- TransferData(anchorset = transfer.anchors, refdata = refdata, weight.reduction = data_atac[["lsi"]])
	#
	# # add the imputed data matrix to the atac object
	# data_atac[["ATAC"]] <- imputation
	# coembed <- merge(x = data_rna, y = data_atac)
	#
	# # run PCA and UMAP on combined object, to visualize the co-embedding of both datasets
	# coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE)
	# coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE)
	# coembed <- RunUMAP(coembed, dims = 1:30)
	# coembed$celltype <- ifelse(!is.na(coembed$sub_cluster), coembed$sub_cluster, coembed$predicted.id)
	#
	# # plot co-embedding
	# p1 <- DimPlot(coembed, group.by = "tech")
	# p2 <- DimPlot(coembed, group.by = "celltype", label = TRUE, repel = TRUE)
	# png(paste0(dataset,"/atac_UMAP_coembedding.png"), width = 900, height = 600)
	# p1 + p2
	# dev.off()
	#