Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
b80d5cec85
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
179 lines (150 sloc) 8.13 KB
library(MAST)
library(rpca)
library(Seurat)
library(data.table)
# set options(mc.cores=N) to run on multiple cores; useful on the server
# object = Seurat object
# id1 = identity 1 (from clustering)
# id2 = identity 2
# min.pct = minimum percentage of cells expressing the gene
# thresh.use = minimum AVERAGE DIFFERENCE for filtering, in log scale (base e), e.g. for 1.5 fold change, type log(1.5)
# data.info.col = column number for columns in data.info that contain metadata (not clusters...)
FindMarkers.MAST <- function(object, id1, id2, min.pct, thresh.use,data.info.col = 1:13){
cells.1 <- names(object@ident[object@ident == id1])
cells.2 <- names(object@ident[object@ident == id2])
cells.to.compare <- c(cells.1,cells.2)
raw.neur.counts <- as.matrix(object@raw.data[,object@cell.names])
neur.alldata <- log(raw.neur.counts + 1)/log(2) # i.e. log2 base
genes.use = rownames(object@data)
thresh.min = 0
data.temp1=round(apply(neur.alldata[genes.use, cells.1, drop = F],1,function(x)return(length(x[x>thresh.min])/length(x))),3)
data.temp2=round(apply(neur.alldata[genes.use, cells.2, drop = F],1,function(x)return(length(x[x>thresh.min])/length(x))),3)
data.alpha=cbind(data.temp1,data.temp2); colnames(data.alpha)=c("pct.1","pct.2")
alpha.min=apply(data.alpha,1,max)
names(alpha.min)=rownames(data.alpha)
genes.use=names(which(alpha.min>min.pct))
neur.data <- neur.alldata[genes.use, cells.to.compare]
symbolid <- rownames(neur.data)
primerid <- symbolid
rownames(neur.data) <- primerid
neur.cData <- as.data.frame(cbind(as.character(object@ident), object@data.info[,data.info.col]))
colnames(neur.cData) <- c("cluster",colnames(object@data.info)[data.info.col])
rownames(neur.cData) <- names(object@ident) # line added on 170825
neur.cData$cluster <- as.character(neur.cData$cluster)
neur.cData <- neur.cData[cells.to.compare,]
neur.cData$wellKey <- as.character(cells.to.compare)
neur.fData <- cbind(primerid,symbolid)
neur.fData <- as.data.frame(neur.fData)
neur.fData[,1] <- as.character(neur.fData[,1])
neur.to.compare <- FromMatrix(neur.data, neur.cData, neur.fData)
colData(neur.to.compare)$cngeneson <- scale(colSums(assay(neur.to.compare)>0)) # this takes Z scores of cell size factors
colData(neur.to.compare)$cluster <- as.factor(colData(neur.to.compare)$cluster)
colData(neur.to.compare)$cluster <- relevel(colData(neur.to.compare)$cluster, as.character(id1))
zlmCond <- zlm.SingleCellAssay(~ cluster + cngeneson, neur.to.compare)
summaryCond <- summary(zlmCond, doLRT=paste0("cluster", id2))
summaryDt <- summaryCond$datatable
summaryDt2 <- summaryDt
set1 <- summaryDt2[(contrast==paste0("cluster", id2) & component == "H"),c(1,4)]
colnames(set1) <- c("primerid","Pr")
set2 <- summaryDt2[(contrast==paste0("cluster", id2) & component == "logFC"), .(primerid, coef, ci.hi, ci.lo)]
fcHurdle <- merge(set1,set2, by="primerid")
fcHurdle[,fdr:=p.adjust(Pr, "fdr")]
expMean=function(x) {
return(log(mean(exp(x)-1)+1))
}
data.avg.diff <- as.matrix(object@data[rownames(object@data) %in% genes.use, colnames(object@data) %in% cells.to.compare])
data1 <- data.avg.diff[,cells.1]
data2 <- data.avg.diff[,cells.2]
genes.use2<-rownames(data.avg.diff)
avg_diff=unlist(lapply(genes.use2,function(x)(expMean(as.numeric(data1[x,]))-expMean(as.numeric(data2[x,])))))
names(avg_diff)<-rownames(data.avg.diff)
fcHurdle2 <- as.data.frame(fcHurdle)
rownames(fcHurdle2)<-fcHurdle2$primerid
avg_diff <- avg_diff[rownames(fcHurdle2)]
fcHurdle3 <- cbind(fcHurdle2, avg_diff)
fcHurdleSig <- fcHurdle3[fcHurdle3$fdr<0.05 & abs(fcHurdle3$avg_diff)>thresh.use,]
fcHurdleSig <- cbind(fcHurdleSig[,c(2:3,6:7)],data.alpha[rownames(fcHurdleSig),])
fcHurdleSig <- fcHurdleSig[order(abs(fcHurdleSig$avg_diff),decreasing=T),]
return(fcHurdleSig)
}
#######################Couple of Functions for identifying DEgenes across specificed clusters#############
##identify DEgenes using MAST function above
DE_Gene_Union = function(SeuratObject,clusters,min.pct=0.4,thresh.use=log(2), data.info.col = 1:13){
DEgenes = list(character(),character(),character(),character())
names(DEgenes) = c('union_fdr<0.05','union_fdr<0.01','union_fdr<0.005','union_fdr<1e-5')
combinations = t(combn(as.character(clusters),2))
colnames(combinations) = c("ident.1","ident.2")
nDEgenes = matrix(nrow=nrow(combinations),ncol=4)
colnames(nDEgenes) = c('union_fdr<0.05','union_fdr<0.01','union_fdr<0.005','union_fdr<1e-5')
rownames(nDEgenes) = paste('cluster',combinations[,1],'cluster',combinations[,2],sep='_')
pb <- txtProgressBar(1, 100, style=3)
for (i in 1:nrow(combinations)){
a = combinations[i,1]
b = combinations[i,2]
suppressMessages(diff.genes <- FindMarkers.MAST(SeuratObject,a,b,min.pct,thresh.use,data.info.col))
#matrix from FindMarkers.MAST only contains genes that have a fdr<0.05
DEgenes[[paste("cluster",a,"cluster",b, sep="_")]] = diff.genes
DEgenes[['union_fdr<0.05']] = union(DEgenes[['union_fdr<0.05']],rownames(diff.genes))
DEgenes[['union_fdr<0.01']] = union(DEgenes[['union_fdr<0.01']],rownames(diff.genes[diff.genes$fdr<0.01,]))
DEgenes[['union_fdr<0.005']] = union(DEgenes[['union_fdr<0.005']],rownames(diff.genes[diff.genes$fdr<0.005,]))
DEgenes[['union_fdr<1e-5']] = union(DEgenes[['union_fdr<1e-5']],rownames(diff.genes[diff.genes$fdr<1e-5,]))
nDEgenes[i,1] = nrow(diff.genes)
nDEgenes[i,2] = nrow(diff.genes[diff.genes$fdr<0.01,])
nDEgenes[i,3] = nrow(diff.genes[diff.genes$fdr<0.005,])
nDEgenes[i,4] = nrow(diff.genes[diff.genes$fdr<1e-5,])
rm(list = c('diff.genes','a','b'))
setTxtProgressBar(pb, (i*100)/nrow(combinations))
}
DEgenes[['nDEgenes']] = nDEgenes
DEgenes[['nDEgene_union']] = sapply(DEgenes[1:4],length)
return(DEgenes)
}
#function to prune DE_Gene_Union list generated above according to fdr, min.pct, thresh.use
#input is output of function above
prune_DE_Gene_Union= function (DE_Gene_Union_list,new.fdr=NULL,new.min.pct=0,new.thresh=0){
mast.list = DE_Gene_Union_list[grep('cluster',names(DE_Gene_Union_list))]
nDEgenes = DE_Gene_Union_list[['nDEgenes']]
union.list = DE_Gene_Union_list[grep('union_fdr',names(DE_Gene_Union_list))]
if(new.min.pct>0 | new.thresh>0){
fdrs = substring(colnames(nDEgenes),11)
fdrs = as.numeric(fdrs)
nDEgenes = matrix(nrow=length(mast.list),ncol=length(fdrs))
rownames(nDEgenes) = names(mast.list)
colnames(nDEgenes) = paste('union_fdr<',fdrs,sep='')
for (i in 1:length(union.list)){
union.list[[i]] = character()
}
for(i in 1:length(mast.list)){
a = mast.list[[i]]
a = a[a$pct.1>new.min.pct | a$pct.2>new.min.pct,]
a = a[abs(a$avg_diff)>new.thresh,]
mast.list[[i]] = a
for (j in 1:length(union.list)){
b = a[a$fdr < fdrs[j],]
union.list[[j]] = union(union.list[[j]],rownames(b))
nDEgenes[i,j] = nrow(b)
rm(b)
}
rm(a)
}
}
if(!is.null(new.fdr)){
new.fdr.union = character()
nDEgenes = cbind(nDEgenes,numeric(length=nrow(nDEgenes)))
colnames(nDEgenes)[ncol(nDEgenes)] = paste('diff.genes$fdr<',new.fdr,sep="")
for (i in 1:length(mast.list)){
a = mast.list[[i]]
a = a[a$fdr<new.fdr,]
new.fdr.union = union(new.fdr.union,rownames(a))
nDEgenes[i,ncol(nDEgenes)] = nrow(a)
rm(a)
}
union.list[[paste('union_fdr<',new.fdr,sep="")]] = new.fdr.union
}
nDEgene_union = sapply(union.list,length)
nDEgene_union = list(nDEgene_union)
names(nDEgene_union) = c('nDEgene_union')
nDEgenes = list(nDEgenes)
names(nDEgenes) = c('nDEgenes')
return(c(union.list,mast.list,nDEgenes,nDEgene_union))
}