This repository has been archived by the owner. It is now read-only.
Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ReptilePallium/Dim_reduction_clustering_DEanalysis/DE_analysis_functions.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
179 lines (150 sloc)
8.13 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(MAST) | |
library(rpca) | |
library(Seurat) | |
library(data.table) | |
# set options(mc.cores=N) to run on multiple cores; useful on the server | |
# object = Seurat object | |
# id1 = identity 1 (from clustering) | |
# id2 = identity 2 | |
# min.pct = minimum percentage of cells expressing the gene | |
# thresh.use = minimum AVERAGE DIFFERENCE for filtering, in log scale (base e), e.g. for 1.5 fold change, type log(1.5) | |
# data.info.col = column number for columns in data.info that contain metadata (not clusters...) | |
FindMarkers.MAST <- function(object, id1, id2, min.pct, thresh.use,data.info.col = 1:13){ | |
cells.1 <- names(object@ident[object@ident == id1]) | |
cells.2 <- names(object@ident[object@ident == id2]) | |
cells.to.compare <- c(cells.1,cells.2) | |
raw.neur.counts <- as.matrix(object@raw.data[,object@cell.names]) | |
neur.alldata <- log(raw.neur.counts + 1)/log(2) # i.e. log2 base | |
genes.use = rownames(object@data) | |
thresh.min = 0 | |
data.temp1=round(apply(neur.alldata[genes.use, cells.1, drop = F],1,function(x)return(length(x[x>thresh.min])/length(x))),3) | |
data.temp2=round(apply(neur.alldata[genes.use, cells.2, drop = F],1,function(x)return(length(x[x>thresh.min])/length(x))),3) | |
data.alpha=cbind(data.temp1,data.temp2); colnames(data.alpha)=c("pct.1","pct.2") | |
alpha.min=apply(data.alpha,1,max) | |
names(alpha.min)=rownames(data.alpha) | |
genes.use=names(which(alpha.min>min.pct)) | |
neur.data <- neur.alldata[genes.use, cells.to.compare] | |
symbolid <- rownames(neur.data) | |
primerid <- symbolid | |
rownames(neur.data) <- primerid | |
neur.cData <- as.data.frame(cbind(as.character(object@ident), object@data.info[,data.info.col])) | |
colnames(neur.cData) <- c("cluster",colnames(object@data.info)[data.info.col]) | |
rownames(neur.cData) <- names(object@ident) # line added on 170825 | |
neur.cData$cluster <- as.character(neur.cData$cluster) | |
neur.cData <- neur.cData[cells.to.compare,] | |
neur.cData$wellKey <- as.character(cells.to.compare) | |
neur.fData <- cbind(primerid,symbolid) | |
neur.fData <- as.data.frame(neur.fData) | |
neur.fData[,1] <- as.character(neur.fData[,1]) | |
neur.to.compare <- FromMatrix(neur.data, neur.cData, neur.fData) | |
colData(neur.to.compare)$cngeneson <- scale(colSums(assay(neur.to.compare)>0)) # this takes Z scores of cell size factors | |
colData(neur.to.compare)$cluster <- as.factor(colData(neur.to.compare)$cluster) | |
colData(neur.to.compare)$cluster <- relevel(colData(neur.to.compare)$cluster, as.character(id1)) | |
zlmCond <- zlm.SingleCellAssay(~ cluster + cngeneson, neur.to.compare) | |
summaryCond <- summary(zlmCond, doLRT=paste0("cluster", id2)) | |
summaryDt <- summaryCond$datatable | |
summaryDt2 <- summaryDt | |
set1 <- summaryDt2[(contrast==paste0("cluster", id2) & component == "H"),c(1,4)] | |
colnames(set1) <- c("primerid","Pr") | |
set2 <- summaryDt2[(contrast==paste0("cluster", id2) & component == "logFC"), .(primerid, coef, ci.hi, ci.lo)] | |
fcHurdle <- merge(set1,set2, by="primerid") | |
fcHurdle[,fdr:=p.adjust(Pr, "fdr")] | |
expMean=function(x) { | |
return(log(mean(exp(x)-1)+1)) | |
} | |
data.avg.diff <- as.matrix(object@data[rownames(object@data) %in% genes.use, colnames(object@data) %in% cells.to.compare]) | |
data1 <- data.avg.diff[,cells.1] | |
data2 <- data.avg.diff[,cells.2] | |
genes.use2<-rownames(data.avg.diff) | |
avg_diff=unlist(lapply(genes.use2,function(x)(expMean(as.numeric(data1[x,]))-expMean(as.numeric(data2[x,]))))) | |
names(avg_diff)<-rownames(data.avg.diff) | |
fcHurdle2 <- as.data.frame(fcHurdle) | |
rownames(fcHurdle2)<-fcHurdle2$primerid | |
avg_diff <- avg_diff[rownames(fcHurdle2)] | |
fcHurdle3 <- cbind(fcHurdle2, avg_diff) | |
fcHurdleSig <- fcHurdle3[fcHurdle3$fdr<0.05 & abs(fcHurdle3$avg_diff)>thresh.use,] | |
fcHurdleSig <- cbind(fcHurdleSig[,c(2:3,6:7)],data.alpha[rownames(fcHurdleSig),]) | |
fcHurdleSig <- fcHurdleSig[order(abs(fcHurdleSig$avg_diff),decreasing=T),] | |
return(fcHurdleSig) | |
} | |
#######################Couple of Functions for identifying DEgenes across specificed clusters############# | |
##identify DEgenes using MAST function above | |
DE_Gene_Union = function(SeuratObject,clusters,min.pct=0.4,thresh.use=log(2), data.info.col = 1:13){ | |
DEgenes = list(character(),character(),character(),character()) | |
names(DEgenes) = c('union_fdr<0.05','union_fdr<0.01','union_fdr<0.005','union_fdr<1e-5') | |
combinations = t(combn(as.character(clusters),2)) | |
colnames(combinations) = c("ident.1","ident.2") | |
nDEgenes = matrix(nrow=nrow(combinations),ncol=4) | |
colnames(nDEgenes) = c('union_fdr<0.05','union_fdr<0.01','union_fdr<0.005','union_fdr<1e-5') | |
rownames(nDEgenes) = paste('cluster',combinations[,1],'cluster',combinations[,2],sep='_') | |
pb <- txtProgressBar(1, 100, style=3) | |
for (i in 1:nrow(combinations)){ | |
a = combinations[i,1] | |
b = combinations[i,2] | |
suppressMessages(diff.genes <- FindMarkers.MAST(SeuratObject,a,b,min.pct,thresh.use,data.info.col)) | |
#matrix from FindMarkers.MAST only contains genes that have a fdr<0.05 | |
DEgenes[[paste("cluster",a,"cluster",b, sep="_")]] = diff.genes | |
DEgenes[['union_fdr<0.05']] = union(DEgenes[['union_fdr<0.05']],rownames(diff.genes)) | |
DEgenes[['union_fdr<0.01']] = union(DEgenes[['union_fdr<0.01']],rownames(diff.genes[diff.genes$fdr<0.01,])) | |
DEgenes[['union_fdr<0.005']] = union(DEgenes[['union_fdr<0.005']],rownames(diff.genes[diff.genes$fdr<0.005,])) | |
DEgenes[['union_fdr<1e-5']] = union(DEgenes[['union_fdr<1e-5']],rownames(diff.genes[diff.genes$fdr<1e-5,])) | |
nDEgenes[i,1] = nrow(diff.genes) | |
nDEgenes[i,2] = nrow(diff.genes[diff.genes$fdr<0.01,]) | |
nDEgenes[i,3] = nrow(diff.genes[diff.genes$fdr<0.005,]) | |
nDEgenes[i,4] = nrow(diff.genes[diff.genes$fdr<1e-5,]) | |
rm(list = c('diff.genes','a','b')) | |
setTxtProgressBar(pb, (i*100)/nrow(combinations)) | |
} | |
DEgenes[['nDEgenes']] = nDEgenes | |
DEgenes[['nDEgene_union']] = sapply(DEgenes[1:4],length) | |
return(DEgenes) | |
} | |
#function to prune DE_Gene_Union list generated above according to fdr, min.pct, thresh.use | |
#input is output of function above | |
prune_DE_Gene_Union= function (DE_Gene_Union_list,new.fdr=NULL,new.min.pct=0,new.thresh=0){ | |
mast.list = DE_Gene_Union_list[grep('cluster',names(DE_Gene_Union_list))] | |
nDEgenes = DE_Gene_Union_list[['nDEgenes']] | |
union.list = DE_Gene_Union_list[grep('union_fdr',names(DE_Gene_Union_list))] | |
if(new.min.pct>0 | new.thresh>0){ | |
fdrs = substring(colnames(nDEgenes),11) | |
fdrs = as.numeric(fdrs) | |
nDEgenes = matrix(nrow=length(mast.list),ncol=length(fdrs)) | |
rownames(nDEgenes) = names(mast.list) | |
colnames(nDEgenes) = paste('union_fdr<',fdrs,sep='') | |
for (i in 1:length(union.list)){ | |
union.list[[i]] = character() | |
} | |
for(i in 1:length(mast.list)){ | |
a = mast.list[[i]] | |
a = a[a$pct.1>new.min.pct | a$pct.2>new.min.pct,] | |
a = a[abs(a$avg_diff)>new.thresh,] | |
mast.list[[i]] = a | |
for (j in 1:length(union.list)){ | |
b = a[a$fdr < fdrs[j],] | |
union.list[[j]] = union(union.list[[j]],rownames(b)) | |
nDEgenes[i,j] = nrow(b) | |
rm(b) | |
} | |
rm(a) | |
} | |
} | |
if(!is.null(new.fdr)){ | |
new.fdr.union = character() | |
nDEgenes = cbind(nDEgenes,numeric(length=nrow(nDEgenes))) | |
colnames(nDEgenes)[ncol(nDEgenes)] = paste('diff.genes$fdr<',new.fdr,sep="") | |
for (i in 1:length(mast.list)){ | |
a = mast.list[[i]] | |
a = a[a$fdr<new.fdr,] | |
new.fdr.union = union(new.fdr.union,rownames(a)) | |
nDEgenes[i,ncol(nDEgenes)] = nrow(a) | |
rm(a) | |
} | |
union.list[[paste('union_fdr<',new.fdr,sep="")]] = new.fdr.union | |
} | |
nDEgene_union = sapply(union.list,length) | |
nDEgene_union = list(nDEgene_union) | |
names(nDEgene_union) = c('nDEgene_union') | |
nDEgenes = list(nDEgenes) | |
names(nDEgenes) = c('nDEgenes') | |
return(c(union.list,mast.list,nDEgenes,nDEgene_union)) | |
} | |