This repository has been archived by the owner. It is now read-only.
Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ReptilePallium/Comparative_analysis/ComparativeAnalysis_CleanFunctions.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
729 lines (614 sloc)
31.5 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#make sure these files are in your working directory: | |
#chrysemys_eggnog_pruned.txt | |
#pogona_eggnog_pruned.txt | |
#gallus_eggnog_pruned.txt | |
#homo_eggnog_pruned.txt | |
#mus_eggnog_pruned.txt | |
####################################METHOD1##################### | |
####################################Specificity Correlation##### | |
#scale to gene expression average | |
SpecificityCorrelation = function(ExpressionTableSpecies1, species1 = c('turtle','lizard'), DEgenesSpecies1, ExpressionTableSpecies2, species2 = c('turtle','lizard','mouse','human','chicken'), DEgenesSpecies2){ | |
#Step1: Load eggnog tables | |
if (species1=="turtle") eggnog1 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species1=="lizard") eggnog1 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species1=="mouse") { | |
eggnog1 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
eggnog1[2:nrow(eggnog1),1] = toupper(eggnog1[2:nrow(eggnog1),1]) | |
DEgenesSpecies1 = toupper(DEgenesSpecies1) | |
rownames(ExpressionTableSpecies1) = toupper(rownames(ExpressionTableSpecies1)) | |
} | |
if (species2=="turtle") eggnog2 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="lizard") eggnog2 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="mouse") { | |
eggnog2 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
eggnog2[2:nrow(eggnog2),1] = toupper(eggnog2[2:nrow(eggnog2),1]) | |
DEgenesSpecies2 = toupper(DEgenesSpecies2) | |
rownames(ExpressionTableSpecies2) = toupper(rownames(ExpressionTableSpecies2)) | |
} | |
if (species2=="human") eggnog2 = read.table('homo_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="chicken") eggnog2 = read.table('gallus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
colnames(eggnog1) = eggnog1[1,] | |
eggnog1 = eggnog1[2:nrow(eggnog1),] | |
colnames(eggnog2) = eggnog2[1,] | |
eggnog2 = eggnog2[2:nrow(eggnog2),] | |
#Step2: Take union of DEgenes for analysis | |
DEgenesSpecies1 = eggnog1[eggnog1[,1] %in% DEgenesSpecies1,2] | |
DEgenesSpecies2 = eggnog2[eggnog2[,1] %in% DEgenesSpecies2,2] | |
DEgenes = union(DEgenesSpecies1,DEgenesSpecies2) | |
DEgenesSpecies1 = eggnog1[eggnog1[,2] %in% DEgenes,1] | |
DEgenesSpecies2 = eggnog2[eggnog2[,2] %in% DEgenes,1] | |
#Step3: Prune Expression Tables & Remove rows with no expression | |
Sp1 = ExpressionTableSpecies1[rownames(ExpressionTableSpecies1) %in% DEgenesSpecies1,] | |
Sp1 = Sp1[rowSums (Sp1)!=0,] | |
Sp2 = ExpressionTableSpecies2[rownames(ExpressionTableSpecies2) %in% DEgenesSpecies2,] | |
Sp2 = Sp2[rowSums (Sp2)!=0,] | |
#Step4: Replace Gene names with Eggnog name | |
Sp1[,ncol(Sp1)+1] = rownames(Sp1) | |
colnames(Sp1)[ncol(Sp1)] = species1 | |
Sp1 = merge(eggnog1, Sp1, by =species1, all = F) | |
rownames(Sp1) = Sp1$eggnog | |
Sp1 = Sp1[,3:ncol(Sp1)] | |
Sp2[,ncol(Sp2)+1] = rownames(Sp2) | |
colnames(Sp2)[ncol(Sp2)] = species2 | |
Sp2 = merge(eggnog2, Sp2, by =species2, all = F) | |
rownames(Sp2) = Sp2$eggnog | |
Sp2 = Sp2[,3:ncol(Sp2)] | |
#Step5: Scale Expression Tables by gene average | |
avg = rowMeans(Sp1) | |
Sp1 = sweep(Sp1,1,avg,"/") | |
rm(avg) | |
avg = rowMeans(Sp2) | |
Sp2 = sweep(Sp2,1,avg,"/") | |
rm(avg) | |
#Step6: Merge Expression Tables | |
geTable = merge(Sp1,Sp2, by='row.names', all=F) | |
rownames(geTable) = geTable$Row.names | |
geTable = geTable[,2:ncol(geTable)] | |
#Step7: Correlation | |
Corr.Coeff.Table = diag(x = 1, ncol(geTable), ncol(geTable)) | |
rownames(Corr.Coeff.Table) = colnames(geTable) | |
colnames(Corr.Coeff.Table) = colnames(geTable) | |
p.value.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable)) | |
rownames(p.value.table) = colnames(geTable) | |
colnames(p.value.table) = colnames(geTable) | |
a = combn(1:ncol(geTable),2) | |
n4BHcorrect = ncol(geTable)*ncol(geTable) | |
for (i in 1:ncol(a)){ | |
cell_type1 = colnames(geTable)[a[1,i]] | |
cell_type2 = colnames(geTable)[a[2,i]] | |
b = suppressWarnings(cor.test(geTable[,cell_type1], geTable[,cell_type2], adjust='none',alternative = "two.sided", method='pearson')) | |
Corr.Coeff.Table[a[1,i],a[2,i]] = b$estimate | |
Corr.Coeff.Table[a[2,i],a[1,i]] = b$estimate | |
BHcorrected.p.value = p.adjust(b$p.value, method='BH',n=n4BHcorrect) | |
p.value.table[a[1,i],a[2,i]] = BHcorrected.p.value | |
p.value.table[a[2,i],a[1,i]] = BHcorrected.p.value | |
rm(list =c('cell_type1', 'cell_type2','b')) | |
} | |
neg.log10.p = -log10(p.value.table) | |
list.to.return = list(Corr.Coeff.Table,p.value.table,neg.log10.p,DEgenes,rownames(geTable),length(DEgenes),length(rownames(geTable)),nDESp1,nDESp2) | |
names(list.to.return) = c('corr.coeff','p.value','neg.log10.p.value','DEgenes_union','DEgenes_in_analysis','nDEgenes_union','nDEgenes_in_analysis',paste('nDEgenes',species1,sep='_'),paste('nDEgenes',species2,sep='_')) | |
return(list.to.return) | |
} | |
#Specificity Using Intersect of Gene Lists | |
SpIntersect = function(ExpressionTableSpecies1, species1 = c('turtle','lizard','mouse'), DEgenesSpecies1, ExpressionTableSpecies2, species2 = c('turtle','lizard','mouse','human','chicken'), DEgenesSpecies2){ | |
#Step1: Load eggnog tables | |
if (species1=="turtle") eggnog1 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species1=="lizard") eggnog1 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species1=="mouse") { | |
eggnog1 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
eggnog1[2:nrow(eggnog1),1] = toupper(eggnog1[2:nrow(eggnog1),1]) | |
DEgenesSpecies1 = toupper(DEgenesSpecies1) | |
rownames(ExpressionTableSpecies1) = toupper(rownames(ExpressionTableSpecies1)) | |
} | |
if (species2=="turtle") eggnog2 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="lizard") eggnog2 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="mouse") { | |
eggnog2 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
eggnog2[2:nrow(eggnog2),1] = toupper(eggnog2[2:nrow(eggnog2),1]) | |
DEgenesSpecies2 = toupper(DEgenesSpecies2) | |
rownames(ExpressionTableSpecies2) = toupper(rownames(ExpressionTableSpecies2)) | |
} | |
if (species2=="human") eggnog2 = read.table('homo_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="chicken") eggnog2 = read.table('gallus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
colnames(eggnog1) = eggnog1[1,] | |
eggnog1 = eggnog1[2:nrow(eggnog1),] | |
colnames(eggnog2) = eggnog2[1,] | |
eggnog2 = eggnog2[2:nrow(eggnog2),] | |
#Step2: Take union of DEgenes for analysis | |
DEgenesSpecies1 = eggnog1[eggnog1[,1] %in% DEgenesSpecies1,2] | |
nDESp1 = length(DEgenesSpecies1) | |
DEgenesSpecies2 = eggnog2[eggnog2[,1] %in% DEgenesSpecies2,2] | |
nDESp2 = length(DEgenesSpecies2) | |
DEgenes = intersect(DEgenesSpecies1,DEgenesSpecies2) | |
DEgenesSpecies1 = eggnog1[eggnog1[,2] %in% DEgenes,1] | |
DEgenesSpecies2 = eggnog2[eggnog2[,2] %in% DEgenes,1] | |
#Step3: Prune Expression Tables & Remove rows with no expression | |
Sp1 = ExpressionTableSpecies1[rownames(ExpressionTableSpecies1) %in% DEgenesSpecies1,] | |
Sp1 = Sp1[rowSums (Sp1)!=0,] | |
Sp2 = ExpressionTableSpecies2[rownames(ExpressionTableSpecies2) %in% DEgenesSpecies2,] | |
Sp2 = Sp2[rowSums (Sp2)!=0,] | |
#Step4: Replace Gene names with Eggnog name | |
Sp1[,ncol(Sp1)+1] = rownames(Sp1) | |
colnames(Sp1)[ncol(Sp1)] = species1 | |
Sp1 = merge(eggnog1, Sp1, by =species1, all = F) | |
rownames(Sp1) = Sp1$eggnog | |
Sp1 = Sp1[,3:ncol(Sp1)] | |
Sp2[,ncol(Sp2)+1] = rownames(Sp2) | |
colnames(Sp2)[ncol(Sp2)] = species2 | |
Sp2 = merge(eggnog2, Sp2, by =species2, all = F) | |
rownames(Sp2) = Sp2$eggnog | |
Sp2 = Sp2[,3:ncol(Sp2)] | |
#Step5: Scale Expression Tables by gene average | |
avg = rowMeans(Sp1) | |
Sp1 = sweep(Sp1,1,avg,"/") | |
rm(avg) | |
avg = rowMeans(Sp2) | |
Sp2 = sweep(Sp2,1,avg,"/") | |
rm(avg) | |
#Step6: Merge Expression Tables | |
geTable = merge(Sp1,Sp2, by='row.names', all=F) | |
rownames(geTable) = geTable$Row.names | |
geTable = geTable[,2:ncol(geTable)] | |
#Step7: Correlation | |
Corr.Coeff.Table = diag(x = 1, ncol(geTable), ncol(geTable)) | |
rownames(Corr.Coeff.Table) = colnames(geTable) | |
colnames(Corr.Coeff.Table) = colnames(geTable) | |
p.value.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable)) | |
rownames(p.value.table) = colnames(geTable) | |
colnames(p.value.table) = colnames(geTable) | |
a = combn(1:ncol(geTable),2) | |
n4BHcorrect = ncol(geTable)*ncol(geTable) | |
for (i in 1:ncol(a)){ | |
cell_type1 = colnames(geTable)[a[1,i]] | |
cell_type2 = colnames(geTable)[a[2,i]] | |
b = suppressWarnings(cor.test(geTable[,cell_type1], geTable[,cell_type2], adjust='none',alternative = "two.sided", method='pearson')) | |
Corr.Coeff.Table[a[1,i],a[2,i]] = b$estimate | |
Corr.Coeff.Table[a[2,i],a[1,i]] = b$estimate | |
BHcorrected.p.value = p.adjust(b$p.value, method='BH',n=n4BHcorrect) | |
p.value.table[a[1,i],a[2,i]] = BHcorrected.p.value | |
p.value.table[a[2,i],a[1,i]] = BHcorrected.p.value | |
rm(list =c('cell_type1', 'cell_type2','b')) | |
} | |
neg.log10.p = -log10(p.value.table) | |
list.to.return = list(Corr.Coeff.Table,p.value.table,neg.log10.p,DEgenes,rownames(geTable),length(DEgenes),length(rownames(geTable)),nDESp1,nDESp2) | |
names(list.to.return) = c('corr.coeff','p.value','neg.log10.p.value','DEgenes_intersect','DEgenes_in_analysis','nDEgenes_intersect','nDEgenes_in_analysis',paste('nDEgenes',species1,sep='_'),paste('nDEgenes',species2,sep='_')) | |
return(list.to.return) | |
} | |
####################################METHOD2##################### | |
####################################Rank Correlation##### | |
#scale to gene expression average | |
RankCorrelation = function(ExpressionTableSpecies1, species1 = c('turtle','lizard'), DEgenesSpecies1, ExpressionTableSpecies2, species2 = c('turtle','lizard','mouse','human','chicken'), DEgenesSpecies2){ | |
#Step1: Load eggnog tables | |
if (species1=="turtle") eggnog1 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species1=="lizard") eggnog1 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="turtle") eggnog2 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="lizard") eggnog2 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="mouse") { | |
eggnog2 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
eggnog2[2:nrow(eggnog2),1] = toupper(eggnog2[2:nrow(eggnog2),1]) | |
DEgenesSpecies2 = toupper(DEgenesSpecies2) | |
rownames(ExpressionTableSpecies2) = toupper(rownames(ExpressionTableSpecies2)) | |
} | |
if (species2=="human") eggnog2 = read.table('homo_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="chicken") eggnog2 = read.table('gallus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
colnames(eggnog1) = eggnog1[1,] | |
eggnog1 = eggnog1[2:nrow(eggnog1),] | |
colnames(eggnog2) = eggnog2[1,] | |
eggnog2 = eggnog2[2:nrow(eggnog2),] | |
#Step2: Take union of DEgenes for analysis | |
DEgenesSpecies1 = eggnog1[eggnog1[,1] %in% DEgenesSpecies1,2] | |
DEgenesSpecies2 = eggnog2[eggnog2[,1] %in% DEgenesSpecies2,2] | |
DEgenes = union(DEgenesSpecies1,DEgenesSpecies2) | |
DEgenesSpecies1 = eggnog1[eggnog1[,2] %in% DEgenes,1] | |
DEgenesSpecies2 = eggnog2[eggnog2[,2] %in% DEgenes,1] | |
#Step3: Prune Expression Tables | |
Sp1 = ExpressionTableSpecies1[rownames(ExpressionTableSpecies1) %in% DEgenesSpecies1,] | |
Sp2 = ExpressionTableSpecies2[rownames(ExpressionTableSpecies2) %in% DEgenesSpecies2,] | |
#Step4: Replace Gene names with Eggnog name | |
Sp1[,ncol(Sp1)+1] = rownames(Sp1) | |
colnames(Sp1)[ncol(Sp1)] = species1 | |
Sp1 = merge(eggnog1, Sp1, by =species1, all = F) | |
rownames(Sp1) = Sp1$eggnog | |
Sp1 = Sp1[,3:ncol(Sp1)] | |
Sp2[,ncol(Sp2)+1] = rownames(Sp2) | |
colnames(Sp2)[ncol(Sp2)] = species2 | |
Sp2 = merge(eggnog2, Sp2, by =species2, all = F) | |
rownames(Sp2) = Sp2$eggnog | |
Sp2 = Sp2[,3:ncol(Sp2)] | |
#Step5: Merge Expression Tables | |
geTable = merge(Sp1,Sp2, by='row.names', all=F) | |
rownames(geTable) = geTable$Row.names | |
geTable = geTable[,2:ncol(geTable)] | |
#Step6: Correlation | |
rho.table = diag(x = 1, ncol(geTable), ncol(geTable)) | |
rownames(rho.table) = colnames(geTable) | |
colnames(rho.table) = colnames(geTable) | |
p.value.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable)) | |
rownames(p.value.table) = colnames(geTable) | |
colnames(p.value.table) = colnames(geTable) | |
a = combn(1:ncol(geTable),2) | |
n4BHcorrect = ncol(geTable)*ncol(geTable) | |
for (i in 1:ncol(a)){ | |
cell_type1 = colnames(geTable)[a[1,i]] | |
cell_type2 = colnames(geTable)[a[2,i]] | |
b = suppressWarnings(cor.test(geTable[,cell_type1], geTable[,cell_type2], adjust='none',alternative = "two.sided", method='spearman')) | |
rho.table[a[1,i],a[2,i]] = b$estimate | |
rho.table[a[2,i],a[1,i]] = b$estimate | |
BHcorrected.p.value = p.adjust(b$p.value, method='BH',n=n4BHcorrect) | |
p.value.table[a[1,i],a[2,i]] = BHcorrected.p.value | |
p.value.table[a[2,i],a[1,i]] = BHcorrected.p.value | |
rm(list =c('cell_type1', 'cell_type2','b')) | |
} | |
neg.log10.p = -log10(p.value.table) | |
list.to.return = list(rho.table,p.value.table,neg.log10.p) | |
names(list.to.return) = c('rho','p.value','neg.log10.p.value') | |
return(list.to.return) | |
} | |
####################Rank Correlation All Genes###### | |
RankCorrelationAll = function(ExpressionTableSpecies1, species1 = c('turtle','lizard'), ExpressionTableSpecies2, species2 = c('turtle','lizard','mouse','human','chicken')){ | |
#Step1: Load eggnog tables | |
if (species1=="turtle") eggnog1 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species1=="lizard") eggnog1 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="turtle") eggnog2 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="lizard") eggnog2 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="mouse") { | |
eggnog2 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
eggnog2[2:nrow(eggnog2),1] = toupper(eggnog2[2:nrow(eggnog2),1]) | |
rownames(ExpressionTableSpecies2) = toupper(rownames(ExpressionTableSpecies2)) | |
} | |
if (species2=="human") eggnog2 = read.table('homo_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="chicken") eggnog2 = read.table('gallus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
colnames(eggnog1) = eggnog1[1,] | |
eggnog1 = eggnog1[2:nrow(eggnog1),] | |
colnames(eggnog2) = eggnog2[1,] | |
eggnog2 = eggnog2[2:nrow(eggnog2),] | |
#Step2: Replace Gene names with Eggnog name | |
Sp1 = ExpressionTableSpecies1 | |
Sp1[,ncol(Sp1)+1] = rownames(Sp1) | |
colnames(Sp1)[ncol(Sp1)] = species1 | |
Sp1 = merge(eggnog1, Sp1, by =species1, all = F) | |
rownames(Sp1) = Sp1$eggnog | |
Sp1 = Sp1[,3:ncol(Sp1)] | |
Sp2 = ExpressionTableSpecies2 | |
Sp2[,ncol(Sp2)+1] = rownames(Sp2) | |
colnames(Sp2)[ncol(Sp2)] = species2 | |
Sp2 = merge(eggnog2, Sp2, by =species2, all = F) | |
rownames(Sp2) = Sp2$eggnog | |
Sp2 = Sp2[,3:ncol(Sp2)] | |
#Step3: Merge Expression Tables | |
geTable = merge(Sp1,Sp2, by='row.names', all=F) | |
rownames(geTable) = geTable$Row.names | |
geTable = geTable[,2:ncol(geTable)] | |
#Step4: Correlation | |
rho.table = diag(x = 1, ncol(geTable), ncol(geTable)) | |
rownames(rho.table) = colnames(geTable) | |
colnames(rho.table) = colnames(geTable) | |
p.value.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable)) | |
rownames(p.value.table) = colnames(geTable) | |
colnames(p.value.table) = colnames(geTable) | |
a = combn(1:ncol(geTable),2) | |
n4BHcorrect = ncol(geTable)*ncol(geTable) | |
for (i in 1:ncol(a)){ | |
cell_type1 = colnames(geTable)[a[1,i]] | |
cell_type2 = colnames(geTable)[a[2,i]] | |
b = suppressWarnings(cor.test(geTable[,cell_type1], geTable[,cell_type2], adjust='none',alternative = "two.sided", method='spearman')) | |
rho.table[a[1,i],a[2,i]] = b$estimate | |
rho.table[a[2,i],a[1,i]] = b$estimate | |
BHcorrected.p.value = p.adjust(b$p.value, method='BH',n=n4BHcorrect) | |
p.value.table[a[1,i],a[2,i]] = BHcorrected.p.value | |
p.value.table[a[2,i],a[1,i]] = BHcorrected.p.value | |
rm(list =c('cell_type1', 'cell_type2','b')) | |
} | |
neg.log10.p = -log10(p.value.table) | |
list.to.return = list(rho.table,p.value.table,neg.log10.p) | |
names(list.to.return) = c('rho','p.value','neg.log10.p.value') | |
return(list.to.return) | |
} | |
########################METHOD 3####### | |
########################hypergeometric distribution##### | |
#Step 1: generate list of markers for cell types | |
#requires FindMarkers.MAST function | |
Marker_List = function(SeuratObject,clusters,min.pct=0.4,thresh.use=log(1.5),fdr=0.05){ | |
#step1a generate pairwise DEgenes by MAST | |
DEgenes = list() | |
combinations = t(combn(as.character(clusters),2)) | |
colnames(combinations) <- c("ident.1","ident.2") | |
pb <- txtProgressBar(1, 100, style=3) | |
for (i in 1:nrow(combinations)){ | |
a = combinations[i,1] | |
b = combinations[i,2] | |
suppressMessages(diff.genes <- FindMarkers.MAST(SeuratObject,a,b,min.pct,thresh.use)) | |
#matrix from FindMarkers.MAST only contains genes that have a fdr<0.05 | |
DEgenes[[paste("id1",a,"id2",b, sep="_")]] = rownames(diff.genes[diff.genes$avg_diff>0 & diff.genes$fdr<fdr,]) | |
DEgenes[[paste("id1",b,"id2",a, sep="_")]] = rownames(diff.genes[diff.genes$avg_diff<0 & diff.genes$fdr<fdr,]) | |
rm(list = c('diff.genes','a','b')) | |
setTxtProgressBar(pb, (i*100)/nrow(combinations)) | |
} | |
#step1b generate markers based on union of markers for a celltype | |
markers = list() | |
for (i in 1:length(clusters)) { | |
sub.list = DEgenes[grep(paste('id1_',clusters[i], "_",sep=""),names(DEgenes))] | |
gene.union = vector() | |
for (j in 1:length(sub.list)){ | |
gene.union = union(gene.union,sub.list[[j]]) | |
} | |
markers[[as.character(clusters)[i]]] = gene.union | |
rm(list=c("sub.list","gene.union")) | |
} | |
return(list(DEgenes,markers)) | |
} | |
#Step 2: replace markers with eggnog names | |
# and generate table with number of markers and number of eggnog markers per cell type | |
Eggnog_list_replace = function(genelist_list, species = c('turtle','lizard','mouse','human','chicken')){ | |
#Step1: Load eggnog tables | |
if (species=="turtle") eggnog = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species=="lizard") eggnog = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species=="mouse") { | |
eggnog = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
eggnog[2:nrow(eggnog),1] = toupper(eggnog[2:nrow(eggnog),1]) | |
for (i in 1:length(genelist_list)){ | |
genelist_list[[i]] = toupper(genelist_list[[i]]) | |
} | |
} | |
if (species=="human") eggnog = read.table('homo_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species=="chicken") eggnog = read.table('gallus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
colnames(eggnog) = eggnog[1,] | |
eggnog = eggnog[2:nrow(eggnog),] | |
#Step2 replace names | |
eggnog_list = list() | |
for (i in 1:length(genelist_list)) { | |
eggnog_list[[i]] = eggnog[eggnog[,1] %in% genelist_list[[i]],2] | |
names(eggnog_list)[i] = names(genelist_list)[i] | |
} | |
#Step3 generate summary table to append to eggnog_list | |
summary.matrix = cbind(sapply(genelist_list,length),sapply(eggnog_list,length)) | |
colnames(summary.matrix) = c('nMarkers','nMarkers_eggnog') | |
rownames(summary.matrix) = names(genelist_list) | |
eggnog_list[['summary']] = summary.matrix | |
return(eggnog_list) | |
} | |
#Step 3: p values for overlap | |
hygecdf_matrix = function(eggnog.list1, sp1='tur', eggnog.list2, sp2='liz'){ | |
names(eggnog.list1) = paste(sp1,names(eggnog.list1), sep="_") | |
names(eggnog.list2) = paste(sp2,names(eggnog.list2), sep="_") | |
summaries = list(eggnog.list1[[paste(sp1,'summary', sep="_")]], eggnog.list2[[paste(sp2,'summary', sep="_")]]) | |
eggnog.list1[paste(sp1,'summary', sep="_")] = NULL | |
eggnog.list2[paste(sp2,'summary', sep="_")] = NULL | |
gene.list = c(eggnog.list1, eggnog.list2) | |
hyg = matrix(nrow=length(gene.list),ncol=length(gene.list)) | |
colnames(hyg) = c(names(gene.list)) | |
rownames(hyg) = c(names(gene.list)) | |
JSC = matrix(nrow=length(gene.list),ncol=length(gene.list)) | |
colnames(JSC) = c(names(gene.list)) | |
rownames(JSC) = c(names(gene.list)) | |
nGenes = vector() | |
for(i in 1:length(gene.list)){ | |
nGenes = union(nGenes,gene.list[[i]]) | |
} | |
nGenes = length(nGenes) | |
#nGenes = 10089 #number of turtle v lizard eggnog genes | |
for (i in 1:length(gene.list)){ | |
m = length(gene.list[[i]]) | |
n = nGenes - m | |
n4BHcorrect = nrow(hyg)*ncol(hyg) | |
for (j in 1:length(gene.list)){ | |
k = length(gene.list[[i]]) | |
q = length(intersect(gene.list[[i]],gene.list[[j]])) | |
p.value = phyper(q,m,n,k, lower.tail=F) | |
#BH correct p values in hyg | |
p.value = p.adjust(p.value, method='BH',n=n4BHcorrect) | |
hyg[i,j] = p.value | |
u = length(union(gene.list[[i]],gene.list[[j]])) | |
JSC[i,j] = q/u | |
rm(list=c('k','q','u')) | |
} | |
rm(m) | |
} | |
negLog10 = -log10(hyg) | |
return.list = list(hyg,negLog10,JSC,summaries) | |
names(return.list) = c('hypergeometric_distribution','negative_log10','JSC','summaries') | |
return(return.list) | |
} | |
#Just Calculate Overlap | |
overlap = function(eggnog.list1, sp1='sp1', eggnog.list2, sp2='sp2'){ | |
names(eggnog.list1) = paste(sp1,names(eggnog.list1), sep="_") | |
names(eggnog.list2) = paste(sp2,names(eggnog.list2), sep="_") | |
eggnog.list1[paste(sp1,'summary', sep="_")] = NULL | |
eggnog.list2[paste(sp2,'summary', sep="_")] = NULL | |
gene.list = c(eggnog.list1, eggnog.list2) | |
overlap.matrix = matrix(nrow=length(gene.list),ncol=length(gene.list)) | |
colnames(overlap.matrix) = c(names(gene.list)) | |
rownames(overlap.matrix) = c(names(gene.list)) | |
a = combn(1:ncol(overlap.matrix),2) | |
for (i in 1:ncol(a)){ | |
cell1 = a[1,i] | |
cell2 = a[2,i] | |
nGenes = length(intersect(gene.list[[cell1]],gene.list[[cell2]])) | |
overlap.matrix[cell1,cell2] = nGenes | |
overlap.matrix[cell2,cell1] = nGenes | |
} | |
for(i in 1:nrow(overlap.matrix)){ | |
overlap.matrix[i,i] = length(gene.list[[i]]) | |
} | |
return(overlap.matrix) | |
} | |
#Fake Binarization | |
fake.binary = function(eggnog.list1, sp1='sp1', eggnog.list2, sp2='sp2'){ | |
names(eggnog.list1) = paste(sp1,names(eggnog.list1), sep="_") | |
names(eggnog.list2) = paste(sp2,names(eggnog.list2), sep="_") | |
eggnog.list1[paste(sp1,'summary', sep="_")] = NULL | |
eggnog.list2[paste(sp2,'summary', sep="_")] = NULL | |
gene.list = c(eggnog.list1, eggnog.list2) | |
#step1: create list of markers for analysis | |
markers.sp1 = vector() | |
for(i in 1:length(eggnog.list1)){ | |
markers.sp1 = union(markers.sp1,eggnog.list1[[i]]) | |
} | |
markers.sp2 = vector() | |
for(i in 1:length(eggnog.list2)){ | |
markers.sp2 = union(markers.sp2,eggnog.list2[[i]]) | |
} | |
#markers = union(markers.sp1,markers.sp2) | |
markers = intersect(markers.sp1,markers.sp2) | |
#step2: create matrix of presence/absence of markers | |
expression.matrix = data.frame(row.names=markers) | |
for (i in 1:length(gene.list)){ | |
expression.matrix[rownames(expression.matrix) %in% gene.list[[i]],names(gene.list)[i]] = 1 | |
expression.matrix[!(rownames(expression.matrix) %in% gene.list[[i]]),names(gene.list)[i]] = 0 | |
} | |
#step3: correlation (pearson) | |
Corr.Coeff.Table = diag(x = 1, length(gene.list), length(gene.list)) | |
rownames(Corr.Coeff.Table) = names(gene.list) | |
colnames(Corr.Coeff.Table) = names(gene.list) | |
p.value.table = matrix(ncol=length(gene.list), nrow = length(gene.list)) | |
rownames(p.value.table) = names(gene.list) | |
colnames(p.value.table) = names(gene.list) | |
a = combn(1:length(gene.list),2) | |
n4BHcorrect = length(gene.list)*length(gene.list) | |
for (i in 1:ncol(a)){ | |
cell_type1 = names(gene.list)[a[1,i]] | |
cell_type2 = names(gene.list)[a[2,i]] | |
b = suppressWarnings(cor.test(expression.matrix[,cell_type1], expression.matrix[,cell_type2], adjust='none',alternative = "two.sided", method='pearson')) | |
Corr.Coeff.Table[a[1,i],a[2,i]] = b$estimate | |
Corr.Coeff.Table[a[2,i],a[1,i]] = b$estimate | |
BHcorrected.p.value = p.adjust(b$p.value, method='BH',n=n4BHcorrect) | |
p.value.table[a[1,i],a[2,i]] = BHcorrected.p.value | |
p.value.table[a[2,i],a[1,i]] = BHcorrected.p.value | |
rm(list =c('cell_type1', 'cell_type2','b')) | |
} | |
neg.log10.p = -log10(p.value.table) | |
#return | |
list.to.return = list(Corr.Coeff.Table,p.value.table,neg.log10.p,expression.matrix) | |
names(list.to.return) = c('correlation_coeff','p_values','neg_log10_p','expression_table') | |
return(list.to.return) | |
} | |
##########################Old Scaling Methods########### | |
#scaling.method options | |
#'minmax' sets min at 0 and max at 1 | |
#'max' expression as fraction of max | |
#'yao' expression according to Yao et al Cell Stem Cell 2017 | |
ScaledCorrelation = function(scaling.method = c('minmax','max','yao'),ExpressionTableSpecies1, species1 = c('turtle','lizard','mouse'), DEgenesSpecies1, ExpressionTableSpecies2, species2 = c('turtle','lizard','mouse','human','chicken'), DEgenesSpecies2){ | |
#Step1: Load eggnog tables | |
if (species1=="turtle") eggnog1 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species1=="lizard") eggnog1 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species1=="mouse") { | |
eggnog1 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
eggnog1[2:nrow(eggnog1),1] = toupper(eggnog1[2:nrow(eggnog1),1]) | |
DEgenesSpecies1 = toupper(DEgenesSpecies1) | |
rownames(ExpressionTableSpecies1) = toupper(rownames(ExpressionTableSpecies1)) | |
} | |
if (species2=="turtle") eggnog2 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="lizard") eggnog2 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="mouse") { | |
eggnog2 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
eggnog2[2:nrow(eggnog2),1] = toupper(eggnog2[2:nrow(eggnog2),1]) | |
DEgenesSpecies2 = toupper(DEgenesSpecies2) | |
rownames(ExpressionTableSpecies2) = toupper(rownames(ExpressionTableSpecies2)) | |
} | |
if (species2=="human") eggnog2 = read.table('homo_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
if (species2=="chicken") eggnog2 = read.table('gallus_eggnog_pruned.txt',header=F,stringsAsFactors = F) | |
colnames(eggnog1) = eggnog1[1,] | |
eggnog1 = eggnog1[2:nrow(eggnog1),] | |
colnames(eggnog2) = eggnog2[1,] | |
eggnog2 = eggnog2[2:nrow(eggnog2),] | |
#Step2: Take union of DEgenes for analysis | |
DEgenesSpecies1 = eggnog1[eggnog1[,1] %in% DEgenesSpecies1,2] | |
DEgenesSpecies2 = eggnog2[eggnog2[,1] %in% DEgenesSpecies2,2] | |
DEgenes = union(DEgenesSpecies1,DEgenesSpecies2) | |
DEgenesSpecies1 = eggnog1[eggnog1[,2] %in% DEgenes,1] | |
DEgenesSpecies2 = eggnog2[eggnog2[,2] %in% DEgenes,1] | |
#Step3: Prune Expression Tables & Remove rows with no expression | |
ExpressionTableSpecies1 = ExpressionTableSpecies1[rowSums(ExpressionTableSpecies1)!=0,] | |
Sp1 = ExpressionTableSpecies1[rownames(ExpressionTableSpecies1) %in% DEgenesSpecies1,] | |
ExpressionTableSpecies2 = ExpressionTableSpecies2[rowSums(ExpressionTableSpecies2)!=0,] | |
Sp2 = ExpressionTableSpecies2[rownames(ExpressionTableSpecies2) %in% DEgenesSpecies2,] | |
#Step4: Replace Gene names with Eggnog name | |
Sp1[,ncol(Sp1)+1] = rownames(Sp1) | |
colnames(Sp1)[ncol(Sp1)] = species1 | |
Sp1 = merge(eggnog1, Sp1, by =species1, all = F) | |
rownames(Sp1) = Sp1$eggnog | |
Sp1 = Sp1[,3:ncol(Sp1)] | |
Sp2[,ncol(Sp2)+1] = rownames(Sp2) | |
colnames(Sp2)[ncol(Sp2)] = species2 | |
Sp2 = merge(eggnog2, Sp2, by =species2, all = F) | |
rownames(Sp2) = Sp2$eggnog | |
Sp2 = Sp2[,3:ncol(Sp2)] | |
#Step5: Scale Expression Tables by scaling.method | |
if(scaling.method=='minmax'){ | |
gene_min = apply(Sp1, 1, min) | |
Sp1 = sweep(Sp1,1,gene_min,"-") | |
gene_max = apply(Sp1, 1, max) | |
Sp1 = sweep(Sp1,1,gene_max,"/") | |
rm(list=c('gene_min','gene_max')) | |
gene_min = apply(Sp2, 1, min) | |
Sp2 = sweep(Sp2,1,gene_min,"-") | |
gene_max = apply(Sp2, 1, max) | |
Sp2 = sweep(Sp2,1,gene_max,"/") | |
rm(list=c('gene_min','gene_max')) | |
} | |
if(scaling.method=='max'){ | |
gene_max = apply(Sp1, 1, max) | |
Sp1 = sweep(Sp1,1,gene_max,"/") | |
rm(gene_max) | |
gene_max = apply(Sp2, 1, max) | |
Sp2 = sweep(Sp2,1,gene_max,"/") | |
rm(gene_max) | |
} | |
if(scaling.method=='yao'){ | |
gene_max = apply(Sp1, 1, max) | |
gene_min = apply(Sp1, 1, min) | |
#Determine median for entire dataset, not just pruned data!!! | |
med = apply(ExpressionTableSpecies1,1,max) | |
med = median(med) | |
for (i in 1:nrow(Sp1)) { | |
if (gene_max[i]>med){ | |
Sp1[i,] = sapply(Sp1[i,],function(x)(x-gene_min[i])/gene_max[i]) | |
} | |
else { | |
Sp1[i,] = sapply(Sp1[i,],function(x)(x-gene_min[i])/med) | |
} | |
} | |
rm(list=c('gene_max','gene_min','med')) | |
gene_max = apply(Sp2, 1, max) | |
gene_min = apply(Sp2, 1, min) | |
med = apply(ExpressionTableSpecies2,1,max) | |
med = median(med) | |
for (i in 1:nrow(Sp2)) { | |
if (gene_max[i]>med){ | |
Sp2[i,] = sapply(Sp2[i,],function(x)(x-gene_min[i])/gene_max[i]) | |
} | |
else { | |
Sp2[i,] = sapply(Sp2[i,],function(x)(x-gene_min[i])/med) | |
} | |
} | |
rm(list=c('gene_max','gene_min','med')) | |
} | |
#Step6: Merge Expression Tables | |
geTable = merge(Sp1,Sp2, by='row.names', all=F) | |
rownames(geTable) = geTable$Row.names | |
geTable = geTable[,2:ncol(geTable)] | |
#Step7: Correlation | |
Corr.Coeff.Table = diag(x = 1, ncol(geTable), ncol(geTable)) | |
rownames(Corr.Coeff.Table) = colnames(geTable) | |
colnames(Corr.Coeff.Table) = colnames(geTable) | |
p.value.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable)) | |
rownames(p.value.table) = colnames(geTable) | |
colnames(p.value.table) = colnames(geTable) | |
a = combn(1:ncol(geTable),2) | |
n4BHcorrect = ncol(geTable)*ncol(geTable) | |
for (i in 1:ncol(a)){ | |
cell_type1 = colnames(geTable)[a[1,i]] | |
cell_type2 = colnames(geTable)[a[2,i]] | |
b = suppressWarnings(cor.test(geTable[,cell_type1], geTable[,cell_type2], adjust='none',alternative = "two.sided", method='pearson')) | |
Corr.Coeff.Table[a[1,i],a[2,i]] = b$estimate | |
Corr.Coeff.Table[a[2,i],a[1,i]] = b$estimate | |
BHcorrected.p.value = p.adjust(b$p.value, method='BH',n=n4BHcorrect) | |
p.value.table[a[1,i],a[2,i]] = BHcorrected.p.value | |
p.value.table[a[2,i],a[1,i]] = BHcorrected.p.value | |
rm(list =c('cell_type1', 'cell_type2','b')) | |
} | |
neg.log10.p = -log10(p.value.table) | |
list.to.return = list(Corr.Coeff.Table,p.value.table,neg.log10.p) | |
names(list.to.return) = c('corr.coeff','p.value','neg.log10.p.value') | |
return(list.to.return) | |
} |