Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
final updates
  • Loading branch information
MPIBR-toschesm committed May 3, 2018
1 parent fe016fb commit f03cddf
Show file tree
Hide file tree
Showing 13 changed files with 965 additions and 2,463 deletions.
729 changes: 0 additions & 729 deletions Comparative_analysis/ComparativeAnalysis_CleanFunctions.R

This file was deleted.

@@ -1,311 +1,32 @@
#######################Specificity Correlation, Permutation Test################
##########################Intersect of DEgene lists#############################

#input:
#ExpressionTableSpecies1 - must be a data.frame
#species1 = c('turtle','lizard','mouse')
#DEgenesSpecies1
#ExpressionTableSpecies2 - must be a data.frame
#species1 = c('turtle','lizard','mouse','human','chicken')
#DEgenesSpecies2
#nPermutations = 1000

#returns:
#list with
#corr.coeff
#shuffled_correlation_score_means
#p.value
#negative_log10_p.value
#DEgenes_intersect
#DEgenes_in_analysis
#nDEgenes_intersect
#nDEgenes_in_analysis
#nDEgenes_Sp1
#nDEgenes_Sp2

SpIntPermute = function(ExpressionTableSpecies1, species1 = c('turtle','lizard','mouse'), DEgenesSpecies1, ExpressionTableSpecies2, species2 = c('turtle','lizard','mouse','human','chicken'), DEgenesSpecies2, nPermutations = 1000){

#Step1: Load eggnog tables
if (species1=="turtle") eggnog1 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F)
if (species1=="lizard") eggnog1 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F)
if (species1=="mouse") {
eggnog1 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F)
eggnog1[2:nrow(eggnog1),1] = toupper(eggnog1[2:nrow(eggnog1),1])
DEgenesSpecies1 = toupper(DEgenesSpecies1)
rownames(ExpressionTableSpecies1) = toupper(rownames(ExpressionTableSpecies1))
}
if (species2=="turtle") eggnog2 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F)
if (species2=="lizard") eggnog2 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F)
if (species2=="mouse") {
eggnog2 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F)
eggnog2[2:nrow(eggnog2),1] = toupper(eggnog2[2:nrow(eggnog2),1])
DEgenesSpecies2 = toupper(DEgenesSpecies2)
rownames(ExpressionTableSpecies2) = toupper(rownames(ExpressionTableSpecies2))
}
if (species2=="human") eggnog2 = read.table('homo_eggnog_pruned.txt',header=F,stringsAsFactors = F)
if (species2=="chicken") eggnog2 = read.table('gallus_eggnog_pruned.txt',header=F,stringsAsFactors = F)

colnames(eggnog1) = eggnog1[1,]
eggnog1 = eggnog1[2:nrow(eggnog1),]
colnames(eggnog2) = eggnog2[1,]
eggnog2 = eggnog2[2:nrow(eggnog2),]

#Step2: Take intersect of DEgenes for analysis
DEgenesSpecies1 = eggnog1[eggnog1[,1] %in% DEgenesSpecies1,2]
nDESp1 = length(DEgenesSpecies1)
DEgenesSpecies2 = eggnog2[eggnog2[,1] %in% DEgenesSpecies2,2]
nDESp2 = length(DEgenesSpecies2)
DEgenes = intersect(DEgenesSpecies1,DEgenesSpecies2)
DEgenesSpecies1 = eggnog1[eggnog1[,2] %in% DEgenes,1]
DEgenesSpecies2 = eggnog2[eggnog2[,2] %in% DEgenes,1]

#Step3: Prune Expression Tables & Remove rows with no expression
Sp1 = ExpressionTableSpecies1[rownames(ExpressionTableSpecies1) %in% DEgenesSpecies1,]
Sp1 = Sp1[rowSums (Sp1)!=0,]
Sp2 = ExpressionTableSpecies2[rownames(ExpressionTableSpecies2) %in% DEgenesSpecies2,]
Sp2 = Sp2[rowSums (Sp2)!=0,]

#Step4: Replace Gene names with Eggnog name
Sp1[,ncol(Sp1)+1] = rownames(Sp1)
colnames(Sp1)[ncol(Sp1)] = species1
Sp1 = merge(eggnog1, Sp1, by =species1, all = F)
rownames(Sp1) = Sp1$eggnog
Sp1 = Sp1[,3:ncol(Sp1)]

Sp2[,ncol(Sp2)+1] = rownames(Sp2)
colnames(Sp2)[ncol(Sp2)] = species2
Sp2 = merge(eggnog2, Sp2, by =species2, all = F)
rownames(Sp2) = Sp2$eggnog
Sp2 = Sp2[,3:ncol(Sp2)]

#Step5: Scale Expression Tables by gene average
avg = rowMeans(Sp1)
Sp1 = sweep(Sp1,1,avg,"/")
rm(avg)
avg = rowMeans(Sp2)
Sp2 = sweep(Sp2,1,avg,"/")
rm(avg)

#Step6: Merge Expression Tables
geTable = merge(Sp1,Sp2, by='row.names', all=F)
rownames(geTable) = geTable$Row.names
geTable = geTable[,2:ncol(geTable)]

#Step7: Correlation
#7a: Pearson correlation
Corr.Coeff.Table = cor(geTable,method='pearson')

#7b: Shuffle data
shuffled.cor.list = list()
pb <- txtProgressBar(1, 100, style=3)

for (i in 1:nPermutations){
shuffled = apply(geTable[,1:ncol(Sp1)],1,sample)
shuffled2 = apply(geTable[,(ncol(Sp1)+1):ncol(geTable)],1,sample)
shuffled = cbind(t(shuffled),t(shuffled2))
shuffled.cor = cor(shuffled,method='pearson')
shuffled.cor.list[[i]] = shuffled.cor
rm(list=c('shuffled','shuffled2','shuffled.cor'))
if ((i %% 100) ==0){
setTxtProgressBar(pb, (i*100)/nPermutations)
}
}

p.value.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable))
rownames(p.value.table) = colnames(geTable)
colnames(p.value.table) = colnames(geTable)

shuffled.mean.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable))
rownames(shuffled.mean.table) = colnames(geTable)
colnames(shuffled.mean.table) = colnames(geTable)

a = combn(1:ncol(geTable),2)
pb2 <- txtProgressBar(1, 100, style=3)
for (i in 1:ncol(a)){
cor.scores = sapply(shuffled.cor.list,"[",a[1,i],a[2,i])
shuffled.mean.table[a[1,i],a[2,i]] = mean(cor.scores)
shuffled.mean.table[a[2,i],a[1,i]] = mean(cor.scores)
p.value = mean(abs(cor.scores)>=abs(Corr.Coeff.Table[a[1,i],a[2,i]]))
p.value.table[a[1,i],a[2,i]] = p.value
p.value.table[a[2,i],a[1,i]] = p.value
rm(list=c('cor.scores','p.value'))
setTxtProgressBar(pb2, (i*100)/ncol(a))
}

neg.log10.p = -log10(p.value.table)

list.to.return = list(Corr.Coeff.Table,shuffled.mean.table,p.value.table,neg.log10.p,DEgenes,rownames(geTable),length(DEgenes),length(rownames(geTable)),nDESp1,nDESp2)
names(list.to.return) = c('corr.coeff','shuffled_correlation_score_means','p.value','negative_log10_p.value','DEgenes_intersect','DEgenes_in_analysis','nDEgenes_intersect','nDEgenes_in_analysis','nDEgenes_Sp1','nDEgenes_Sp2')
return(list.to.return)

}

##########################Union of DEgene lists#############################

#input:
#ExpressionTableSpecies1 - must be a data.frame
#species1 = c('turtle','lizard','mouse')
#DEgenesSpecies1
#ExpressionTableSpecies2 - must be a data.frame
#species1 = c('turtle','lizard','mouse','human','chicken')
#DEgenesSpecies2
#nPermutations = 1000

#returns:
#list with
#corr.coeff
#shuffled_correlation_score_means
#p.value
#negative_log10_p.value
#DEgenes_intersect
#DEgenes_in_analysis
#nDEgenes_intersect
#nDEgenes_in_analysis
#nDEgenes_Sp1
#nDEgenes_Sp2

SpUniPermute = function(ExpressionTableSpecies1, species1 = c('turtle','lizard','mouse'), DEgenesSpecies1, ExpressionTableSpecies2, species2 = c('turtle','lizard','mouse','human','chicken'), DEgenesSpecies2, nPermutations = 1000){

#Step1: Load eggnog tables
if (species1=="turtle") eggnog1 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F)
if (species1=="lizard") eggnog1 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F)
if (species1=="mouse") {
eggnog1 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F)
eggnog1[2:nrow(eggnog1),1] = toupper(eggnog1[2:nrow(eggnog1),1])
DEgenesSpecies1 = toupper(DEgenesSpecies1)
rownames(ExpressionTableSpecies1) = toupper(rownames(ExpressionTableSpecies1))
}
if (species2=="turtle") eggnog2 = read.table('chrysemys_eggnog_pruned.txt',header=F,stringsAsFactors = F)
if (species2=="lizard") eggnog2 = read.table('pogona_eggnog_pruned.txt',header=F,stringsAsFactors = F)
if (species2=="mouse") {
eggnog2 = read.table('mus_eggnog_pruned.txt',header=F,stringsAsFactors = F)
eggnog2[2:nrow(eggnog2),1] = toupper(eggnog2[2:nrow(eggnog2),1])
DEgenesSpecies2 = toupper(DEgenesSpecies2)
rownames(ExpressionTableSpecies2) = toupper(rownames(ExpressionTableSpecies2))
}
if (species2=="human") eggnog2 = read.table('homo_eggnog_pruned.txt',header=F,stringsAsFactors = F)
if (species2=="chicken") eggnog2 = read.table('gallus_eggnog_pruned.txt',header=F,stringsAsFactors = F)

colnames(eggnog1) = eggnog1[1,]
eggnog1 = eggnog1[2:nrow(eggnog1),]
colnames(eggnog2) = eggnog2[1,]
eggnog2 = eggnog2[2:nrow(eggnog2),]

#Step2: Take union of DEgenes for analysis
DEgenesSpecies1 = eggnog1[eggnog1[,1] %in% DEgenesSpecies1,2]
nDESp1 = length(DEgenesSpecies1)
DEgenesSpecies2 = eggnog2[eggnog2[,1] %in% DEgenesSpecies2,2]
nDESp2 = length(DEgenesSpecies2)
DEgenes = union(DEgenesSpecies1,DEgenesSpecies2)
DEgenesSpecies1 = eggnog1[eggnog1[,2] %in% DEgenes,1]
DEgenesSpecies2 = eggnog2[eggnog2[,2] %in% DEgenes,1]

#Step3: Prune Expression Tables & Remove rows with no expression
Sp1 = ExpressionTableSpecies1[rownames(ExpressionTableSpecies1) %in% DEgenesSpecies1,]
Sp1 = Sp1[rowSums (Sp1)!=0,]
Sp2 = ExpressionTableSpecies2[rownames(ExpressionTableSpecies2) %in% DEgenesSpecies2,]
Sp2 = Sp2[rowSums (Sp2)!=0,]

#Step4: Replace Gene names with Eggnog name
Sp1[,ncol(Sp1)+1] = rownames(Sp1)
colnames(Sp1)[ncol(Sp1)] = species1
Sp1 = merge(eggnog1, Sp1, by =species1, all = F)
rownames(Sp1) = Sp1$eggnog
Sp1 = Sp1[,3:ncol(Sp1)]

Sp2[,ncol(Sp2)+1] = rownames(Sp2)
colnames(Sp2)[ncol(Sp2)] = species2
Sp2 = merge(eggnog2, Sp2, by =species2, all = F)
rownames(Sp2) = Sp2$eggnog
Sp2 = Sp2[,3:ncol(Sp2)]

#Step5: Scale Expression Tables by gene average
avg = rowMeans(Sp1)
Sp1 = sweep(Sp1,1,avg,"/")
rm(avg)
avg = rowMeans(Sp2)
Sp2 = sweep(Sp2,1,avg,"/")
rm(avg)

#Step6: Merge Expression Tables
geTable = merge(Sp1,Sp2, by='row.names', all=F)
rownames(geTable) = geTable$Row.names
geTable = geTable[,2:ncol(geTable)]

#Step7: Correlation
#7a: Pearson correlation
Corr.Coeff.Table = cor(geTable,method='pearson')

#7b: Shuffle data
shuffled.cor.list = list()
pb <- txtProgressBar(1, 100, style=3)

for (i in 1:nPermutations){
shuffled = apply(geTable[,1:ncol(Sp1)],1,sample)
shuffled2 = apply(geTable[,(ncol(Sp1)+1):ncol(geTable)],1,sample)
shuffled = cbind(t(shuffled),t(shuffled2))
shuffled.cor = cor(shuffled,method='pearson')
shuffled.cor.list[[i]] = shuffled.cor
rm(list=c('shuffled','shuffled2','shuffled.cor'))
if ((i %% 100) ==0){
setTxtProgressBar(pb, (i*100)/nPermutations)
}
}

p.value.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable))
rownames(p.value.table) = colnames(geTable)
colnames(p.value.table) = colnames(geTable)

shuffled.mean.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable))
rownames(shuffled.mean.table) = colnames(geTable)
colnames(shuffled.mean.table) = colnames(geTable)

a = combn(1:ncol(geTable),2)
pb2 <- txtProgressBar(1, 100, style=3)
for (i in 1:ncol(a)){
cor.scores = sapply(shuffled.cor.list,"[",a[1,i],a[2,i])
shuffled.mean.table[a[1,i],a[2,i]] = mean(cor.scores)
shuffled.mean.table[a[2,i],a[1,i]] = mean(cor.scores)
p.value = mean(abs(cor.scores)>=abs(Corr.Coeff.Table[a[1,i],a[2,i]]))
p.value.table[a[1,i],a[2,i]] = p.value
p.value.table[a[2,i],a[1,i]] = p.value
rm(list=c('cor.scores','p.value'))
setTxtProgressBar(pb2, (i*100)/ncol(a))
}

neg.log10.p = -log10(p.value.table)

list.to.return = list(Corr.Coeff.Table,shuffled.mean.table,p.value.table,neg.log10.p,DEgenes,rownames(geTable),length(DEgenes),length(rownames(geTable)),nDESp1,nDESp2)
names(list.to.return) = c('corr.coeff','shuffled_correlation_score_means','p.value','negative_log10_p.value','DEgenes_intersect','DEgenes_in_analysis','nDEgenes_intersect','nDEgenes_in_analysis','nDEgenes_Sp1','nDEgenes_Sp2')
return(list.to.return)

}

#######################General Correlation (choose method), Permutation Test################

#input:
#ExpressionTableSpecies1 - must be a data.frame
#species1 = c('turtle','lizard','mouse')
#DEgenesSpecies1
#ExpressionTableSpecies2 - must be a data.frame
#species1 = c('turtle','lizard','mouse','human','chicken')
#DEgenesSpecies2
#nPermutations = 1000
#genes.use = c('intersect','union','species1','species2')
#corr.method = c('spearman', 'pearson') etc.
################# General Correlation (choose method), Permutation Test, all genes ################

# requires:
# tables with the gene functional annotations generated by eggnog

# input:
# ExpressionTableSpecies1 - must be a data.frame
# species1 = c('turtle','lizard','mouse')
# DEgenesSpecies1
# ExpressionTableSpecies2 - must be a data.frame
# species1 = c('turtle','lizard','mouse','human','chicken')
# DEgenesSpecies2
# nPermutations = 1000
# genes.use = c('intersect','union','species1','species2')
# corr.method = c('spearman', 'pearson') etc.

#returns:
#list with
#corr.coeff
#shuffled_correlation_score_means
#p.value
#negative_log10_p.value
#DEgenes_intersect
#DEgenes_in_analysis
#nDEgenes_intersect
#nDEgenes_in_analysis
#nDEgenes_Sp1
#nDEgenes_Sp2
# returns:
# list with
# corr.coeff
# shuffled_correlation_score_means
# p.value
# negative_log10_p.value
# DEgenes_intersect
# DEgenes_in_analysis
# nDEgenes_intersect
# nDEgenes_in_analysis
# nDEgenes_Sp1
# nDEgenes_Sp2

SpPermute = function(ExpressionTableSpecies1, species1 = c('turtle','lizard','mouse'), DEgenesSpecies1, ExpressionTableSpecies2, species2 = c('turtle','lizard','mouse','human','chicken'), DEgenesSpecies2, nPermutations = 1000, genes.use='intersect', corr.method = 'spearman'){

Expand Down Expand Up @@ -442,7 +163,15 @@ SpPermute = function(ExpressionTableSpecies1, species1 = c('turtle','lizard','mo
}


# includes only transcription factors in the comparison. TF list from ensembl, file 170712_TFs_list_ensembl.txt, using GO terms GO:0003700 (transcription factor activity), GO:0003702 (RNA polymerase II transcription factor activity), GO:0003709 (RNA polymerase III transcription factor activity), GO:0016563 (transcriptional activator activity) and GO:0016564 (transcriptional repressor activity). See http://www.transcriptionfactor.org/index.cgi?About
############ General Correlation (choose method), Permutation Test, transcription factors #############

# includes only transcription factors in the comparison. TF list from ensembl, file 170712_TFs_list_ensembl.txt, using GO terms GO:0003700 (transcription factor activity), GO:0003702 (RNA polymerase II transcription factor activity), GO:0003709 (RNA polymerase III transcription factor activity), GO:0016563 (transcriptional activator activity) and GO:0016564 (transcriptional repressor activity).

# requires:
# tables with the gene functional annotations generated by eggnog
# list of transcription factors

# input and output as above

SpPermuteTF = function(ExpressionTableSpecies1, species1 = c('turtle','lizard','mouse'), DEgenesSpecies1, ExpressionTableSpecies2, species2 = c('turtle','lizard','mouse','human','chicken'), DEgenesSpecies2, nPermutations = 1000, genes.use='intersect', corr.method = 'spearman'){

Expand Down Expand Up @@ -585,8 +314,9 @@ SpPermuteTF = function(ExpressionTableSpecies1, species1 = c('turtle','lizard','
}


# NoisyGenes function
# this function finds those genes that are not expressed above a certain percentage of cells (min.pct) in all of the clusters. Genes identified by this function are too noisy and can be removed before running comparative analysis
################# NoisyGenes function ################

# this function finds those genes that are not expressed above a certain percentage of cells (min.pct) in all of the clusters. Genes identified by this function are too noisy, and can be removed before running comparative analysis
# clusters.use = character vectors of the clusters ids to include in the analysis
# Example for comparative analysis:
# object_noisy_genes <- NoisyGenes(object, min.pct=0.2, clusters.use=as.character(levels(object@ident)))
Expand Down
20 changes: 20 additions & 0 deletions Comparative_analysis/CorrComparePlot.R
@@ -1,5 +1,25 @@
# this function is a wrapper that computes and plots correlation matrices of cell types
# it takes as input average cell type matrices and differentially expressed genes
# it calculates correlation significance with a permutation test (1000 permutations)
# requires the functions in the file Comparative_functions.R:
# requires the package corrplot

# input:
# ExpressionTableSpecies1 = data.frame with the average expression data by cell type in Species 1 (e.g. output of the AverageExpression function in Seurat)
# DEgenesSpecies1 = vector with the names of the genes differentially expressed among the cell types of the Species 1 set
# ExpressionTableSpecies2 - data.frame with the average expression data by cell type in Species 2
# DEgenesSpecies2 = vector with the names of the genes differentially expressed among the cell types of the Species 2 set
# species1 = c('turtle','lizard','mouse')
# species2 = c('turtle','lizard','mouse','human','chicken')
# filename = to save plots (in pdf)
# Height and Width are plot parameters

# returns a plot and a list:
# - plot with correlation matrices calculated by taking (1) the intersection of DE genes in the two species, (2) the union of DE genes, (3) only the DE genes in Species 1, (4) only the DE genes in Species 2.
# - plot with correlation matrices calculated by taking intersect, union, Species1 or Species2 differentially expressed transcription factors
#
# list with all the correlation matrices (intersect, intersect.TF, union, union.TF, species1, species1.TF, species2, species2.TF)


CorrComparePlot <- function(ExpressionTableSpecies1, DEgenesSpecies1, ExpressionTableSpecies2, DEgenesSpecies2, Species1, Species2, filename, Height=12, Width=10) {

Expand Down

0 comments on commit f03cddf

Please sign in to comment.