Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
multimodalR/R/plotting.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
418 lines (404 sloc)
19 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Plot Gene Expression | |
#' | |
#' Plots the (simulated) gene expression data frame, creates a pdf file | |
#' @param expression The gene expression data frame that shall be plotted | |
#' @param plotsPerPage The number of plots per Page (has to be a divisor of | |
#' GenesToPlot) | |
#' @param GenesToPlot The number of genes to be plotted | |
#' @param plotName The name of the plot without .pdf | |
#' @examples | |
#' \dontrun{ | |
#' params <- newParams(nGenes = list("1"=5,"2"=list("gauss"= 5,"gamma"= 5))) | |
#' simulation <- simulateExpression(params= params,verbose=FALSE) | |
#' expression <- simulation$expressionData | |
#' plotExpression(expression,plotsPerPage = 4,GenesToPlot = 12,plotName = "plot12Genes") | |
#' plotExpression(expression,plotsPerPage = 5,GenesToPlot = nrow(expression), | |
#' plotName = "plotAllGenes") | |
#' } | |
#' | |
#' @return The name of the plot in the pdf file | |
#' @importFrom ggplot2 aes ggplot geom_density facet_wrap labs theme element_text | |
#' @importFrom grDevices dev.off pdf | |
#' @importFrom reshape2 melt | |
#' @export | |
plotExpression <- function(expression,plotsPerPage = 1,GenesToPlot = 100, | |
plotName="expression"){ | |
plotName <- sprintf(paste0(plotName, ".pdf")) | |
grDevices::pdf(plotName, 7, 5) | |
for (i in seq(0,((GenesToPlot/plotsPerPage))-1,1)){ | |
section <- (c(((i*plotsPerPage)+1):((i+1)*plotsPerPage))) | |
#models[((i*plotsPerPage)+1):((i+1)*plotsPerPage)] | |
expressionVector<-c() | |
for(j in section){ | |
expressionVector<-rbind2(expressionVector, expression[j,]) | |
#transformate data for ggplot | |
meltedExpression <- reshape2::melt(data = t(expressionVector)) | |
} | |
#facetted Plot | |
value <- meltedExpression | |
a<- ggplot2::ggplot(meltedExpression, ggplot2::aes(x= value)) + | |
ggplot2::geom_density()+ | |
ggplot2::facet_wrap(~Var2)+ | |
ggplot2::labs(x = "log2 Expression",y = "Density")+ | |
ggplot2::theme( | |
axis.title.x = ggplot2::element_text(size = 12), | |
axis.title.y = ggplot2::element_text(size = 12) | |
) | |
plot(a) | |
} | |
grDevices::dev.off() | |
return(plotName) | |
} | |
#' Source: http://tinyheero.github.io/2015/10/13/mixture-model.html | |
#' Plot a Mixture Component | |
#' | |
#' @param x Input data | |
#' @param mu Mean of component | |
#' @param sigma Standard deviation of component | |
#' @param lam Mixture weight of component | |
plot_mix_comps <- function(x, mu, sigma, lam) { | |
lam * dnorm(x, mu, sigma) | |
} | |
#' Plot Output | |
#' | |
#' Plots the output of algorithms with the right data format | |
#' with the (simulated) gene expression, creates a pdf file | |
#' @param expression The gene expression data frame that shall be plotted | |
#' @param algorithmOutput The output of the algorithm | |
#' @param GenesToPlot The number of genes to be plotted | |
#' @param name The name of the plot without .pdf | |
#' @examples | |
#' \dontrun{ | |
#' params <- newParams(nGenes = list("1"=5,"2"=list("gauss"= 5,"gamma"= 5))) | |
#' simulation <- simulateExpression(params= params,verbose=FALSE) | |
#' expression <- simulation$expressionData | |
#' output <- useMclust(expression = expression, verbose = FALSE) | |
#' plotOutput(expression = expression,algorithmOutput = output, | |
#' GenesToPlot = 10,name = "plot10GenesWExpression") | |
#' plotOutput(expression = expression,algorithmOutput = output, | |
#' GenesToPlot = nrow(expression),name = "plotAllGenesWExpression") | |
#' } | |
#' | |
#' @return The plot name of the plot in the pdf file | |
#' @importFrom ggplot2 ggplot geom_density aes stat_function labs theme element_text | |
#' @importFrom grDevices dev.off pdf | |
#' @export | |
plotOutput <- function(expression,algorithmOutput,GenesToPlot = 100,name="output"){ | |
name <- sprintf(paste0(name, ".pdf")) | |
grDevices::pdf(name, 7, 5) | |
lapply(1:GenesToPlot, function(i){ | |
nPatients<- sum(algorithmOutput$Genes[[i]]$sizes) | |
titles <- rownames(expression) | |
title <- titles[i] | |
outputPlot <- ggplot2::ggplot(data.frame(t(expression[i,]))) + | |
ggplot2::geom_density(size = 1.1,ggplot2::aes(t(expression[i,])), | |
show.legend = TRUE) + | |
lapply(1:length(algorithmOutput$Genes[[i]]$means),function(x){ | |
ggplot2::stat_function(geom = "line", fun = plot_mix_comps, | |
args = list(algorithmOutput$Genes[[i]]$means[x], algorithmOutput$Genes[[i]]$sds[x], | |
algorithmOutput$Genes[[i]]$sizes[x]/nPatients), | |
colour = "red", lwd = 1.1,show.legend = TRUE) | |
})+ggplot2::labs(x = "log2 Expression",y = "Density",title= title)+ | |
ggplot2::theme( | |
plot.title = ggplot2::element_text(face = "bold",size = 16,hjust = 0.5), | |
axis.title.x = ggplot2::element_text(size = 12), | |
axis.title.y = ggplot2::element_text(size = 12) | |
) | |
plot(outputPlot) | |
}) | |
grDevices::dev.off() | |
return(name) | |
} | |
#' Plot Statistics | |
#' | |
#' Plots the output of the statistics as a bar plot, creates a pdf file | |
#' @param statistics The output of the getStatistics function | |
#' @param name The name of the plot without .pdf | |
#' @examples | |
#' \dontrun{ | |
#' params <- newParams(nGenes = list("1"=5,"2"=list("gauss"= 5,"gamma"= 5))) | |
#' simulation <- simulateExpression(params= params,verbose=FALSE) | |
#' expression <- simulation$expressionData | |
#' validationData <- simulation$validationData | |
#' outputMclust <- useMclust(expression = expression, verbose = FALSE) | |
#' outputHdbscan <- useHdbscan(expression = expression, verbose = FALSE) | |
#' evaluationMclust <- evaluateAlgorithmOutput(modality = 2, | |
#' algorithmOutput = outputMclust, | |
#' validationData = validationData, | |
#' maxDifference = 10) | |
#' evaluationHDBSCAN <- evaluateAlgorithmOutput(modality = 2, | |
#' algorithmOutput = outputHdbscan, | |
#' validationData = validationData,maxDifference = 10) | |
#' statistics <- getStatistics(evaluations = list("mclust"=evaluationMclust, | |
#' "HDBSCAN"=evaluationHDBSCAN)) | |
#' plotStatistics(statistics = statistics,name = "mclustHDBSCAN") | |
#' } | |
#' | |
#' @importFrom ggplot2 ggplot aes geom_bar scale_fill_manual theme_bw theme element_text labs | |
#' @importFrom grDevices dev.off grey.colors pdf | |
#' @export | |
plotStatistics <- function(statistics,name="statistics"){ | |
statistics[is.na(statistics)] <- 0 | |
name <- sprintf(paste0(name, ".pdf")) | |
grDevices::pdf(name, 7, 5) | |
lapply(1:length(statistics),function(i){ | |
value<- statistics[,i] | |
name<-c("TRUE","FALSE") | |
data <- data.frame(name,value) | |
statPlot <- ggplot2::ggplot(data = data,ggplot2::aes(x=name,y = value))+ | |
ggplot2::geom_bar(stat = "identity", | |
ggplot2::aes(fill=grey.colors(2,0,1)))+ | |
ggplot2::scale_fill_manual(values = c("grey57","grey30"))+ | |
ggplot2::theme_bw()+ggplot2::theme(legend.position = "none",plot.title = ggplot2::element_text(hjust = 0.5))+ | |
ggplot2::labs(title=names(statistics)[i],x="Statistic",y="Rate in [%]") | |
plot(statPlot) | |
}) | |
grDevices::dev.off() | |
return(name) | |
} | |
#' Plot Classifications | |
#' | |
#' Plots the output of the classifications as a bar plot with percentages, | |
#' creates a pdf file | |
#' @param classifications The output of the getStatistics function | |
#' @param name The name of the plot without .pdf | |
#' @examples | |
#' \dontrun{ | |
#' params <- newParams(nGenes = list("1"=5,"2"=list("gauss"= 5,"gamma"= 5))) | |
#' simulation <- simulateExpression(params= params,verbose=FALSE) | |
#' expression <- simulation$expressionData | |
#' validationData <- simulation$validationData | |
#' outputMclust <- useMclust(expression = expression, verbose = FALSE) | |
#' outputHdbscan <- useHdbscan(expression = expression, verbose = FALSE) | |
#' classifications <- getClassifications(modality = 2, | |
#' validationData = validationData, | |
#' outputs = list("mclust"=outputMclust,"HDBSCAN"=outputHdbscan)) | |
#' plotClassifications(classifications = classifications, | |
#' name = "mclustHDBSCAN_Classification") | |
#' } | |
#' | |
#' @importFrom ggplot2 ggplot aes geom_bar scale_fill_manual theme_bw theme element_text labs | |
#' @importFrom grDevices dev.off pdf | |
#' @export | |
plotClassifications <- function(classifications, name="classifications"){ | |
classifications[is.na(classifications)] <- 0 | |
classificationsPerc <- classifications/sum(classifications[1,])*100 | |
name <- sprintf(paste0(name, ".pdf")) | |
grDevices::pdf(name, 7, 5) | |
lapply(1:nrow(classificationsPerc),function(i){ | |
nameClassifications=c("FN","FP","TN","TP") | |
value <- unname(unlist(classificationsPerc[i,])) | |
data <- data.frame(nameClassifications,value) | |
classPlot <- ggplot2::ggplot(data = data,ggplot2::aes(x=nameClassifications,y=value))+ | |
ggplot2::geom_bar(stat = "identity",fill= c("red4","firebrick2","limegreen","lawngreen"))+ | |
ggplot2::scale_fill_manual(values = c("red4","firebrick2","limegreen","lawngreen"))+ | |
ggplot2::theme_bw()+ggplot2::theme(legend.position = "none",plot.title = ggplot2::element_text(hjust = 0.5))+ | |
ggplot2::labs(title=rownames(classificationsPerc[i,]),x="Classification", y="Rate in [%]") | |
plot(classPlot) | |
}) | |
grDevices::dev.off() | |
return(name) | |
} | |
#' Plot Survival Curves | |
#' | |
#' Plots the survival curves of TCGA data and calculates whether survival curves of patient groups within a gene are significantly different with a log-rank test or Mantel-Haenszel test. p-Values are calculated between all patient groups. | |
#' @param metadata The metadata of the TCGA data | |
#' @param output (Filtered) Algorithm Output in the unified output data format | |
#' @param sampleType Sample Type of the TCGA data that was used | |
#' @param name The name of the plot without .pdf | |
#' | |
#' | |
#' @importFrom ggplot2 ggplot aes ggtitle scale_color_discrete theme element_text coord_fixed xlab ylab | |
#' @importFrom cowplot ggdraw add_sub | |
#' @importFrom grDevices dev.off pdf | |
#' @importFrom magrittr "%>%" | |
#' @importFrom stringr str_replace_all | |
#' @importFrom graphics hist par | |
#' @importFrom stats as.formula pchisq | |
#' @importFrom dplyr mutate select | |
#' @importFrom survival Surv survdiff | |
#' @importFrom ggkm geom_km | |
#' @export | |
plotSurvivalCurves <- function(metadata,output,sampleType="Primary Tumor",name="survivalCurves"){ | |
`%>%` <- magrittr::`%>%` | |
correctSampleType <- which(metadata$sampleType==sampleType) | |
metadataSampleType <- metadata[correctSampleType,] | |
metadataPatientIDs <- unlist(lapply(1:nrow(metadataSampleType),function(i){ | |
id <- stringr::str_replace_all(string = metadataSampleType$caseID[[i]],pattern = "-",replacement = ".") | |
if(any(c(0:9)==substr(x = id,start = 1,stop = 1))){ | |
id <- paste0("X",id) | |
} | |
id | |
})) | |
metadataUniqueIDs <- make.unique(metadataPatientIDs) | |
genes <- names(output$Genes) | |
patients <- unlist(output$Genes[[1]]$groups) | |
patientMetadata <- sort(unlist(lapply(patients,function(i){ | |
patientNumber <- which(make.unique(metadataUniqueIDs)==i) | |
}))) | |
metadataSampleType$caseID <- metadataUniqueIDs | |
smallMetadata <- metadataSampleType[patientMetadata,] | |
phenotypes <- smallMetadata %>% | |
dplyr::mutate(age = trunc(age*(-1)/365), | |
age_group = cut(age, breaks=c(-Inf, 30, 40, 50, 60, Inf), labels = c("0-30","31-40","41-50","51-60","61+")), | |
survival = ifelse(is.na(deathIn), followUpIn, deathIn), | |
is_dead = ifelse(is.na(deathIn), FALSE, TRUE)) %>% | |
dplyr::select(-deathIn, -followUpIn, -age) -> phenotypes | |
pVals <- calcPValues(phenotypes = phenotypes,output = output,smallMetadata = smallMetadata) | |
pValsAll <- lapply(1:length(pVals),function(i){ | |
pVals[[i]]$All | |
}) | |
names(pValsAll) <- names(pVals) | |
#pAdjAll <- p.adjust(unlist(pValsAll),method = pAdjMethod) | |
pValsCombination <- lapply(1:length(pVals),function(i){ | |
pVals[[i]]$PerCombination | |
}) | |
names(pValsCombination) <- names(pVals) | |
# modalityGreaterTwo <- length(unlist(lapply(1:length(output$Genes),function(i){ | |
# if(output$Genes[[i]]$modus>2){i} | |
# }))) | |
# | |
# maxCombinations <- max(unlist(lapply(1:length(pValsCombination),function(i){length(pValsCombination[[i]])}))) | |
# pValuesperCombination <- lapply(1:maxCombinations,function(i){ | |
# unlist(lapply(1:length(pValsCombination),function(j){ | |
# pValsCombination[[j]][i] | |
# })) | |
# }) | |
# pAdjCombination <- lapply(1:maxCombinations,function(i){ | |
# perCombination <- p.adjust(unlist(pValuesperCombination[[i]]),method = pAdjMethod) | |
# perCombination <- c(rep(NA,(length(pValsCombination)-length(perCombination))),perCombination) | |
# names(perCombination) <- names(pValsCombination) | |
# perCombination | |
# }) | |
# pAdjPerGene <- lapply(1:length(pValsCombination),function(i){ | |
# unlist(lapply(1:maxCombinations,function(j){ | |
# perGene <- pAdjCombination[[j]][i] | |
# if(is.null(pValsCombination[[i]])){ | |
# names(perGene) <- NA | |
# }else{ | |
# names(perGene) <- names(pValsCombination[[i]])[j] | |
# } | |
# perGene | |
# })) | |
# }) | |
# names(pAdjPerGene) <- names(pValsCombination) | |
grDevices::pdf(paste0(name,"_pVals.pdf"), 7, 5) | |
# graphics::par(mfrow=c(1,2)) | |
graphics::hist(unlist(c(pValsAll,pValsCombination)), breaks=10,xlab = "pValues",main = "Histogram of all pValues") | |
# graphics::hist(unlist(c(pAdjAll,pAdjPerGene)), breaks=10,xlab = "adjusted pValues",main = "Histogram of all adjusted pValues") | |
graphics::hist(unlist(pValsAll), breaks=10,xlab = "pValues of all Groups",main = "Histogram of all group pValues") | |
# graphics::hist(unlist(pAdjAll), breaks=10,xlab = "adjusted pValues of all Groups",main = "Histogram of all group adj. pValues") | |
graphics::hist(unlist(pValsCombination), breaks=10,xlab = "pValues of Combinations",main = "Histogram of all combination pValues") | |
# graphics::hist(unlist(pAdjPerGene), breaks=10,xlab = "adjusted pValues of Combinations",main = "Histogram of all combination adj. pValues") | |
# graphics::par(mfrow=c(1,1)) | |
dev.off() | |
name <- sprintf(paste0(name, ".pdf")) | |
grDevices::pdf(name, 7, 5) | |
plots<- lapply(1:length(output$Genes),function(i){ | |
geneName <- names(output$Genes)[[i]] | |
allPatients <-unlist(output$Genes[[i]]$groups) | |
names(allPatients) <- rep(1:length(output$Genes[[i]]$groups),output$Genes[[i]]$sizes) | |
groupMembership <- unlist(lapply(1:nrow(smallMetadata),function(j){ | |
as.integer(names(allPatients[which(allPatients==smallMetadata$caseID[j])])) | |
})) | |
main = paste(geneName,"(p-value", ifelse(pValsAll[[geneName]]<0.001, "< 0.001", paste("=",round(pValsAll[[geneName]],digits=3))),")") | |
annotation <- c() | |
if(!is.null(pValsCombination[[geneName]])){ | |
annotation <- unlist(lapply(1:length(pValsCombination[[geneName]]),function(j){ | |
paste(names(pValsCombination[[geneName]])[[j]],ifelse(pValsCombination[[geneName]][[j]]<0.001,"< 0.001",paste("=",round(pValsCombination[[geneName]][[j]],digits = 3))),"\n") | |
})) | |
annotation<- paste(annotation,collapse = "") | |
annotation <- paste0("p-Values:\n",annotation) | |
} | |
maxDays = max(phenotypes$survival, na.rm = T) | |
survivalCurve = ggplot2::ggplot(data = phenotypes, ggplot2::aes(time=phenotypes$survival, status=as.numeric(phenotypes$is_dead),color=as.factor(groupMembership))) + | |
ggkm::geom_km() + | |
ggplot2::coord_fixed(ratio = maxDays) + | |
ggplot2::xlab("Time") + | |
ggplot2::ylab("Survival") + | |
ggplot2::ggtitle(main)+ | |
ggplot2::scale_color_discrete(name = "groups")+ | |
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5),legend.justification = c(1,1),legend.position = c(1,1)) | |
plot(cowplot::ggdraw(cowplot::add_sub(survivalCurve,annotation))) | |
# plot(survivalCurve) | |
}) | |
dev.off() | |
# name <- sprintf(paste0(name, ".pdf")) | |
# grDevices::pdf(name, 7, 5) | |
# plots<- lapply(1:length(output$Genes),function(i){ | |
# geneName <- names(output$Genes)[[i]] | |
# allPatients <-unlist(output$Genes[[i]]$groups) | |
# names(allPatients) <- rep(1:length(output$Genes[[i]]$groups),output$Genes[[i]]$sizes) | |
# groupMembership <- unlist(lapply(1:nrow(smallMetadata),function(j){ | |
# as.integer(names(allPatients[which(allPatients==smallMetadata$caseID[j])])) | |
# })) | |
# | |
# main = paste(geneName,"(p-value", ifelse(pAdjAll[[paste0(geneName,".All")]]<0.001, "< 0.001", paste("=",round(pAdjAll[[paste0(geneName,".All")]],digits=3))),")") | |
# annotation <- c() | |
# if(!is.na(pAdjPerGene[[geneName]][1])){ | |
# annotation <- unlist(lapply(1:length(pAdjPerGene[[geneName]]),function(j){ | |
# paste(names(pAdjPerGene[[geneName]])[[j]],ifelse(pAdjPerGene[[geneName]][[j]]<0.001,"< 0.001",paste("=",round(pAdjPerGene[[geneName]][[j]],digits = 3))),"\n") | |
# })) | |
# annotation<- paste(annotation,collapse = "") | |
# annotation <- paste0("p-Values:\n",annotation) | |
# } | |
# maxDays = max(phenotypes$survival, na.rm = T) | |
# survivalCurve = ggplot2::ggplot(data = phenotypes, ggplot2::aes(time=phenotypes$survival, status=as.numeric(phenotypes$is_dead),color=as.factor(groupMembership))) + | |
# ggkm::geom_km() + | |
# ggplot2::coord_fixed(ratio = maxDays) + | |
# ggplot2::xlab("Time") + | |
# ggplot2::ylab("Survival") + | |
# ggplot2::ggtitle(main)+ | |
# ggplot2::scale_color_discrete(name = "groups")+ | |
# ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5),legend.justification = c(1,1),legend.position = c(1,1)) | |
# plot(cowplot::ggdraw(cowplot::add_sub(survivalCurve,annotation))) | |
# # plot(survivalCurve) | |
# }) | |
# dev.off() | |
return(name) | |
} | |
#'Function to calculate the p-Values of the survival curves | |
#' | |
#' @details calculates p-Values of the Chi-squared function for all combinations of patient groups per gene | |
#' @param phenotypes converted metadata with columns "survival" and "is_dead" | |
#' @param output (Filtered) Algorithm Output in the unified output data format | |
#' @param smallMetadata metadata of relevant patients | |
#' @return returns the p-Values of each gene as a list with categories "All" and "perCombination" | |
#' | |
#' @importFrom survival Surv survdiff | |
#' @importFrom utils combn | |
calcPValues <- function(phenotypes,output,smallMetadata){ | |
surv <- survival::Surv(time = phenotypes$survival, event = phenotypes$is_dead) | |
genes <- names(output$Genes) | |
pVals <- sapply(genes, function(i) { | |
allPatients <-unlist(output$Genes[[i]]$groups) | |
names(allPatients) <- rep(1:length(output$Genes[[i]]$groups),output$Genes[[i]]$sizes) | |
groupMembership <- unlist(lapply(1:nrow(smallMetadata),function(j){ | |
as.integer(names(allPatients[which(allPatients==smallMetadata$caseID[j])])) | |
})) | |
d <- survival::survdiff(formula = as.formula("surv ~ groupMembership"), data=phenotypes) | |
pValAll <- pchisq(d$chisq, length(d$n)-1,lower.tail = FALSE) | |
names(pValAll) <- "All" | |
if(max(groupMembership)>2){ | |
groups <- lapply(1:max(groupMembership),function(j){ | |
which(groupMembership==j) | |
}) | |
combinations <- utils::combn(1:max(groupMembership),2) | |
pVals1 <- unlist(lapply(1:ncol(combinations),function(j){ | |
group1 <- which(groupMembership==combinations[,j][1]) | |
group2 <- which(groupMembership==combinations[,j][2]) | |
smallPhenotypes <- phenotypes[c(group1,group2),] | |
smallGroupMembership<- groupMembership[(sort(c(group1,group2)))] | |
smallSurv <- survival::Surv(time = smallPhenotypes$survival,event = smallPhenotypes$is_dead) | |
d <- survival::survdiff(formula = as.formula("smallSurv ~ smallGroupMembership"),data = smallPhenotypes) | |
pVal <- pchisq(d$chisq,length(d$n)-1,lower.tail = FALSE) | |
names(pVal) <- paste(combinations[,j][1],":",combinations[,j][2]) | |
pVal | |
})) | |
pVals <- list("All"=pValAll,"PerCombination"=pVals1) | |
}else{ | |
pVals <- list("All"=pValAll) | |
} | |
}) | |
return(pVals) | |
} |