Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#' compare Scenarios
#'
#' Compare the scenario of each gene that was assigned by the algorithm with
#' the scenario that is written in the validation Data.
#' @param modality numerical, for which modality the scenarioComparing is calculated, 1 for unimodal,
#' 2 for bimodal,...
#' @param algorithmOutput The output to be compared
#' @param validationData The validationData behind the simulated gene expression data frame
#'
#' @return Returns a list that classifies each assignment as
#' TN (TRUE Negative),TP (TRUE Positive), FN (FALSE Negative), FP (FALSE Positive)
#' @details
#' TN (TRUE Negative): validationData: not modality, assigned scenario: not modality
#'
#' TP (TRUE Positive): validationData: modality, assigned scenario: modality
#'
#' FN (FALSE Negative): validationData: modality, assigned scenario: not modality
#'
#' FP (FALSE Positive): validationData: not modality, assigned scenario: modality
compareScenarios <- function(modality, algorithmOutput,validationData){
compareScenarios <- (unlist(lapply(1:length(algorithmOutput$Genes),function(i){
if(validationData[[i]]$modus != modality && algorithmOutput$Genes[[i]]$modus!=modality){
evaluation <- "TN"
}else if(validationData[[i]]$modus ==modality && algorithmOutput$Genes[[i]]$modus==modality){
evaluation <- "TP"
}else if(validationData[[i]]$modus !=modality && algorithmOutput$Genes[[i]]$modus==modality){
evaluation <- "FP"
}else if(validationData[[i]]$modus ==modality && algorithmOutput$Genes[[i]]$modus!=modality){
evaluation <- "FN"
}else{
evaluation <- NA
}
})))
return(compareScenarios)
}
#' Is Approximately the Same
#'
#' checks whether the difference between two values is less than or equal the
#' maxDistance
#' @param validationVal value of the validationData (exact value)
#' @param classVal estimated value (approx. value)
#' @param maxDistance percentage of maximum distance of the values
#'
#' @return TRUE, if the difference between the two values is less than or
#' equal the maxDistance; FALSE, if the difference between the two values is
#' greater than the maxDistance
#'
isApproxSame<- function(validationVal,classVal,maxDistance){
difference <- (abs((validationVal - classVal)/validationVal)*100)
if(difference <= maxDistance){
return(TRUE)
}else{
return(FALSE)
}
}
#' Evaluate AlgorithmOutput
#'
#' @param modality numerical, for which modality output is evaluated, 1 for unimodal,
#' 2 for bimodal,...
#' @param algorithmOutput The formatted output of the algorithm that is now
#' evaluated
#' @param validationData the validationData data.frame
#' @param maxDifference the maximum percentage difference between the estimated
#' value and the validation data value with which the estimated is classified
#' as TRUE (correctly estimated)
#' @examples
#' \dontrun{
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' validationData <- simulation$validationData
#' output <- useMclust(expression, verbose=FALSE)
#' # TRUE if less than +- 10 % difference to value of validationData
#' evaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = output,
#' validationData = validationData)
#' # TRUE if less than +- 20 % difference to value of validationData
#' evaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = output,
#' validationData = validationData,maxDifference = 20)
#' }
#' @return Returns a data.frame where for each as TP classified gene it was
#' evaluated whether it was estimated approximately correct or not (TRUE or
#' FALSE)
#' @details
#' The estimated values of mean1, mean2, sd1, sd2, proportion1 and proportion2
#' of each as TP classified gene are compared to those values of the validationData.
#' If the difference is greater than 10 %, the estimation is classified as
#' FALSE (not correctly estimated) for this value. If the difference is less
#' or equal to 10%, the estimation is classified as TRUE (correctly estimated)
#'
#' @export
evaluateAlgorithmOutput <- function(modality,algorithmOutput,validationData,maxDifference = 10){
compared <- compareScenarios(modality = modality,algorithmOutput = algorithmOutput,validationData = validationData)
genes <- names(validationData)
approxSame <- (lapply(1:length(algorithmOutput$Genes),function(i){
if((algorithmOutput$Genes[[i]]$modus==modality) && (validationData[[i]]$modus==modality)){
classValsMean <- sort(algorithmOutput$Genes[[i]]$means)
x <- algorithmOutput$Genes[[i]]$sds
names(x) <- order(algorithmOutput$Genes[[i]]$means)
classValSDs <- x[order(names(x))]
y <- algorithmOutput$Genes[[i]]$sizes
names(y) <- order(algorithmOutput$Genes[[i]]$means)
classValProps <- y[order(names(y))]
means <- unlist(lapply(1:length(validationData[[i]]$means),function(j){
isApproxSame(validationVal = validationData[[i]]$means[j],classVal = classValsMean[j],maxDistance = maxDifference)
}))
sds <- unlist(lapply(1:length(validationData[[i]]$sds),function(j){
isApproxSame(validationVal = validationData[[i]]$sds[j],classVal = classValSDs[j],maxDistance = maxDifference)
}))
props <- unlist(lapply(1:length(validationData[[i]]$sizes),function(j){
isApproxSame(validationVal = validationData[[i]]$sizes[j],classVal = classValProps[j],maxDistance = maxDifference)
}))
c(means,sds,props)
}
}))
names(approxSame) <- genes
evaluation<- data.frame(do.call('rbind',approxSame))
meanNames <- sprintf(c("mean%s"),(1:(ncol(evaluation)/3)))
sdNames <- sprintf(c("sd%s"),(1:(ncol(evaluation)/3)))
propNames <- sprintf(c("prop%s"),(1:(ncol(evaluation)/3)))
colnames(evaluation) <- c(meanNames,sdNames,propNames)
return(evaluation)
}
#' getClassification
#'
#' Returns the numbers of TN (TRUE Negative),TP (TRUE Positive),
#' FN (FALSE Negative) and FP (FALSE Positive) for one of the algorithm's
#' formatted outputs.
#' @param modality numerical, for which modality the classification is calculated, 1 for unimodal,
#' 2 for bimodal,...
#' @param algorithmOutput The formatted output of the algorithm
#' @param algorithmName The name of the evaluated algorithm
#' @param validationData The validationData behind the simulated gene
#' expression data frame
#' @examples
#' \dontrun{
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' validationData <- simulation$validationData
#' mclustOutput <- useMclust(expression, verbose=FALSE)
#' getClassifications(modality = 2,validationData = validationData,
#' outputs = list("mclust" = mclustOutput))
#' }
#' @return a classification data frame that contains the number of
#' FN, FP, TN and TP
#'
#' @details
#' TN (TRUE Negative): validationData: unimodal, assigned scenario: unimodal
#'
#' TP (TRUE Positive): validationData: bimodal, assigned scenario: bimodal
#'
#' FN (FALSE Negative): validationData: bimodal, assigned scenario: unimodal
#' or
#' FN (FALSE Negative): validationData: unimodal/bimodal,
#' assigned scenario: more than bimodal
#'
#' FP (FALSE Positive): validationData: unimodal, assigned scenario: bimodal
#'
getClassification <- function(modality,algorithmOutput,algorithmName="mclust",validationData){
compared <- compareScenarios(modality = modality,
algorithmOutput = algorithmOutput,validationData = validationData)
fn <- table(compared)["FN"]
fp <- table(compared)["FP"]
tn <- table(compared)["TN"]
tp <- table(compared)["TP"]
classification <- data.frame(matrix(nrow = 1,ncol = 4,data = c(fn,fp,tn,tp),
byrow = TRUE))
colnames(classification)<-c("FN","FP","TN","TP")
rownames(classification)<-c(algorithmName)
return(classification)
}
#' getStatistic
#'
#' This function produces informations about the evaluation of one
#' algorithms. The result is a data frame where the percentage of as TRUE and
#' as FALSE evaluated values of the genes
#'
#' @param evaluation The output of the evaluation of the formatted output
#' @param algorithmName The name of the algorithm which created the output
#' @return A data frame with the percentual proportion of as TRUE or FALSE evaluated values
#' @examples
#' \dontrun{
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' validationData <- simulation$validationData
#' mclustOutput <- useMclust(expression, verbose=FALSE)
#' mclustEvaluation <- evaluateAlgorithmOutput(modality = 2, algorithmOutput = mclustOutput,
#' validationData = validationData)
#' getStatistics(evaluations = list("mclust" = mclustEvaluation))
#' }
getStatistic <- function(evaluation,algorithmName ="mclust"){
true <- (table(unlist(evaluation))["TRUE"]/(table(unlist(evaluation))["FALSE"]+table(unlist(evaluation))["TRUE"]))*100
false <- (table(unlist(evaluation))["FALSE"]/(table(unlist(evaluation))["FALSE"]+table(unlist(evaluation))["TRUE"]))*100
stats <- data.frame(matrix(ncol = 1, nrow = 2,data = c(true,false),byrow = TRUE))
colnames(stats)<- c(algorithmName)
rownames(stats)<- c("TRUE [%]","FALSE [%]")
return(stats)
}
#' Statistics
#'
#' This function produces informations about the evaluation of the three
#' algorithms. The result is a data frame where the percentage of as TRUE and
#' as FALSE evaluated values of the genes
#'
#' @param evaluations list of named evaluations
#' @return A data frame with the percentual proportion of as TRUE or FALSE evaluated values
#' @examples
#' \dontrun{
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' validationData <- simulation$validationData
#' mclustOutput <- useMclust(expression = expression,verbose = FALSE)
#' hdbscanOutput <- useHdbscan(expression = expression,verbose = FALSE)
#' mclustEvaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = mclustOutput,
#' validationData = validationData)
#' hdbscanEvaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = hdbscanOutput,
#' validationData = validationData)
#'
#' #getStatistics of one algorithm
#' getStatistics(evaluations = list("mclust"=mclustEvaluation))
#' #getStatistics of two or more algorithms
#' getStatistics(evaluations = list("mclust"=mclustEvaluation,"HDBSCAN"=hdbscanEvaluation))
#' }
#' @export
getStatistics <- function(evaluations) {
numEvaluations <- length(evaluations)
statistics <- lapply(1:numEvaluations, function(i) {getStatistic(evaluation = evaluations[[i]], algorithmName = names(evaluations)[i])})
stats <- data.frame(do.call('cbind',statistics))
colnames(stats)<- names(evaluations)
rownames(stats)<- c("TRUE [%]","FALSE [%]")
return(stats)
}
#' getClassifications
#'
#' Returns the numbers of TN (TRUE Negative),TP (TRUE Positive),
#' FN (FALSE Negative) and FP (FALSE Positive) for each of the algorithm's
#' formatted outputs.
#' @param modality numerical, for which modality the classification is calculated, 1 for unimodal,
#' 2 for bimodal,...
#' @param outputs list of named outputs
#' @param validationData The validationData behind the simulated gene expression data frame
#'
#' @return a classification data frame that contains the number of
#' FN, FP, TN and TP
#' @examples
#' \dontrun{
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' validationData <- simulation$validationData
#' mclustOutput <- useMclust(expression = expression,verbose = FALSE)
#' hdbscanOutput <- useHdbscan(expression = expression,verbose = FALSE)
#'
#' #getClassifications of one algorithm
#' getClassifications(modality=2,validationData,
#' outputs = list("mclust"=mclustOutput))
#' #getClassifications of two or more algorithms
#' getClassifications(modality=2,validationData,
#' outputs = list("mclust"=mclustOutput,"HDBSCAN"=hdbscanOutput))
#' }
#'
#' @details
#' TN (TRUE Negative): validationData: unimodal, assigned scenario: unimodal
#'
#' TP (TRUE Positive): validationData: bimodal, assigned scenario: bimodal
#'
#' FN (FALSE Negative): validationData: bimodal, assigned scenario: unimodal
#' or
#' FN (FALSE Negative): validationData: unimodal/bimodal,
#' assigned scenario: more than bimodal
#'
#' FP (FALSE Positive): validationData: unimodal, assigned scenario: bimodal
#'
#' @export
getClassifications <- function(modality,validationData,outputs){
numOutputs <- (length(outputs))
classification <- lapply(1:numOutputs,function(i){
getClassification(modality,algorithmOutput = outputs[[i]],
algorithmName = names(outputs)[i],
validationData = validationData)
})
classification <- data.frame(do.call('rbind',classification))
colnames(classification)<-c("FN","FP","TN","TP")
rownames(classification)<-names(outputs)
return(classification)
}
#' Checks if output fulfills the criteria of the unified output data format
#' @details prints status messages about performed checks
#' @param output to be tested
#' @param expressionmatrix that was used by algorithm to create output
#' @return logical: TRUE if output is in the correct format, FALSE if any check was failed
#' @export
isValidUnifiedOutputDataFormat <- function(output,expressionmatrix){
correctType <- FALSE
correctLengthOutput <- FALSE
correctLengthGenes <- FALSE
correctFormatClinicalData <- FALSE
correctFormatExpressionmatrix <- FALSE
correctFormatAllGenes <- FALSE
if(is.list(output)){
message("Output is of type 'list'")
correctType <- TRUE
}else{
message("Output is not of required type 'list'.")
return(FALSE)}
if(length(output)==3){
message("Output has a length of 3.")
correctLengthOutput <- TRUE
}else{
message("Output has not a length of 3.")
return(FALSE)}
if(length(output$Genes)==nrow(expressionmatrix)){
message("Output$Genes holds the correct number of genes.")
correctLengthGenes <- TRUE
}else{
message("Output does not hold the correct number of genes.")
return(FALSE)}
if((is.character(output$ClinicalData))&&(length(output$ClinicalData)==1)){
message("Output$ClinicalData is of the required type 'character' and the required length of 1.")
correctFormatClinicalData <- TRUE
}else{
message("Output$ClinicalData is not of the required type 'character' and/or not of the required length of 1.")
return(FALSE)}
if((is.character(output$Expressionmatrix))&&(length(output$Expressionmatrix)==1)){
message("Output$Expressionmatrix is of the required type 'character' and of the required length of 1.")
correctFormatExpressionmatrix <- TRUE
}else{
message("Output$Expressionmatrix is not of the required type 'character' and/or not of the required length of 1.")
return(FALSE)}
correctlyFormattedGenes <- try(unlist(lapply(1:length(output$Genes),function(i){
isCorrectFormattedGene <- FALSE
allCorrectFormat <- is.integer(output$Genes[[i]]$modus)&&
is.double(output$Genes[[i]]$means)&&(length(output$Genes[[i]]$means)==output$Genes[[i]]$modus)&&
is.double(output$Genes[[i]]$sds)&&(length(output$Genes[[i]]$sds)==output$Genes[[i]]$modus)&&
is.integer(output$Genes[[i]]$sizes)&&(length(output$Genes[[i]]$sizes)==output$Genes[[i]]$modus)&&
is.list(output$Genes[[i]]$groups)&&(length(output$Genes[[i]]$groups)==output$Genes[[i]]$modus)&&
(sum(output$Genes[[i]]$sizes)==ncol(expressionmatrix))
listCorrectLength<- unlist(lapply(1:length(output$Genes[[i]]$groups),function(j){
length(output$Genes[[i]]$groups[[j]])==output$Genes[[i]]$sizes[[j]]
}))
allListsCorrectLength <- (!any(listCorrectLength==FALSE))
isCorrectFormattedGene <- allCorrectFormat&&allListsCorrectLength
})),silent = T)
if(class(correctlyFormattedGenes)=="try-error"){
message("The correct format of the genes could not be evaluated due to wrong characteristics of the Genes section.")
return(FALSE)}
else{
correctFormatAllGenes <-(!any(correctlyFormattedGenes==FALSE))
}
if(correctFormatAllGenes){
message("All Genes are in the correct format.")
}else{
message("One or more genes do not fulfil the required format of the Genes section.")
}
return(correctType&&
correctLengthOutput&&
correctLengthGenes&&
correctFormatClinicalData&&
correctFormatExpressionmatrix&&
correctFormatAllGenes)
}