#' compare Scenarios #' #' Compare the scenario of each gene that was assigned by the algorithm with #' the scenario that is written in the validation Data. #' @param modality numerical, for which modality the scenarioComparing is calculated, 1 for unimodal, #' 2 for bimodal,... #' @param algorithmOutput The output to be compared #' @param validationData The validationData behind the simulated gene expression data frame #' #' @return Returns a list that classifies each assignment as #' TN (TRUE Negative),TP (TRUE Positive), FN (FALSE Negative), FP (FALSE Positive) #' @details #' TN (TRUE Negative): validationData: not modality, assigned scenario: not modality #' #' TP (TRUE Positive): validationData: modality, assigned scenario: modality #' #' FN (FALSE Negative): validationData: modality, assigned scenario: not modality #' #' FP (FALSE Positive): validationData: not modality, assigned scenario: modality compareScenarios <- function(modality, algorithmOutput,validationData){ compareScenarios <- (unlist(lapply(1:length(algorithmOutput$Genes),function(i){ if(validationData[[i]]$modus != modality && algorithmOutput$Genes[[i]]$modus!=modality){ evaluation <- "TN" }else if(validationData[[i]]$modus ==modality && algorithmOutput$Genes[[i]]$modus==modality){ evaluation <- "TP" }else if(validationData[[i]]$modus !=modality && algorithmOutput$Genes[[i]]$modus==modality){ evaluation <- "FP" }else if(validationData[[i]]$modus ==modality && algorithmOutput$Genes[[i]]$modus!=modality){ evaluation <- "FN" }else{ evaluation <- NA } }))) return(compareScenarios) } #' Is Approximately the Same #' #' checks whether the difference between two values is less than or equal the #' maxDistance #' @param validationVal value of the validationData (exact value) #' @param classVal estimated value (approx. value) #' @param maxDistance percentage of maximum distance of the values #' #' @return TRUE, if the difference between the two values is less than or #' equal the maxDistance; FALSE, if the difference between the two values is #' greater than the maxDistance #' isApproxSame<- function(validationVal,classVal,maxDistance){ difference <- (abs((validationVal - classVal)/validationVal)*100) if(difference <= maxDistance){ return(TRUE) }else{ return(FALSE) } } #' Evaluate AlgorithmOutput #' #' @param modality numerical, for which modality output is evaluated, 1 for unimodal, #' 2 for bimodal,... #' @param algorithmOutput The formatted output of the algorithm that is now #' evaluated #' @param validationData the validationData data.frame #' @param maxDifference the maximum percentage difference between the estimated #' value and the validation data value with which the estimated is classified #' as TRUE (correctly estimated) #' @examples #' \dontrun{ #' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10))) #' simulation <- simulateExpression(params= params,verbose=FALSE) #' expression <- simulation$expressionData #' validationData <- simulation$validationData #' output <- useMclust(expression, verbose=FALSE) #' # TRUE if less than +- 10 % difference to value of validationData #' evaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = output, #' validationData = validationData) #' # TRUE if less than +- 20 % difference to value of validationData #' evaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = output, #' validationData = validationData,maxDifference = 20) #' } #' @return Returns a data.frame where for each as TP classified gene it was #' evaluated whether it was estimated approximately correct or not (TRUE or #' FALSE) #' @details #' The estimated values of mean1, mean2, sd1, sd2, proportion1 and proportion2 #' of each as TP classified gene are compared to those values of the validationData. #' If the difference is greater than 10 %, the estimation is classified as #' FALSE (not correctly estimated) for this value. If the difference is less #' or equal to 10%, the estimation is classified as TRUE (correctly estimated) #' #' @export evaluateAlgorithmOutput <- function(modality,algorithmOutput,validationData,maxDifference = 10){ compared <- compareScenarios(modality = modality,algorithmOutput = algorithmOutput,validationData = validationData) genes <- names(validationData) approxSame <- (lapply(1:length(algorithmOutput$Genes),function(i){ if((algorithmOutput$Genes[[i]]$modus==modality) && (validationData[[i]]$modus==modality)){ classValsMean <- sort(algorithmOutput$Genes[[i]]$means) x <- algorithmOutput$Genes[[i]]$sds names(x) <- order(algorithmOutput$Genes[[i]]$means) classValSDs <- x[order(names(x))] y <- algorithmOutput$Genes[[i]]$sizes names(y) <- order(algorithmOutput$Genes[[i]]$means) classValProps <- y[order(names(y))] means <- unlist(lapply(1:length(validationData[[i]]$means),function(j){ isApproxSame(validationVal = validationData[[i]]$means[j],classVal = classValsMean[j],maxDistance = maxDifference) })) sds <- unlist(lapply(1:length(validationData[[i]]$sds),function(j){ isApproxSame(validationVal = validationData[[i]]$sds[j],classVal = classValSDs[j],maxDistance = maxDifference) })) props <- unlist(lapply(1:length(validationData[[i]]$sizes),function(j){ isApproxSame(validationVal = validationData[[i]]$sizes[j],classVal = classValProps[j],maxDistance = maxDifference) })) c(means,sds,props) } })) names(approxSame) <- genes evaluation<- data.frame(do.call('rbind',approxSame)) meanNames <- sprintf(c("mean%s"),(1:(ncol(evaluation)/3))) sdNames <- sprintf(c("sd%s"),(1:(ncol(evaluation)/3))) propNames <- sprintf(c("prop%s"),(1:(ncol(evaluation)/3))) colnames(evaluation) <- c(meanNames,sdNames,propNames) return(evaluation) } #' getClassification #' #' Returns the numbers of TN (TRUE Negative),TP (TRUE Positive), #' FN (FALSE Negative) and FP (FALSE Positive) for one of the algorithm's #' formatted outputs. #' @param modality numerical, for which modality the classification is calculated, 1 for unimodal, #' 2 for bimodal,... #' @param algorithmOutput The formatted output of the algorithm #' @param algorithmName The name of the evaluated algorithm #' @param validationData The validationData behind the simulated gene #' expression data frame #' @examples #' \dontrun{ #' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10))) #' simulation <- simulateExpression(params= params,verbose=FALSE) #' expression <- simulation$expressionData #' validationData <- simulation$validationData #' mclustOutput <- useMclust(expression, verbose=FALSE) #' getClassifications(modality = 2,validationData = validationData, #' outputs = list("mclust" = mclustOutput)) #' } #' @return a classification data frame that contains the number of #' FN, FP, TN and TP #' #' @details #' TN (TRUE Negative): validationData: unimodal, assigned scenario: unimodal #' #' TP (TRUE Positive): validationData: bimodal, assigned scenario: bimodal #' #' FN (FALSE Negative): validationData: bimodal, assigned scenario: unimodal #' or #' FN (FALSE Negative): validationData: unimodal/bimodal, #' assigned scenario: more than bimodal #' #' FP (FALSE Positive): validationData: unimodal, assigned scenario: bimodal #' getClassification <- function(modality,algorithmOutput,algorithmName="mclust",validationData){ compared <- compareScenarios(modality = modality, algorithmOutput = algorithmOutput,validationData = validationData) fn <- table(compared)["FN"] fp <- table(compared)["FP"] tn <- table(compared)["TN"] tp <- table(compared)["TP"] classification <- data.frame(matrix(nrow = 1,ncol = 4,data = c(fn,fp,tn,tp), byrow = TRUE)) colnames(classification)<-c("FN","FP","TN","TP") rownames(classification)<-c(algorithmName) return(classification) } #' getStatistic #' #' This function produces informations about the evaluation of one #' algorithms. The result is a data frame where the percentage of as TRUE and #' as FALSE evaluated values of the genes #' #' @param evaluation The output of the evaluation of the formatted output #' @param algorithmName The name of the algorithm which created the output #' @return A data frame with the percentual proportion of as TRUE or FALSE evaluated values #' @examples #' \dontrun{ #' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10))) #' simulation <- simulateExpression(params= params,verbose=FALSE) #' expression <- simulation$expressionData #' validationData <- simulation$validationData #' mclustOutput <- useMclust(expression, verbose=FALSE) #' mclustEvaluation <- evaluateAlgorithmOutput(modality = 2, algorithmOutput = mclustOutput, #' validationData = validationData) #' getStatistics(evaluations = list("mclust" = mclustEvaluation)) #' } getStatistic <- function(evaluation,algorithmName ="mclust"){ true <- (table(unlist(evaluation))["TRUE"]/(table(unlist(evaluation))["FALSE"]+table(unlist(evaluation))["TRUE"]))*100 false <- (table(unlist(evaluation))["FALSE"]/(table(unlist(evaluation))["FALSE"]+table(unlist(evaluation))["TRUE"]))*100 stats <- data.frame(matrix(ncol = 1, nrow = 2,data = c(true,false),byrow = TRUE)) colnames(stats)<- c(algorithmName) rownames(stats)<- c("TRUE [%]","FALSE [%]") return(stats) } #' Statistics #' #' This function produces informations about the evaluation of the three #' algorithms. The result is a data frame where the percentage of as TRUE and #' as FALSE evaluated values of the genes #' #' @param evaluations list of named evaluations #' @return A data frame with the percentual proportion of as TRUE or FALSE evaluated values #' @examples #' \dontrun{ #' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10))) #' simulation <- simulateExpression(params= params,verbose=FALSE) #' expression <- simulation$expressionData #' validationData <- simulation$validationData #' mclustOutput <- useMclust(expression = expression,verbose = FALSE) #' hdbscanOutput <- useHdbscan(expression = expression,verbose = FALSE) #' mclustEvaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = mclustOutput, #' validationData = validationData) #' hdbscanEvaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = hdbscanOutput, #' validationData = validationData) #' #' #getStatistics of one algorithm #' getStatistics(evaluations = list("mclust"=mclustEvaluation)) #' #getStatistics of two or more algorithms #' getStatistics(evaluations = list("mclust"=mclustEvaluation,"HDBSCAN"=hdbscanEvaluation)) #' } #' @export getStatistics <- function(evaluations) { numEvaluations <- length(evaluations) statistics <- lapply(1:numEvaluations, function(i) {getStatistic(evaluation = evaluations[[i]], algorithmName = names(evaluations)[i])}) stats <- data.frame(do.call('cbind',statistics)) colnames(stats)<- names(evaluations) rownames(stats)<- c("TRUE [%]","FALSE [%]") return(stats) } #' getClassifications #' #' Returns the numbers of TN (TRUE Negative),TP (TRUE Positive), #' FN (FALSE Negative) and FP (FALSE Positive) for each of the algorithm's #' formatted outputs. #' @param modality numerical, for which modality the classification is calculated, 1 for unimodal, #' 2 for bimodal,... #' @param outputs list of named outputs #' @param validationData The validationData behind the simulated gene expression data frame #' #' @return a classification data frame that contains the number of #' FN, FP, TN and TP #' @examples #' \dontrun{ #' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10))) #' simulation <- simulateExpression(params= params,verbose=FALSE) #' expression <- simulation$expressionData #' validationData <- simulation$validationData #' mclustOutput <- useMclust(expression = expression,verbose = FALSE) #' hdbscanOutput <- useHdbscan(expression = expression,verbose = FALSE) #' #' #getClassifications of one algorithm #' getClassifications(modality=2,validationData, #' outputs = list("mclust"=mclustOutput)) #' #getClassifications of two or more algorithms #' getClassifications(modality=2,validationData, #' outputs = list("mclust"=mclustOutput,"HDBSCAN"=hdbscanOutput)) #' } #' #' @details #' TN (TRUE Negative): validationData: unimodal, assigned scenario: unimodal #' #' TP (TRUE Positive): validationData: bimodal, assigned scenario: bimodal #' #' FN (FALSE Negative): validationData: bimodal, assigned scenario: unimodal #' or #' FN (FALSE Negative): validationData: unimodal/bimodal, #' assigned scenario: more than bimodal #' #' FP (FALSE Positive): validationData: unimodal, assigned scenario: bimodal #' #' @export getClassifications <- function(modality,validationData,outputs){ numOutputs <- (length(outputs)) classification <- lapply(1:numOutputs,function(i){ getClassification(modality,algorithmOutput = outputs[[i]], algorithmName = names(outputs)[i], validationData = validationData) }) classification <- data.frame(do.call('rbind',classification)) colnames(classification)<-c("FN","FP","TN","TP") rownames(classification)<-names(outputs) return(classification) } #' Checks if output fulfills the criteria of the unified output data format #' @details prints status messages about performed checks #' @param output to be tested #' @param expressionmatrix that was used by algorithm to create output #' @return logical: TRUE if output is in the correct format, FALSE if any check was failed #' @export isValidUnifiedOutputDataFormat <- function(output,expressionmatrix){ correctType <- FALSE correctLengthOutput <- FALSE correctLengthGenes <- FALSE correctFormatClinicalData <- FALSE correctFormatExpressionmatrix <- FALSE correctFormatAllGenes <- FALSE if(is.list(output)){ message("Output is of type 'list'") correctType <- TRUE }else{ message("Output is not of required type 'list'.") return(FALSE)} if(length(output)==3){ message("Output has a length of 3.") correctLengthOutput <- TRUE }else{ message("Output has not a length of 3.") return(FALSE)} if(length(output$Genes)==nrow(expressionmatrix)){ message("Output$Genes holds the correct number of genes.") correctLengthGenes <- TRUE }else{ message("Output does not hold the correct number of genes.") return(FALSE)} if((is.character(output$ClinicalData))&&(length(output$ClinicalData)==1)){ message("Output$ClinicalData is of the required type 'character' and the required length of 1.") correctFormatClinicalData <- TRUE }else{ message("Output$ClinicalData is not of the required type 'character' and/or not of the required length of 1.") return(FALSE)} if((is.character(output$Expressionmatrix))&&(length(output$Expressionmatrix)==1)){ message("Output$Expressionmatrix is of the required type 'character' and of the required length of 1.") correctFormatExpressionmatrix <- TRUE }else{ message("Output$Expressionmatrix is not of the required type 'character' and/or not of the required length of 1.") return(FALSE)} correctlyFormattedGenes <- try(unlist(lapply(1:length(output$Genes),function(i){ isCorrectFormattedGene <- FALSE allCorrectFormat <- is.integer(output$Genes[[i]]$modus)&& is.double(output$Genes[[i]]$means)&&(length(output$Genes[[i]]$means)==output$Genes[[i]]$modus)&& is.double(output$Genes[[i]]$sds)&&(length(output$Genes[[i]]$sds)==output$Genes[[i]]$modus)&& is.integer(output$Genes[[i]]$sizes)&&(length(output$Genes[[i]]$sizes)==output$Genes[[i]]$modus)&& is.list(output$Genes[[i]]$groups)&&(length(output$Genes[[i]]$groups)==output$Genes[[i]]$modus)&& (sum(output$Genes[[i]]$sizes)==ncol(expressionmatrix)) listCorrectLength<- unlist(lapply(1:length(output$Genes[[i]]$groups),function(j){ length(output$Genes[[i]]$groups[[j]])==output$Genes[[i]]$sizes[[j]] })) allListsCorrectLength <- (!any(listCorrectLength==FALSE)) isCorrectFormattedGene <- allCorrectFormat&&allListsCorrectLength })),silent = T) if(class(correctlyFormattedGenes)=="try-error"){ message("The correct format of the genes could not be evaluated due to wrong characteristics of the Genes section.") return(FALSE)} else{ correctFormatAllGenes <-(!any(correctlyFormattedGenes==FALSE)) } if(correctFormatAllGenes){ message("All Genes are in the correct format.") }else{ message("One or more genes do not fulfil the required format of the Genes section.") } return(correctType&& correctLengthOutput&& correctLengthGenes&& correctFormatClinicalData&& correctFormatExpressionmatrix&& correctFormatAllGenes) }