Edited roxygen comments, SilvermanOverP function, added vignettes

Edited many roxygen comments (especially examples and descriptions) to fit the newest version and functions of bimodalR. Edited function SilvermanOverP in filter.R to erase a mistake. Added vignettes, one completed(bimodalR) and one not finished(UsingBimodalR).
loosolab · May 16, 2018 · 8d34ba3 · 8d34ba3
1 parent 8c06005
commit 8d34ba3
Show file tree

Hide file tree

Showing 32 changed files with 1,134 additions and 329 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,4 @@
 .Rhistory
 .RData
 .Ruserdata
+inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -18,4 +18,7 @@ Imports: LaplacesDemon,
     plyr,
     mclust,
     diptest
-Suggests: genefilter
+Suggests: genefilter,
+    knitr,
+    rmarkdown
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,7 +2,6 @@
 
 export(DipTestOverP)
 export(SilvermanOverP)
-export(createValidationData)
 export(evaluateAlgorithmOutput)
 export(getClassifications)
 export(getParam)
@@ -16,9 +15,26 @@ export(useFlexmix)
 export(useHdbscan)
 export(useMclust)
 exportClasses(Params)
+import(dbscan)
+import(diptest)
+import(flexmix)
+import(mclust)
+import(silvermantest)
 importFrom(LaplacesDemon,rinvgaussian)
 importFrom(checkmate,assertCharacter)
 importFrom(checkmate,assertClass)
 importFrom(checkmate,assertList)
 importFrom(checkmate,assertString)
+importFrom(methods,"slot<-")
+importFrom(methods,new)
+importFrom(methods,rbind2)
+importFrom(methods,slot)
+importFrom(methods,slotNames)
+importFrom(methods,validObject)
+importFrom(plyr,compact)
+importFrom(stats,dnorm)
+importFrom(stats,rgamma)
+importFrom(stats,rnorm)
+importFrom(stats,runif)
+importFrom(stats,sd)
 importFrom(utils,head)
diff --git a/R/algorithmSpecificFunctions.R b/R/algorithmSpecificFunctions.R
@@ -3,40 +3,56 @@
 #' Use the mclust algorithm for generating output to evaluate the
 #' classification (unimodal,bimodal or other) of genes.
 #'
-#'
-#'
 #' @param expression (Simulated) Gene Expression data frame which genes shall
 #' be classified as unimodal, bimodal or other
+#' @param clusterNumbers span of the clusters to be calculated at maximum
 #' @param verbose logical. Whether to print progress messages
 #' @param parallel logical. Whether to calculate the mclust models in parallel
 #' @return The formatted output of the mclust algorithm
 #'
 #' @examples
-#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
+#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
 #' simulation <- simulateExpression(params= params,verbose=FALSE)
 #' expression <- simulation$expressionData
 #' #with messages printed to console
 #' mclustOutput <- useMclust(expression = expression)
-#' #without messages printed to console
-#' mclustOutput <- useMclust(expression = expression, verbose = FALSE)
+#' #without messages printed to console, at maximum 3 clusters
+#' mclustOutput <- useMclust(expression = expression, clusterNumbers = c(1:3),
+#' verbose = FALSE, parallel = FALSE)
 #'
 #' @import mclust
 #' @export
-useMclust <- function(expression,verbose = TRUE,parallel = FALSE) {
+#'
+useMclust <- function(expression,clusterNumbers = NULL,verbose = TRUE,parallel = FALSE) {
   if (verbose) {message("mclust-models are calculated...")}
-
-  if(parallel==FALSE){
+  if(is.null(clusterNumbers)){
+    if(parallel==FALSE){
+      models <- lapply(1:nrow(expression), function(i) {
+        mclust::Mclust((expression[i,]),modelNames=c("V"),verbose = FALSE) })
+    }
+    if(parallel==TRUE){
+      n.cores = parallel::detectCores()-1
+      cl = parallel::makeCluster(n.cores)
+      parallel::clusterExport(cl, "expression")
+      parallel::clusterEvalQ(cl, library(mclust))
+      models <- parallel::parLapply(cl, 1:nrow(expression), function(i) {
+        mclust::Mclust((expression[i,]), modelNames=c("V")) })
+    }
+  }else{
+    if(parallel==FALSE){
     models <- lapply(1:nrow(expression), function(i) {
-      mclust::Mclust((expression[i,]),modelNames=c("V"),verbose = FALSE) })
+      mclust::Mclust((expression[i,]),modelNames=c("V"),verbose = FALSE,G = clusterNumbers) })
   }
   if(parallel==TRUE){
     n.cores = parallel::detectCores()-1
     cl = parallel::makeCluster(n.cores)
     parallel::clusterExport(cl, "expression")
     parallel::clusterEvalQ(cl, library(mclust))
     models <- parallel::parLapply(cl, 1:nrow(expression), function(i) {
-      mclust::Mclust((expression[i,]), modelNames=c("V")) })
+      mclust::Mclust((expression[i,]), modelNames=c("V"),G = clusterNumbers) })
   }
+  }
+
   if(parallel==TRUE){parallel::stopCluster(cl)}
   names(models) = rownames(expression)
   patients <-  names(expression)
@@ -51,15 +67,12 @@ useMclust <- function(expression,verbose = TRUE,parallel = FALSE) {
 #' Using the output of the mclust algorithm for generating output in a unified
 #' format to evaluate the classification (unimodal,bimodal or other) of genes.
 #'
-#'
 #' @param models Unformatted output of the Mclust function from the mclust
 #' package of the genes from the gene expression data frame
 #' @param patients List of the patients
 #'
 #' @return The formatted output of the mclust algorithm
 #'
-#'
-#'
 mclustClassification <- function(models,patients){
 
   genes <- names(models)
@@ -94,8 +107,6 @@ mclustClassification <- function(models,patients){
 #' Use the HDBSCAN algorithm for generating output to evaluate the
 #' classification (unimodal,bimodal or other) of genes.
 #'
-#'
-#'
 #' @param expression (Simulated) Gene Expression data frame which genes shall
 #' be classified as unimodal, bimodal or other
 #' @param minPts Integer; Minimum size of clusters. See details of ?hdbscan
@@ -104,7 +115,7 @@ mclustClassification <- function(models,patients){
 #' @return hdbscanOutput The formatted output of the HDBSCAN clustering
 #'
 #' @examples
-#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
+#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
 #' simulation <- simulateExpression(params= params,verbose=FALSE)
 #' expression <- simulation$expressionData
 #' #with messages printed to console
@@ -116,6 +127,7 @@ mclustClassification <- function(models,patients){
 #'
 #' @import dbscan
 #' @export
+#'
 useHdbscan <- function(expression,minPts = 10,verbose = TRUE){
   if(verbose){message("HDBSCAN-clusters are calculated...")}
   genes <- rownames(expression)
@@ -187,7 +199,6 @@ useHdbscan <- function(expression,minPts = 10,verbose = TRUE){
 #' @param error The difference of a value predicted by a model and an actually
 #' observed value
 #'
-#'
 rmse <- function(error){
   error <- unlist(error)
   error2 <- unlist(lapply(1:length(error),function(i){if(!is.na(error[[i]])){c(error[[i]])}}))
@@ -208,7 +219,7 @@ rmse <- function(error){
 #' @return flexmixOutput The formatted output of the best fitting models from flexmix
 #'
 #' @examples
-#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
+#' params <- newParams(nGenes = list("1"=75,"2"=list("gauss"= 75,"gamma"= 75)))
 #' simulation <- simulateExpression(params= params,verbose=FALSE)
 #' expression <- simulation$expressionData
 #' #with messages printed to console
@@ -219,6 +230,7 @@ rmse <- function(error){
 #' @import flexmix
 #' @importFrom plyr compact
 #' @export
+#'
 useFlexmix <- function(expression,maxModality, verbose = TRUE,parallel=FALSE){
   modelGauss <- c()
   modelGamma <- c()

diff --git a/R/evaluate.R b/R/evaluate.R
@@ -66,7 +66,7 @@ isApproxSame<- function(validationVal,classVal,maxDistance){
 #' value and the validation data value with which the estimated is classified
 #' as TRUE (correctly estimated)
 #' @examples
-#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
+#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
 #' simulation <- simulateExpression(params= params,verbose=FALSE)
 #' expression <- simulation$expressionData
 #' validationData <- simulation$validationData
@@ -137,7 +137,7 @@ evaluateAlgorithmOutput <- function(modality,algorithmOutput,validationData,maxD
 #' @param validationData The validationData behind the simulated gene
 #' expression data frame
 #' @examples
-#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
+#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
 #' simulation <- simulateExpression(params= params,verbose=FALSE)
 #' expression <- simulation$expressionData
 #' validationData <- simulation$validationData
@@ -187,7 +187,7 @@ getClassification <- function(modality,algorithmOutput,algorithmName="mclust",va
 #' @param algorithmName The name of the algorithm which created the output
 #' @return A data frame with the percentual proportion of as TRUE or FALSE evaluated values
 #' @examples
-#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
+#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
 #' simulation <- simulateExpression(params= params,verbose=FALSE)
 #' expression <- simulation$expressionData
 #' validationData <- simulation$validationData
@@ -213,24 +213,21 @@ getStatistic <- function(evaluation,algorithmName ="mclust"){
 #' @param evaluations list of named evaluations
 #' @return A data frame with the percentual proportion of as TRUE or FALSE evaluated values
 #' @examples
-#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
+#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
 #' simulation <- simulateExpression(params= params,verbose=FALSE)
 #' expression <- simulation$expressionData
 #' validationData <- simulation$validationData
 #' mclustOutput <- useMclust(expression = expression,verbose = FALSE)
 #' hdbscanOutput <- useHdbscan(expression = expression,verbose = FALSE)
-#' flexmixOutput <- useFlexmix(expression = expression,maxModality = 2,verbose = FALSE)
 #' mclustEvaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = mclustOutput,
 #' validationData = validationData)
 #' hdbscanEvaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = hdbscanOutput,
 #' validationData = validationData)
-#' flexmixEvaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = flexmixOutput,
-#' validationData = validationData)
 #'
 #' #getStatistics of one algorithm
-#' getStatistics(evaluations = list("mclust"=mclustEval))
+#' getStatistics(evaluations = list("mclust"=mclustEvaluation))
 #' #getStatistics of two or more algorithms
-#' getStatistics(evaluations = list("mclust"=mclustEval,"HDBSCAN"=hdbscanEval))
+#' getStatistics(evaluations = list("mclust"=mclustEvaluation,"HDBSCAN"=hdbscanEvaluation))
 #' @export
 getStatistics <- function(evaluations) {
   numEvaluations <- length(evaluations)
@@ -254,13 +251,12 @@ getStatistics <- function(evaluations) {
 #' @return a classification data frame that contains the number of
 #' FN, FP, TN and TP
 #' @examples
-#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
+#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
 #' simulation <- simulateExpression(params= params,verbose=FALSE)
 #' expression <- simulation$expressionData
 #' validationData <- simulation$validationData
 #' mclustOutput <- useMclust(expression = expression,verbose = FALSE)
 #' hdbscanOutput <- useHdbscan(expression = expression,verbose = FALSE)
-#' flexmixOutput <- useFlexmix(expression = expression,maxModality = 2,verbose = FALSE)
 #'
 #' #getClassifications of one algorithm
 #' getClassifications(modality=2,validationData,

diff --git a/R/filter.R b/R/filter.R
@@ -6,26 +6,27 @@
 #' @param na.rm Whether NAs should be removed.
 #'
 #' @return A function with bindings for p and R.
+#' @import silvermantest
 #'
 #' @export
 SilvermanOverP <- function(p, R = 100, na.rm = T) {
   function(x) {
     if (na.rm)
       x <- x[!is.na(x)]
-    s1 <- silvermantest::silverman.test(x, k = 1, m = 1, R = R, adjust = T)
+    s1 <- silvermantest::silverman.test(x, k = 1, R = R, adjust = T)
     s1@p_value < p
   }
 }
 
 #' DipTestOverP returns a filter function with bindings for P.
 #' This function evaluates to TRUE if the Hartigans Dip Test for unimodality can be rejected at the significance level P.
 #'
-#' @param p
-#' @param R
-#' @param na.rm
+#' @param p The P-value that is used to reject Hartigans Dip Test of unimodality (
+#' @param R The number of bootstrap replicates for each test.
+#' @param na.rm Whether NAs should be removed.
 #'
 #' @return A function with bindings for p and R.
-#'
+#' @import diptest
 #' @export
 DipTestOverP <- function(p, R = 100, na.rm = T) {
   function(x) {

diff --git a/R/params.R b/R/params.R
@@ -47,34 +47,24 @@ setGeneric("setParam",
 #' The Params class defines the following parameters:
 #'
 #' \describe{
-#'     \item{\code{nUniGenes}}{The number of genes to simulate with the
-#'     Unimodal scenario.}
-#'     \item{\code{nBiGenes}}{The number of genes to simulate with the
-#'     bimodal scenario.}
-#'     \item{\code{nGuGenes}}{The number of genes to simulate with the
-#'     gamma and unimodal scenario.}
-#'     \item{\code{nPatients}}{The number of patients to simulate.}
+#'     \item{\code{nGenes}}{A list containing the numbers of genes to simulate for
+#'     each scenario and modality.}
+#'     \item{\code{means}}{A list containing the parameter range for the mean for
+#'     each scenario and modality.}
+#'     \item{\code{foldChanges}}A list containing the parameter range for the log2
+#'     foldChanges between distributions for multimodal genes. Has to contain one
+#'     entry less than the modality that is simulated.}
+#'     \item{\code{sd}}{The parameters mu and lambda for the generation of the
+#'     standard deviation of each distribution via an Inverse Gaussian distribution.}
+#'     \item{\code{gamma}}{The shape and rate parameter  for generating the gamma
+#'     distributions.}
+#'     \item{\code{proportions}}{Parameters for the allowed proportions of
+#'     multimodal distributions.}
+#'     \item{\code{nPatients}}{The number of patients to simulate and how
+#'     many columns the simulated gene expression should have per gene.}
 #'     \item{\code{seed}}{Seed to use for generating random numbers.}
-#'     \item{\code{sdParms}}{The parameters mu and lambda for the generation of the
-#'     distribution of the standard deviation for each szenario.}
-#'     \item{\code{uniMean}}{The mean span for the gaussian distribution of the
-#'     unimodal simulation scenario.}
-#'     \item{\code{biMean}}{The mean span for the gaussian distribution of the
-#'     bimodal simulation scenario.}
-#'     \item{\code{foldChange}}{The log2 foldChange between the modes in the
-#'     bimodal simuation scenario.}
-#'     \item{\code{proportions}}{The proporties of the different distributions of the
-#'     bimodal and gamma+unimodal simulation scenario.}
-#'     \item{\code{guMean}}{The mean span for the gaussian distribution of the
-#'     gamma+unimodal simulation scenario.}
-#'     \item{\code{gammaShape}}{The shape parameter for the mean gamma+unimodal
-#'     distribution.}
-#'     \item{\code{gammaRate}}{The rate parameter for the mean gamma+unimodal
-#'     distribution.}
 #' }
 #'
-#' The parameters shown in brackets can be estimated from real data.
-#'
 #' @name Params
 #' @rdname Params
 #' @aliases Params-class
@@ -140,7 +130,8 @@ setMethod("show", "Params", function(object) {
 #' params <- newParams("nGenes"=list("1"=100,"2"=list("gauss"=100,"gamma"=100)),
 #' "proportions"=c(30,70))
 #' params <- newParams(
-#' nGenes=list("1"=c(700),"2"=list(gauss = 150, gamma = 150),"3"=list(gauss = 150, gamma = 150)),
+#' nGenes=list("1"=c(700),"2"=list(gauss = 150, gamma = 150),
+#' "3"=list(gauss = 150, gamma = 150)),
 #' means=list("1"= c(2, 4),"2"= c(2, 4),"3"= c(2, 4)),
 #' foldChanges = list("1"=NA,"2"= list(gauss = c(2, 4), gamma = c(2, 4)),
 #' "3"= list(gauss = list(c(2, 4), c(2, 4)), gamma = list(c(2, 4), c(2, 4)))))
@@ -197,12 +188,13 @@ getParams <- function(params, names) {
 #' params <- newParams()
 #' params
 #' # Set individually
-#' params <- params <- setParams(params,proportions = c(80,20),foldChanges = list("1"=c(2,4),
-#' "2"= list("gauss"=c(2,5),"gamma"=c(3,5))))
+#' params <- params <- setParams(params,proportions = c(80,20),
+#' foldChanges = list("1"=c(2,4),"2"= list("gauss"=c(2,5),"gamma"=c(3,5))))
 #' params
 #' # Set via update list
 #' params <- setParams(params,update = list("nGenes"=list("1"=100,
-#' "2"=list("gauss"=200,"gamma"=100)),"means"=list("1"=c(1,3),"2"=c(2,4)),"nPatients"=700))
+#' "2"=list("gauss"=200,"gamma"=100)),"means"=list("1"=c(1,3),"2"=c(2,4)),
+#' "nPatients"=700))
 #' params
 #' @export
 setParams <- function(params, update = NULL, ...) {