Skip to content

Commit

Permalink
Edited roxygen comments, SilvermanOverP function, added vignettes
Browse files Browse the repository at this point in the history
Edited many roxygen comments (especially examples and descriptions) to fit the newest version and functions of bimodalR.
Edited function SilvermanOverP in filter.R to erase a mistake.
Added vignettes, one completed(bimodalR) and one not finished(UsingBimodalR).
  • Loading branch information
sbeck committed May 16, 2018
1 parent 8c06005 commit 8d34ba3
Show file tree
Hide file tree
Showing 32 changed files with 1,134 additions and 329 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
.Rhistory
.RData
.Ruserdata
inst/doc
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,7 @@ Imports: LaplacesDemon,
plyr,
mclust,
diptest
Suggests: genefilter
Suggests: genefilter,
knitr,
rmarkdown
VignetteBuilder: knitr
18 changes: 17 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

export(DipTestOverP)
export(SilvermanOverP)
export(createValidationData)
export(evaluateAlgorithmOutput)
export(getClassifications)
export(getParam)
Expand All @@ -16,9 +15,26 @@ export(useFlexmix)
export(useHdbscan)
export(useMclust)
exportClasses(Params)
import(dbscan)
import(diptest)
import(flexmix)
import(mclust)
import(silvermantest)
importFrom(LaplacesDemon,rinvgaussian)
importFrom(checkmate,assertCharacter)
importFrom(checkmate,assertClass)
importFrom(checkmate,assertList)
importFrom(checkmate,assertString)
importFrom(methods,"slot<-")
importFrom(methods,new)
importFrom(methods,rbind2)
importFrom(methods,slot)
importFrom(methods,slotNames)
importFrom(methods,validObject)
importFrom(plyr,compact)
importFrom(stats,dnorm)
importFrom(stats,rgamma)
importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(utils,head)
48 changes: 30 additions & 18 deletions R/algorithmSpecificFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,56 @@
#' Use the mclust algorithm for generating output to evaluate the
#' classification (unimodal,bimodal or other) of genes.
#'
#'
#'
#' @param expression (Simulated) Gene Expression data frame which genes shall
#' be classified as unimodal, bimodal or other
#' @param clusterNumbers span of the clusters to be calculated at maximum
#' @param verbose logical. Whether to print progress messages
#' @param parallel logical. Whether to calculate the mclust models in parallel
#' @return The formatted output of the mclust algorithm
#'
#' @examples
#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' #with messages printed to console
#' mclustOutput <- useMclust(expression = expression)
#' #without messages printed to console
#' mclustOutput <- useMclust(expression = expression, verbose = FALSE)
#' #without messages printed to console, at maximum 3 clusters
#' mclustOutput <- useMclust(expression = expression, clusterNumbers = c(1:3),
#' verbose = FALSE, parallel = FALSE)
#'
#' @import mclust
#' @export
useMclust <- function(expression,verbose = TRUE,parallel = FALSE) {
#'
useMclust <- function(expression,clusterNumbers = NULL,verbose = TRUE,parallel = FALSE) {
if (verbose) {message("mclust-models are calculated...")}

if(parallel==FALSE){
if(is.null(clusterNumbers)){
if(parallel==FALSE){
models <- lapply(1:nrow(expression), function(i) {
mclust::Mclust((expression[i,]),modelNames=c("V"),verbose = FALSE) })
}
if(parallel==TRUE){
n.cores = parallel::detectCores()-1
cl = parallel::makeCluster(n.cores)
parallel::clusterExport(cl, "expression")
parallel::clusterEvalQ(cl, library(mclust))
models <- parallel::parLapply(cl, 1:nrow(expression), function(i) {
mclust::Mclust((expression[i,]), modelNames=c("V")) })
}
}else{
if(parallel==FALSE){
models <- lapply(1:nrow(expression), function(i) {
mclust::Mclust((expression[i,]),modelNames=c("V"),verbose = FALSE) })
mclust::Mclust((expression[i,]),modelNames=c("V"),verbose = FALSE,G = clusterNumbers) })
}
if(parallel==TRUE){
n.cores = parallel::detectCores()-1
cl = parallel::makeCluster(n.cores)
parallel::clusterExport(cl, "expression")
parallel::clusterEvalQ(cl, library(mclust))
models <- parallel::parLapply(cl, 1:nrow(expression), function(i) {
mclust::Mclust((expression[i,]), modelNames=c("V")) })
mclust::Mclust((expression[i,]), modelNames=c("V"),G = clusterNumbers) })
}
}

if(parallel==TRUE){parallel::stopCluster(cl)}
names(models) = rownames(expression)
patients <- names(expression)
Expand All @@ -51,15 +67,12 @@ useMclust <- function(expression,verbose = TRUE,parallel = FALSE) {
#' Using the output of the mclust algorithm for generating output in a unified
#' format to evaluate the classification (unimodal,bimodal or other) of genes.
#'
#'
#' @param models Unformatted output of the Mclust function from the mclust
#' package of the genes from the gene expression data frame
#' @param patients List of the patients
#'
#' @return The formatted output of the mclust algorithm
#'
#'
#'
mclustClassification <- function(models,patients){

genes <- names(models)
Expand Down Expand Up @@ -94,8 +107,6 @@ mclustClassification <- function(models,patients){
#' Use the HDBSCAN algorithm for generating output to evaluate the
#' classification (unimodal,bimodal or other) of genes.
#'
#'
#'
#' @param expression (Simulated) Gene Expression data frame which genes shall
#' be classified as unimodal, bimodal or other
#' @param minPts Integer; Minimum size of clusters. See details of ?hdbscan
Expand All @@ -104,7 +115,7 @@ mclustClassification <- function(models,patients){
#' @return hdbscanOutput The formatted output of the HDBSCAN clustering
#'
#' @examples
#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' #with messages printed to console
Expand All @@ -116,6 +127,7 @@ mclustClassification <- function(models,patients){
#'
#' @import dbscan
#' @export
#'
useHdbscan <- function(expression,minPts = 10,verbose = TRUE){
if(verbose){message("HDBSCAN-clusters are calculated...")}
genes <- rownames(expression)
Expand Down Expand Up @@ -187,7 +199,6 @@ useHdbscan <- function(expression,minPts = 10,verbose = TRUE){
#' @param error The difference of a value predicted by a model and an actually
#' observed value
#'
#'
rmse <- function(error){
error <- unlist(error)
error2 <- unlist(lapply(1:length(error),function(i){if(!is.na(error[[i]])){c(error[[i]])}}))
Expand All @@ -208,7 +219,7 @@ rmse <- function(error){
#' @return flexmixOutput The formatted output of the best fitting models from flexmix
#'
#' @examples
#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
#' params <- newParams(nGenes = list("1"=75,"2"=list("gauss"= 75,"gamma"= 75)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' #with messages printed to console
Expand All @@ -219,6 +230,7 @@ rmse <- function(error){
#' @import flexmix
#' @importFrom plyr compact
#' @export
#'
useFlexmix <- function(expression,maxModality, verbose = TRUE,parallel=FALSE){
modelGauss <- c()
modelGamma <- c()
Expand Down
18 changes: 7 additions & 11 deletions R/evaluate.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ isApproxSame<- function(validationVal,classVal,maxDistance){
#' value and the validation data value with which the estimated is classified
#' as TRUE (correctly estimated)
#' @examples
#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' validationData <- simulation$validationData
Expand Down Expand Up @@ -137,7 +137,7 @@ evaluateAlgorithmOutput <- function(modality,algorithmOutput,validationData,maxD
#' @param validationData The validationData behind the simulated gene
#' expression data frame
#' @examples
#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' validationData <- simulation$validationData
Expand Down Expand Up @@ -187,7 +187,7 @@ getClassification <- function(modality,algorithmOutput,algorithmName="mclust",va
#' @param algorithmName The name of the algorithm which created the output
#' @return A data frame with the percentual proportion of as TRUE or FALSE evaluated values
#' @examples
#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' validationData <- simulation$validationData
Expand All @@ -213,24 +213,21 @@ getStatistic <- function(evaluation,algorithmName ="mclust"){
#' @param evaluations list of named evaluations
#' @return A data frame with the percentual proportion of as TRUE or FALSE evaluated values
#' @examples
#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' validationData <- simulation$validationData
#' mclustOutput <- useMclust(expression = expression,verbose = FALSE)
#' hdbscanOutput <- useHdbscan(expression = expression,verbose = FALSE)
#' flexmixOutput <- useFlexmix(expression = expression,maxModality = 2,verbose = FALSE)
#' mclustEvaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = mclustOutput,
#' validationData = validationData)
#' hdbscanEvaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = hdbscanOutput,
#' validationData = validationData)
#' flexmixEvaluation <- evaluateAlgorithmOutput(modality = 2,algorithmOutput = flexmixOutput,
#' validationData = validationData)
#'
#' #getStatistics of one algorithm
#' getStatistics(evaluations = list("mclust"=mclustEval))
#' getStatistics(evaluations = list("mclust"=mclustEvaluation))
#' #getStatistics of two or more algorithms
#' getStatistics(evaluations = list("mclust"=mclustEval,"HDBSCAN"=hdbscanEval))
#' getStatistics(evaluations = list("mclust"=mclustEvaluation,"HDBSCAN"=hdbscanEvaluation))
#' @export
getStatistics <- function(evaluations) {
numEvaluations <- length(evaluations)
Expand All @@ -254,13 +251,12 @@ getStatistics <- function(evaluations) {
#' @return a classification data frame that contains the number of
#' FN, FP, TN and TP
#' @examples
#' params <- newParams(nGenes = list("1"=100,"2"=list("gauss"= 100,"gamma"= 100)))
#' params <- newParams(nGenes = list("1"=10,"2"=list("gauss"= 10,"gamma"= 10)))
#' simulation <- simulateExpression(params= params,verbose=FALSE)
#' expression <- simulation$expressionData
#' validationData <- simulation$validationData
#' mclustOutput <- useMclust(expression = expression,verbose = FALSE)
#' hdbscanOutput <- useHdbscan(expression = expression,verbose = FALSE)
#' flexmixOutput <- useFlexmix(expression = expression,maxModality = 2,verbose = FALSE)
#'
#' #getClassifications of one algorithm
#' getClassifications(modality=2,validationData,
Expand Down
11 changes: 6 additions & 5 deletions R/filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,27 @@
#' @param na.rm Whether NAs should be removed.
#'
#' @return A function with bindings for p and R.
#' @import silvermantest
#'
#' @export
SilvermanOverP <- function(p, R = 100, na.rm = T) {
function(x) {
if (na.rm)
x <- x[!is.na(x)]
s1 <- silvermantest::silverman.test(x, k = 1, m = 1, R = R, adjust = T)
s1 <- silvermantest::silverman.test(x, k = 1, R = R, adjust = T)
s1@p_value < p
}
}

#' DipTestOverP returns a filter function with bindings for P.
#' This function evaluates to TRUE if the Hartigans Dip Test for unimodality can be rejected at the significance level P.
#'
#' @param p
#' @param R
#' @param na.rm
#' @param p The P-value that is used to reject Hartigans Dip Test of unimodality (
#' @param R The number of bootstrap replicates for each test.
#' @param na.rm Whether NAs should be removed.
#'
#' @return A function with bindings for p and R.
#'
#' @import diptest
#' @export
DipTestOverP <- function(p, R = 100, na.rm = T) {
function(x) {
Expand Down
50 changes: 21 additions & 29 deletions R/params.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,34 +47,24 @@ setGeneric("setParam",
#' The Params class defines the following parameters:
#'
#' \describe{
#' \item{\code{nUniGenes}}{The number of genes to simulate with the
#' Unimodal scenario.}
#' \item{\code{nBiGenes}}{The number of genes to simulate with the
#' bimodal scenario.}
#' \item{\code{nGuGenes}}{The number of genes to simulate with the
#' gamma and unimodal scenario.}
#' \item{\code{nPatients}}{The number of patients to simulate.}
#' \item{\code{nGenes}}{A list containing the numbers of genes to simulate for
#' each scenario and modality.}
#' \item{\code{means}}{A list containing the parameter range for the mean for
#' each scenario and modality.}
#' \item{\code{foldChanges}}A list containing the parameter range for the log2
#' foldChanges between distributions for multimodal genes. Has to contain one
#' entry less than the modality that is simulated.}
#' \item{\code{sd}}{The parameters mu and lambda for the generation of the
#' standard deviation of each distribution via an Inverse Gaussian distribution.}
#' \item{\code{gamma}}{The shape and rate parameter for generating the gamma
#' distributions.}
#' \item{\code{proportions}}{Parameters for the allowed proportions of
#' multimodal distributions.}
#' \item{\code{nPatients}}{The number of patients to simulate and how
#' many columns the simulated gene expression should have per gene.}
#' \item{\code{seed}}{Seed to use for generating random numbers.}
#' \item{\code{sdParms}}{The parameters mu and lambda for the generation of the
#' distribution of the standard deviation for each szenario.}
#' \item{\code{uniMean}}{The mean span for the gaussian distribution of the
#' unimodal simulation scenario.}
#' \item{\code{biMean}}{The mean span for the gaussian distribution of the
#' bimodal simulation scenario.}
#' \item{\code{foldChange}}{The log2 foldChange between the modes in the
#' bimodal simuation scenario.}
#' \item{\code{proportions}}{The proporties of the different distributions of the
#' bimodal and gamma+unimodal simulation scenario.}
#' \item{\code{guMean}}{The mean span for the gaussian distribution of the
#' gamma+unimodal simulation scenario.}
#' \item{\code{gammaShape}}{The shape parameter for the mean gamma+unimodal
#' distribution.}
#' \item{\code{gammaRate}}{The rate parameter for the mean gamma+unimodal
#' distribution.}
#' }
#'
#' The parameters shown in brackets can be estimated from real data.
#'
#' @name Params
#' @rdname Params
#' @aliases Params-class
Expand Down Expand Up @@ -140,7 +130,8 @@ setMethod("show", "Params", function(object) {
#' params <- newParams("nGenes"=list("1"=100,"2"=list("gauss"=100,"gamma"=100)),
#' "proportions"=c(30,70))
#' params <- newParams(
#' nGenes=list("1"=c(700),"2"=list(gauss = 150, gamma = 150),"3"=list(gauss = 150, gamma = 150)),
#' nGenes=list("1"=c(700),"2"=list(gauss = 150, gamma = 150),
#' "3"=list(gauss = 150, gamma = 150)),
#' means=list("1"= c(2, 4),"2"= c(2, 4),"3"= c(2, 4)),
#' foldChanges = list("1"=NA,"2"= list(gauss = c(2, 4), gamma = c(2, 4)),
#' "3"= list(gauss = list(c(2, 4), c(2, 4)), gamma = list(c(2, 4), c(2, 4)))))
Expand Down Expand Up @@ -197,12 +188,13 @@ getParams <- function(params, names) {
#' params <- newParams()
#' params
#' # Set individually
#' params <- params <- setParams(params,proportions = c(80,20),foldChanges = list("1"=c(2,4),
#' "2"= list("gauss"=c(2,5),"gamma"=c(3,5))))
#' params <- params <- setParams(params,proportions = c(80,20),
#' foldChanges = list("1"=c(2,4),"2"= list("gauss"=c(2,5),"gamma"=c(3,5))))
#' params
#' # Set via update list
#' params <- setParams(params,update = list("nGenes"=list("1"=100,
#' "2"=list("gauss"=200,"gamma"=100)),"means"=list("1"=c(1,3),"2"=c(2,4)),"nPatients"=700))
#' "2"=list("gauss"=200,"gamma"=100)),"means"=list("1"=c(1,3),"2"=c(2,4)),
#' "nPatients"=700))
#' params
#' @export
setParams <- function(params, update = NULL, ...) {
Expand Down
Loading

0 comments on commit 8d34ba3

Please sign in to comment.