Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#' Simulation
#'
#' Simulate gene expression for the different simulation scenarios: Unimodal,
#' multimodal with gaussian distributions, multimodal starting with a gamma distribution and followed by gaussian distributions
#'
#' @param params Params object containing simulation parameters for all scenarios.
#' @param verbose logical. Whether to print progress messages
#' @param ... any additional parameter settings to override what is provided in \code{params}.
#'
#' @details
#' The 'modus' is the number of occurring distributions within one gene.
#' Modus '1' - Unimodal gaussian distribution
#' Gene expression for the unimodal gaussian distribution (scenario '1') is
#' simulated from the mean and the standard deviation taken from the validation data
#' for this gene. The number of values that are simulated for each
#' distribution equals the number of Patients {'nPatients'}:
#'
#' All other modi consist of two different scenarios:
#' In the 'gauss' scenario all occurring distributions are gaussian distributions.
#' In the 'gamma' scenario the first occuring distribution is a gamma distribution.
#' All other distributions are gaussian distributions.
#'
#' Modus '2' or higher - Bimodal distributions or higher modalities
#'
#' Gene expressions of the multimodal genes are
#' simulated automatically for each distribution.
#' Scenario 'gauss'
#' The different gaussian distributions are simulated with the corresponding mean
#' and a standard deviation and number of values taken from the validation data
#' for this gene.
#'
#' Scenario 'gamma'
#' The gamma distribution is simulated with the gamma shape and rate values from
#' the params object. The other gaussian distributions are simulated with the values
#' from the validation data.
#'
#' @return A named list with two components: 'validationData' and 'expressionData'
#'
#' @examples
#' \dontrun{
#' simulation <- simulateExpression(params = newParams())
#' # Override default parameters
#' # Not recommended, better use setParams; See \code{\link{Params}}
#' simulation <- simulateExpression(nGenes = list("1"=10,
#' "2"=list("gauss"= 10,"gamma"= 10)), nPatients = 50)
#' # without messages printed to the console
#' params <- newParams()
#' simulation <- simulateExpression(params = params,verbose = FALSE)
#' }
#' @export
#' @importFrom checkmate assertClass assertString assertCharacter assertList
#' @importFrom stats dnorm rgamma rnorm runif sd
#'
simulateExpression <- function(params = newParams(), verbose = TRUE,...) {
checkmate::assertClass(params, "Params")
params <- setParams(params, ...)
seed <- getParam(params, "seed")
set.seed(seed)
valData <- createValidationData(params = params, verbose = verbose)
nPatients <- getParam(params,"nPatients")
numGenes <- sum(unlist(getParam(params,name = "nGenes")))
if (verbose){message("Drawing gene expression values...")}
simNotAdjusted <- lapply(1:numGenes,function(i){
if(valData[[i]]$scenario=="unimodal-gaussian"){
expression <- stats::rnorm(n = nPatients,mean = valData[[i]]$means,sd = valData[[i]]$sds)
}else if(valData[[i]]$scenario=="multimodal-gaussian"){
expression <- unlist(lapply(1:valData[[i]]$modus,function(j){
if((sum(valData[[i]]$sizes)-valData[[i]]$sizes[j]+ceiling(valData[[i]]$sizes[j]))<=nPatients){
expression <- stats::rnorm(n = ceiling(valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j])
}else{
expression <- stats::rnorm(n = (valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j])
}
}))
}else if(valData[[i]]$scenario=="multimodal-gamma+gaussian"){
expression <- unlist(lapply(1:valData[[i]]$modus,function(j){
if(j==1){
if((sum(valData[[i]]$sizes)-valData[[i]]$sizes[j]+ceiling(valData[[i]]$sizes[j]))<=nPatients){
expression <- stats::rgamma(n = ceiling(valData[[i]]$sizes[j]),shape = params@gamma$shape,rate = params@gamma$rate)
}else{
expression <- stats::rgamma(n = (valData[[i]]$sizes[j]),shape = params@gamma$shape,rate = params@gamma$rate)
}
}else{
if((sum(valData[[i]]$sizes)-valData[[i]]$sizes[j]+ceiling(valData[[i]]$sizes[j]))<=nPatients){
expression <- stats::rnorm(n = ceiling(valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j])
}else{
expression <- stats::rnorm(n = (valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j])
}
}
}))
}
expression
})
simNotAdjusted <- data.frame(do.call('rbind',simNotAdjusted))
sim <- lapply(1:nrow(simNotAdjusted),function(i){
values <- unlist(lapply(1:ncol(simNotAdjusted),function(j){
if(simNotAdjusted[i,j]<0){
0
}else{
simNotAdjusted[i,j]
}
}))
})
sim <- data.frame(do.call('rbind',sim))
genes <- names(valData)
patients <- sprintf("Patient%04d", 1:nPatients)
rownames(sim) <- genes
colnames(sim) <- patients
if(verbose){message("Finished.")}
return(list(validationData = valData, expressionData = sim))
}
#' Creating the validation data for algorithms.
#'
#' The validation data is generated by using the `Params` object and the parameters
#' that are stored in it. It is generated automatically when the
#' simulateExpression() function is executed
#' @param params Params object containing simulation parameters for all scenarios.
#' @param verbose logical. Whether to print progress messages
#'
#' @return A data.frame with validation data, from which expression values will be simulated.
#'
#' @details
#' For changing the parameters, please consider the following:
#'
#' Type shows with which type the slot has to be filled,
#' Usage shows further information for the usage of the values,
#' Default values describes the default values used to generate a `Params`
#' object.
#'
#' Parameter Type Default values
#'
#' nGenes: list, Default value: list("1"= 700, "2" = list("gauss"=150,"gamma"=150)),
#' means: list, Default value: list("1"=c(2,4),"2" = c(2,4)),
#' foldChanges: list, Default value: list("1" = c(2,4),"2" = list("gauss"= c(2,4),"gamma" = c(3,5))),
#' sd: list, Default value: list("mu"= 0.61,"lambda"=2.21),
#' gamma: list, Default value: list("shape" = 2, "rate" = 2),
#' proportions: vector, Default value: c(10,20,30,40,50,60,70,80,90),
#' nPatients: numeric, Default value: 200,
#' seed: numeric, Default value: sample(1:1e6, 1)
#'
#' @importFrom LaplacesDemon rinvgaussian
#'
createValidationData <- function(params = newParams(),verbose = TRUE){
numGenes <- sum(unlist(getParam(params,name = "nGenes")))
if (verbose) { message("Drawing parameters for expression simulation...") }
genes <- sprintf("Gene%04d",1:numGenes)
reps <- c()
reps <- unlist(lapply(1:length(unlist(params@nGenes)),function(i){
if(i == 1){
reps <- rep(x = i,times = params@nGenes$`1`)
}else{
if(i%%2==0){
reps <- c(reps,rep(x = i, times = sum(unlist(eval(parse(text=(paste0("params@nGenes$`",floor(i/2)+1,"`$gauss"))))))))
}else{
reps <- c(reps,rep(x = i, times = sum(unlist(eval(parse(text=(paste0("params@nGenes$`",floor(i/2)+1,"`$gamma"))))))))
}
}
}))
scenarios <- sample(x = reps,size = numGenes,replace = FALSE)
drawGeneParams <- function(nPatients,modality, proportions,means=NULL,sdParms,foldChanges = NULL,gammaParms = NULL){
ranMean1 <- c()
ranSd1 <- c()
if(!is.null(gammaParms)){
ranMean1 <- gammaParms$shape/gammaParms$rate
ranSd1 <- sqrt(gammaParms$shape/(gammaParms$rate^2))
}else{
ranMean1 <- stats::runif(1, min = means[1], max = means[2])
ranSd1 <- LaplacesDemon::rinvgaussian(n = 1, mu = sdParms$mu, lambda = sdParms$lambda)
}
ranProps <- 100
ranFCs <- NA
means <- c()
ranSd <- c()
if(modality >= 2){
repProp <- rep(proportions,modality)
combs <- t(utils::combn(x = repProp,m = modality,simplify=TRUE))
combinations <- unlist(lapply(1:nrow(combs),function(i){
if(sum(combs[i,])==100){
i
}
}))
selectedCombs <- combs[combinations,]
combsSorted <- lapply(1:nrow(selectedCombs),function(i){
sort(selectedCombs[i,])
})
combs <- do.call('rbind',combsSorted)
uniqueCombs <- unique.array(combs)
sampleProp <- sample(1:nrow(uniqueCombs),1,replace = FALSE)
ranProps <- sample(uniqueCombs[sampleProp,])
ranSd<- unlist(lapply(2:modality,function(i){
eval(parse(text = paste(paste0("ranSd",i),
"<- ",
LaplacesDemon::rinvgaussian(n = 1, mu = sdParms$mu, lambda = sdParms$lambda))))
}))
if(modality==2){
ranFCs <- unlist(lapply(1:1,function(i){
eval(parse(text = paste(paste0("ranFC",i),
"<-",(stats::runif(1, min = foldChanges[1], max = foldChanges[2])))))
}))
}else{
ranFCs <- unlist(lapply(1:(length(foldChanges)),function(i){
if(modality ==2){
eval(parse(text = paste(paste0("ranFC",i),
"<-",(stats::runif(1, min = foldChanges[1], max = foldChanges[2])))))
}else{
eval(parse(text = paste(paste0("ranFC",i),
"<-",(stats::runif(1, min = foldChanges[[i]][1], max = foldChanges[[i]][2])))))
}
}))
}
eval(parse(text = paste(paste0("ranMean",2:modality),
"<-",NA)))
for(i in 1:modality){
if(i!=1){
eval(parse(text = paste(paste0("ranMean",(i)),"<-",paste0("ranMean",(i-1)),"*",paste0("ranFCs[",(i-1),"]"))))
}
}
}
sds <- c(ranSd1,ranSd)
means <- unlist(lapply(1:modality,function(i){
eval(parse(text = paste(paste0("ranMean",(i)))))
}))
sizes <- (ranProps/100)*nPatients
patients <- 1
groups <- lapply(1:length(sizes),function(i){
if(i==1){
sprintf("Patient%04d",1:sizes[i])
}else{
sprintf("Patient%04d",((sum(sizes[1:(i-1)])+1):(sizes[i]+sum(sizes[1:(i-1)]))))
}
})
scenario <-c()
if(modality==1){
scenario = "unimodal-gaussian"
}else if(modality>1&&is.null(gammaParms)){
scenario = "multimodal-gaussian"
}else{
scenario <- "multimodal-gamma+gaussian"
}
return(list("modus"=modality,"scenario"=scenario,"means"=means,"foldChanges"=ranFCs,"sds"=sds,"proportions"=ranProps,"sizes"=sizes,"groups"=groups))
}
valData <- lapply(1:length(scenarios), function(i) {
if(scenarios[i]==1){
gP <- drawGeneParams(nPatients = params@nPatients,
modality = 1,
proportions = params@proportions,
means = params@means$`1`,
sdParms = params@sd)
}
if((scenarios[i]>1)&&(scenarios[i]%%2==0)){
gP <- drawGeneParams(nPatients = params@nPatients,
modality = (floor(scenarios[i]/2))+1,
proportions = params@proportions,
means = eval(parse(text = paste(paste0("params@means$`",((floor(scenarios[i]/2))+1),"`")))),
sdParms = params@sd,
foldChanges = eval(parse(text = paste(paste0("params@foldChanges$`",((floor(scenarios[i]/2))+1),"`$gauss")))))
}
if((scenarios[i]>1)&&(scenarios[i]%%2!=0)){
gP <- drawGeneParams(nPatients = params@nPatients,
modality = (floor(scenarios[i]/2))+1,
proportions = params@proportions,
sdParms = params@sd,
foldChanges = eval(parse(text = paste(paste0("params@foldChanges$`",((floor(scenarios[i]/2))+1),"`$gamma")))),
gammaParms = params@gamma)
}
return(gP)
})
names(valData)<- genes
if(verbose){ message("Finished creating validation data.") }
return(valData)
}