Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
multimodalR/R/simulation.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
285 lines (270 sloc)
11.8 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Simulation | |
#' | |
#' Simulate gene expression for the different simulation scenarios: Unimodal, | |
#' multimodal with gaussian distributions, multimodal starting with a gamma distribution and followed by gaussian distributions | |
#' | |
#' @param params Params object containing simulation parameters for all scenarios. | |
#' @param verbose logical. Whether to print progress messages | |
#' @param ... any additional parameter settings to override what is provided in \code{params}. | |
#' | |
#' @details | |
#' The 'modus' is the number of occurring distributions within one gene. | |
#' Modus '1' - Unimodal gaussian distribution | |
#' Gene expression for the unimodal gaussian distribution (scenario '1') is | |
#' simulated from the mean and the standard deviation taken from the validation data | |
#' for this gene. The number of values that are simulated for each | |
#' distribution equals the number of Patients {'nPatients'}: | |
#' | |
#' All other modi consist of two different scenarios: | |
#' In the 'gauss' scenario all occurring distributions are gaussian distributions. | |
#' In the 'gamma' scenario the first occuring distribution is a gamma distribution. | |
#' All other distributions are gaussian distributions. | |
#' | |
#' Modus '2' or higher - Bimodal distributions or higher modalities | |
#' | |
#' Gene expressions of the multimodal genes are | |
#' simulated automatically for each distribution. | |
#' Scenario 'gauss' | |
#' The different gaussian distributions are simulated with the corresponding mean | |
#' and a standard deviation and number of values taken from the validation data | |
#' for this gene. | |
#' | |
#' Scenario 'gamma' | |
#' The gamma distribution is simulated with the gamma shape and rate values from | |
#' the params object. The other gaussian distributions are simulated with the values | |
#' from the validation data. | |
#' | |
#' @return A named list with two components: 'validationData' and 'expressionData' | |
#' | |
#' @examples | |
#' \dontrun{ | |
#' simulation <- simulateExpression(params = newParams()) | |
#' # Override default parameters | |
#' # Not recommended, better use setParams; See \code{\link{Params}} | |
#' simulation <- simulateExpression(nGenes = list("1"=10, | |
#' "2"=list("gauss"= 10,"gamma"= 10)), nPatients = 50) | |
#' # without messages printed to the console | |
#' params <- newParams() | |
#' simulation <- simulateExpression(params = params,verbose = FALSE) | |
#' } | |
#' @export | |
#' @importFrom checkmate assertClass assertString assertCharacter assertList | |
#' @importFrom stats dnorm rgamma rnorm runif sd | |
#' | |
simulateExpression <- function(params = newParams(), verbose = TRUE,...) { | |
checkmate::assertClass(params, "Params") | |
params <- setParams(params, ...) | |
seed <- getParam(params, "seed") | |
set.seed(seed) | |
valData <- createValidationData(params = params, verbose = verbose) | |
nPatients <- getParam(params,"nPatients") | |
numGenes <- sum(unlist(getParam(params,name = "nGenes"))) | |
if (verbose){message("Drawing gene expression values...")} | |
simNotAdjusted <- lapply(1:numGenes,function(i){ | |
if(valData[[i]]$scenario=="unimodal-gaussian"){ | |
expression <- stats::rnorm(n = nPatients,mean = valData[[i]]$means,sd = valData[[i]]$sds) | |
}else if(valData[[i]]$scenario=="multimodal-gaussian"){ | |
expression <- unlist(lapply(1:valData[[i]]$modus,function(j){ | |
if((sum(valData[[i]]$sizes)-valData[[i]]$sizes[j]+ceiling(valData[[i]]$sizes[j]))<=nPatients){ | |
expression <- stats::rnorm(n = ceiling(valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j]) | |
}else{ | |
expression <- stats::rnorm(n = (valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j]) | |
} | |
})) | |
}else if(valData[[i]]$scenario=="multimodal-gamma+gaussian"){ | |
expression <- unlist(lapply(1:valData[[i]]$modus,function(j){ | |
if(j==1){ | |
if((sum(valData[[i]]$sizes)-valData[[i]]$sizes[j]+ceiling(valData[[i]]$sizes[j]))<=nPatients){ | |
expression <- stats::rgamma(n = ceiling(valData[[i]]$sizes[j]),shape = params@gamma$shape,rate = params@gamma$rate) | |
}else{ | |
expression <- stats::rgamma(n = (valData[[i]]$sizes[j]),shape = params@gamma$shape,rate = params@gamma$rate) | |
} | |
}else{ | |
if((sum(valData[[i]]$sizes)-valData[[i]]$sizes[j]+ceiling(valData[[i]]$sizes[j]))<=nPatients){ | |
expression <- stats::rnorm(n = ceiling(valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j]) | |
}else{ | |
expression <- stats::rnorm(n = (valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j]) | |
} | |
} | |
})) | |
} | |
expression | |
}) | |
simNotAdjusted <- data.frame(do.call('rbind',simNotAdjusted)) | |
sim <- lapply(1:nrow(simNotAdjusted),function(i){ | |
values <- unlist(lapply(1:ncol(simNotAdjusted),function(j){ | |
if(simNotAdjusted[i,j]<0){ | |
0 | |
}else{ | |
simNotAdjusted[i,j] | |
} | |
})) | |
}) | |
sim <- data.frame(do.call('rbind',sim)) | |
genes <- names(valData) | |
patients <- sprintf("Patient%04d", 1:nPatients) | |
rownames(sim) <- genes | |
colnames(sim) <- patients | |
if(verbose){message("Finished.")} | |
return(list(validationData = valData, expressionData = sim)) | |
} | |
#' Creating the validation data for algorithms. | |
#' | |
#' The validation data is generated by using the `Params` object and the parameters | |
#' that are stored in it. It is generated automatically when the | |
#' simulateExpression() function is executed | |
#' @param params Params object containing simulation parameters for all scenarios. | |
#' @param verbose logical. Whether to print progress messages | |
#' | |
#' @return A data.frame with validation data, from which expression values will be simulated. | |
#' | |
#' @details | |
#' For changing the parameters, please consider the following: | |
#' | |
#' Type shows with which type the slot has to be filled, | |
#' Usage shows further information for the usage of the values, | |
#' Default values describes the default values used to generate a `Params` | |
#' object. | |
#' | |
#' Parameter Type Default values | |
#' | |
#' nGenes: list, Default value: list("1"= 700, "2" = list("gauss"=150,"gamma"=150)), | |
#' means: list, Default value: list("1"=c(2,4),"2" = c(2,4)), | |
#' foldChanges: list, Default value: list("1" = c(2,4),"2" = list("gauss"= c(2,4),"gamma" = c(3,5))), | |
#' sd: list, Default value: list("mu"= 0.61,"lambda"=2.21), | |
#' gamma: list, Default value: list("shape" = 2, "rate" = 2), | |
#' proportions: vector, Default value: c(10,20,30,40,50,60,70,80,90), | |
#' nPatients: numeric, Default value: 200, | |
#' seed: numeric, Default value: sample(1:1e6, 1) | |
#' | |
#' @importFrom LaplacesDemon rinvgaussian | |
#' | |
createValidationData <- function(params = newParams(),verbose = TRUE){ | |
numGenes <- sum(unlist(getParam(params,name = "nGenes"))) | |
if (verbose) { message("Drawing parameters for expression simulation...") } | |
genes <- sprintf("Gene%04d",1:numGenes) | |
reps <- c() | |
reps <- unlist(lapply(1:length(unlist(params@nGenes)),function(i){ | |
if(i == 1){ | |
reps <- rep(x = i,times = params@nGenes$`1`) | |
}else{ | |
if(i%%2==0){ | |
reps <- c(reps,rep(x = i, times = sum(unlist(eval(parse(text=(paste0("params@nGenes$`",floor(i/2)+1,"`$gauss")))))))) | |
}else{ | |
reps <- c(reps,rep(x = i, times = sum(unlist(eval(parse(text=(paste0("params@nGenes$`",floor(i/2)+1,"`$gamma")))))))) | |
} | |
} | |
})) | |
scenarios <- sample(x = reps,size = numGenes,replace = FALSE) | |
drawGeneParams <- function(nPatients,modality, proportions,means=NULL,sdParms,foldChanges = NULL,gammaParms = NULL){ | |
ranMean1 <- c() | |
ranSd1 <- c() | |
if(!is.null(gammaParms)){ | |
ranMean1 <- gammaParms$shape/gammaParms$rate | |
ranSd1 <- sqrt(gammaParms$shape/(gammaParms$rate^2)) | |
}else{ | |
ranMean1 <- stats::runif(1, min = means[1], max = means[2]) | |
ranSd1 <- LaplacesDemon::rinvgaussian(n = 1, mu = sdParms$mu, lambda = sdParms$lambda) | |
} | |
ranProps <- 100 | |
ranFCs <- NA | |
means <- c() | |
ranSd <- c() | |
if(modality >= 2){ | |
repProp <- rep(proportions,modality) | |
combs <- t(utils::combn(x = repProp,m = modality,simplify=TRUE)) | |
combinations <- unlist(lapply(1:nrow(combs),function(i){ | |
if(sum(combs[i,])==100){ | |
i | |
} | |
})) | |
selectedCombs <- combs[combinations,] | |
combsSorted <- lapply(1:nrow(selectedCombs),function(i){ | |
sort(selectedCombs[i,]) | |
}) | |
combs <- do.call('rbind',combsSorted) | |
uniqueCombs <- unique.array(combs) | |
sampleProp <- sample(1:nrow(uniqueCombs),1,replace = FALSE) | |
ranProps <- sample(uniqueCombs[sampleProp,]) | |
ranSd<- unlist(lapply(2:modality,function(i){ | |
eval(parse(text = paste(paste0("ranSd",i), | |
"<- ", | |
LaplacesDemon::rinvgaussian(n = 1, mu = sdParms$mu, lambda = sdParms$lambda)))) | |
})) | |
if(modality==2){ | |
ranFCs <- unlist(lapply(1:1,function(i){ | |
eval(parse(text = paste(paste0("ranFC",i), | |
"<-",(stats::runif(1, min = foldChanges[1], max = foldChanges[2]))))) | |
})) | |
}else{ | |
ranFCs <- unlist(lapply(1:(length(foldChanges)),function(i){ | |
if(modality ==2){ | |
eval(parse(text = paste(paste0("ranFC",i), | |
"<-",(stats::runif(1, min = foldChanges[1], max = foldChanges[2]))))) | |
}else{ | |
eval(parse(text = paste(paste0("ranFC",i), | |
"<-",(stats::runif(1, min = foldChanges[[i]][1], max = foldChanges[[i]][2]))))) | |
} | |
})) | |
} | |
eval(parse(text = paste(paste0("ranMean",2:modality), | |
"<-",NA))) | |
for(i in 1:modality){ | |
if(i!=1){ | |
eval(parse(text = paste(paste0("ranMean",(i)),"<-",paste0("ranMean",(i-1)),"*",paste0("ranFCs[",(i-1),"]")))) | |
} | |
} | |
} | |
sds <- c(ranSd1,ranSd) | |
means <- unlist(lapply(1:modality,function(i){ | |
eval(parse(text = paste(paste0("ranMean",(i))))) | |
})) | |
sizes <- (ranProps/100)*nPatients | |
patients <- 1 | |
groups <- lapply(1:length(sizes),function(i){ | |
if(i==1){ | |
sprintf("Patient%04d",1:sizes[i]) | |
}else{ | |
sprintf("Patient%04d",((sum(sizes[1:(i-1)])+1):(sizes[i]+sum(sizes[1:(i-1)])))) | |
} | |
}) | |
scenario <-c() | |
if(modality==1){ | |
scenario = "unimodal-gaussian" | |
}else if(modality>1&&is.null(gammaParms)){ | |
scenario = "multimodal-gaussian" | |
}else{ | |
scenario <- "multimodal-gamma+gaussian" | |
} | |
return(list("modus"=modality,"scenario"=scenario,"means"=means,"foldChanges"=ranFCs,"sds"=sds,"proportions"=ranProps,"sizes"=sizes,"groups"=groups)) | |
} | |
valData <- lapply(1:length(scenarios), function(i) { | |
if(scenarios[i]==1){ | |
gP <- drawGeneParams(nPatients = params@nPatients, | |
modality = 1, | |
proportions = params@proportions, | |
means = params@means$`1`, | |
sdParms = params@sd) | |
} | |
if((scenarios[i]>1)&&(scenarios[i]%%2==0)){ | |
gP <- drawGeneParams(nPatients = params@nPatients, | |
modality = (floor(scenarios[i]/2))+1, | |
proportions = params@proportions, | |
means = eval(parse(text = paste(paste0("params@means$`",((floor(scenarios[i]/2))+1),"`")))), | |
sdParms = params@sd, | |
foldChanges = eval(parse(text = paste(paste0("params@foldChanges$`",((floor(scenarios[i]/2))+1),"`$gauss"))))) | |
} | |
if((scenarios[i]>1)&&(scenarios[i]%%2!=0)){ | |
gP <- drawGeneParams(nPatients = params@nPatients, | |
modality = (floor(scenarios[i]/2))+1, | |
proportions = params@proportions, | |
sdParms = params@sd, | |
foldChanges = eval(parse(text = paste(paste0("params@foldChanges$`",((floor(scenarios[i]/2))+1),"`$gamma")))), | |
gammaParms = params@gamma) | |
} | |
return(gP) | |
}) | |
names(valData)<- genes | |
if(verbose){ message("Finished creating validation data.") } | |
return(valData) | |
} |