Permalink
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
multimodalR/R/simulation.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Checked and updated all functions Added multiple filtering functions Added functions to process TCGA data Added data to exclude genes on allosomes Checked & updated vignettes Changed Package name to multimodalR Added function to check for valid unified output data format Added function to plot Survival Curves for TCGA data Added function to check for known bimodal genes in output First Complete Version of multimodalR
285 lines (270 sloc)
11.8 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Simulation | |
#' | |
#' Simulate gene expression for the different simulation scenarios: Unimodal, | |
#' multimodal with gaussian distributions, multimodal starting with a gamma distribution and followed by gaussian distributions | |
#' | |
#' @param params Params object containing simulation parameters for all scenarios. | |
#' @param verbose logical. Whether to print progress messages | |
#' @param ... any additional parameter settings to override what is provided in \code{params}. | |
#' | |
#' @details | |
#' The 'modus' is the number of occurring distributions within one gene. | |
#' Modus '1' - Unimodal gaussian distribution | |
#' Gene expression for the unimodal gaussian distribution (scenario '1') is | |
#' simulated from the mean and the standard deviation taken from the validation data | |
#' for this gene. The number of values that are simulated for each | |
#' distribution equals the number of Patients {'nPatients'}: | |
#' | |
#' All other modi consist of two different scenarios: | |
#' In the 'gauss' scenario all occurring distributions are gaussian distributions. | |
#' In the 'gamma' scenario the first occuring distribution is a gamma distribution. | |
#' All other distributions are gaussian distributions. | |
#' | |
#' Modus '2' or higher - Bimodal distributions or higher modalities | |
#' | |
#' Gene expressions of the multimodal genes are | |
#' simulated automatically for each distribution. | |
#' Scenario 'gauss' | |
#' The different gaussian distributions are simulated with the corresponding mean | |
#' and a standard deviation and number of values taken from the validation data | |
#' for this gene. | |
#' | |
#' Scenario 'gamma' | |
#' The gamma distribution is simulated with the gamma shape and rate values from | |
#' the params object. The other gaussian distributions are simulated with the values | |
#' from the validation data. | |
#' | |
#' @return A named list with two components: 'validationData' and 'expressionData' | |
#' | |
#' @examples | |
#' \dontrun{ | |
#' simulation <- simulateExpression(params = newParams()) | |
#' # Override default parameters | |
#' # Not recommended, better use setParams; See \code{\link{Params}} | |
#' simulation <- simulateExpression(nGenes = list("1"=10, | |
#' "2"=list("gauss"= 10,"gamma"= 10)), nPatients = 50) | |
#' # without messages printed to the console | |
#' params <- newParams() | |
#' simulation <- simulateExpression(params = params,verbose = FALSE) | |
#' } | |
#' @export | |
#' @importFrom checkmate assertClass assertString assertCharacter assertList | |
#' @importFrom stats dnorm rgamma rnorm runif sd | |
#' | |
simulateExpression <- function(params = newParams(), verbose = TRUE,...) { | |
checkmate::assertClass(params, "Params") | |
params <- setParams(params, ...) | |
seed <- getParam(params, "seed") | |
set.seed(seed) | |
valData <- createValidationData(params = params, verbose = verbose) | |
nPatients <- getParam(params,"nPatients") | |
numGenes <- sum(unlist(getParam(params,name = "nGenes"))) | |
if (verbose){message("Drawing gene expression values...")} | |
simNotAdjusted <- lapply(1:numGenes,function(i){ | |
if(valData[[i]]$scenario=="unimodal-gaussian"){ | |
expression <- stats::rnorm(n = nPatients,mean = valData[[i]]$means,sd = valData[[i]]$sds) | |
}else if(valData[[i]]$scenario=="multimodal-gaussian"){ | |
expression <- unlist(lapply(1:valData[[i]]$modus,function(j){ | |
if((sum(valData[[i]]$sizes)-valData[[i]]$sizes[j]+ceiling(valData[[i]]$sizes[j]))<=nPatients){ | |
expression <- stats::rnorm(n = ceiling(valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j]) | |
}else{ | |
expression <- stats::rnorm(n = (valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j]) | |
} | |
})) | |
}else if(valData[[i]]$scenario=="multimodal-gamma+gaussian"){ | |
expression <- unlist(lapply(1:valData[[i]]$modus,function(j){ | |
if(j==1){ | |
if((sum(valData[[i]]$sizes)-valData[[i]]$sizes[j]+ceiling(valData[[i]]$sizes[j]))<=nPatients){ | |
expression <- stats::rgamma(n = ceiling(valData[[i]]$sizes[j]),shape = params@gamma$shape,rate = params@gamma$rate) | |
}else{ | |
expression <- stats::rgamma(n = (valData[[i]]$sizes[j]),shape = params@gamma$shape,rate = params@gamma$rate) | |
} | |
}else{ | |
if((sum(valData[[i]]$sizes)-valData[[i]]$sizes[j]+ceiling(valData[[i]]$sizes[j]))<=nPatients){ | |
expression <- stats::rnorm(n = ceiling(valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j]) | |
}else{ | |
expression <- stats::rnorm(n = (valData[[i]]$sizes[j]),mean = valData[[i]]$means[j],sd = valData[[i]]$sds[j]) | |
} | |
} | |
})) | |
} | |
expression | |
}) | |
simNotAdjusted <- data.frame(do.call('rbind',simNotAdjusted)) | |
sim <- lapply(1:nrow(simNotAdjusted),function(i){ | |
values <- unlist(lapply(1:ncol(simNotAdjusted),function(j){ | |
if(simNotAdjusted[i,j]<0){ | |
0 | |
}else{ | |
simNotAdjusted[i,j] | |
} | |
})) | |
}) | |
sim <- data.frame(do.call('rbind',sim)) | |
genes <- names(valData) | |
patients <- sprintf("Patient%04d", 1:nPatients) | |
rownames(sim) <- genes | |
colnames(sim) <- patients | |
if(verbose){message("Finished.")} | |
return(list(validationData = valData, expressionData = sim)) | |
} | |
#' Creating the validation data for algorithms. | |
#' | |
#' The validation data is generated by using the `Params` object and the parameters | |
#' that are stored in it. It is generated automatically when the | |
#' simulateExpression() function is executed | |
#' @param params Params object containing simulation parameters for all scenarios. | |
#' @param verbose logical. Whether to print progress messages | |
#' | |
#' @return A data.frame with validation data, from which expression values will be simulated. | |
#' | |
#' @details | |
#' For changing the parameters, please consider the following: | |
#' | |
#' Type shows with which type the slot has to be filled, | |
#' Usage shows further information for the usage of the values, | |
#' Default values describes the default values used to generate a `Params` | |
#' object. | |
#' | |
#' Parameter Type Default values | |
#' | |
#' nGenes: list, Default value: list("1"= 700, "2" = list("gauss"=150,"gamma"=150)), | |
#' means: list, Default value: list("1"=c(2,4),"2" = c(2,4)), | |
#' foldChanges: list, Default value: list("1" = c(2,4),"2" = list("gauss"= c(2,4),"gamma" = c(3,5))), | |
#' sd: list, Default value: list("mu"= 0.61,"lambda"=2.21), | |
#' gamma: list, Default value: list("shape" = 2, "rate" = 2), | |
#' proportions: vector, Default value: c(10,20,30,40,50,60,70,80,90), | |
#' nPatients: numeric, Default value: 200, | |
#' seed: numeric, Default value: sample(1:1e6, 1) | |
#' | |
#' @importFrom LaplacesDemon rinvgaussian | |
#' | |
createValidationData <- function(params = newParams(),verbose = TRUE){ | |
numGenes <- sum(unlist(getParam(params,name = "nGenes"))) | |
if (verbose) { message("Drawing parameters for expression simulation...") } | |
genes <- sprintf("Gene%04d",1:numGenes) | |
reps <- c() | |
reps <- unlist(lapply(1:length(unlist(params@nGenes)),function(i){ | |
if(i == 1){ | |
reps <- rep(x = i,times = params@nGenes$`1`) | |
}else{ | |
if(i%%2==0){ | |
reps <- c(reps,rep(x = i, times = sum(unlist(eval(parse(text=(paste0("params@nGenes$`",floor(i/2)+1,"`$gauss")))))))) | |
}else{ | |
reps <- c(reps,rep(x = i, times = sum(unlist(eval(parse(text=(paste0("params@nGenes$`",floor(i/2)+1,"`$gamma")))))))) | |
} | |
} | |
})) | |
scenarios <- sample(x = reps,size = numGenes,replace = FALSE) | |
drawGeneParams <- function(nPatients,modality, proportions,means=NULL,sdParms,foldChanges = NULL,gammaParms = NULL){ | |
ranMean1 <- c() | |
ranSd1 <- c() | |
if(!is.null(gammaParms)){ | |
ranMean1 <- gammaParms$shape/gammaParms$rate | |
ranSd1 <- sqrt(gammaParms$shape/(gammaParms$rate^2)) | |
}else{ | |
ranMean1 <- stats::runif(1, min = means[1], max = means[2]) | |
ranSd1 <- LaplacesDemon::rinvgaussian(n = 1, mu = sdParms$mu, lambda = sdParms$lambda) | |
} | |
ranProps <- 100 | |
ranFCs <- NA | |
means <- c() | |
ranSd <- c() | |
if(modality >= 2){ | |
repProp <- rep(proportions,modality) | |
combs <- t(utils::combn(x = repProp,m = modality,simplify=TRUE)) | |
combinations <- unlist(lapply(1:nrow(combs),function(i){ | |
if(sum(combs[i,])==100){ | |
i | |
} | |
})) | |
selectedCombs <- combs[combinations,] | |
combsSorted <- lapply(1:nrow(selectedCombs),function(i){ | |
sort(selectedCombs[i,]) | |
}) | |
combs <- do.call('rbind',combsSorted) | |
uniqueCombs <- unique.array(combs) | |
sampleProp <- sample(1:nrow(uniqueCombs),1,replace = FALSE) | |
ranProps <- sample(uniqueCombs[sampleProp,]) | |
ranSd<- unlist(lapply(2:modality,function(i){ | |
eval(parse(text = paste(paste0("ranSd",i), | |
"<- ", | |
LaplacesDemon::rinvgaussian(n = 1, mu = sdParms$mu, lambda = sdParms$lambda)))) | |
})) | |
if(modality==2){ | |
ranFCs <- unlist(lapply(1:1,function(i){ | |
eval(parse(text = paste(paste0("ranFC",i), | |
"<-",(stats::runif(1, min = foldChanges[1], max = foldChanges[2]))))) | |
})) | |
}else{ | |
ranFCs <- unlist(lapply(1:(length(foldChanges)),function(i){ | |
if(modality ==2){ | |
eval(parse(text = paste(paste0("ranFC",i), | |
"<-",(stats::runif(1, min = foldChanges[1], max = foldChanges[2]))))) | |
}else{ | |
eval(parse(text = paste(paste0("ranFC",i), | |
"<-",(stats::runif(1, min = foldChanges[[i]][1], max = foldChanges[[i]][2]))))) | |
} | |
})) | |
} | |
eval(parse(text = paste(paste0("ranMean",2:modality), | |
"<-",NA))) | |
for(i in 1:modality){ | |
if(i!=1){ | |
eval(parse(text = paste(paste0("ranMean",(i)),"<-",paste0("ranMean",(i-1)),"*",paste0("ranFCs[",(i-1),"]")))) | |
} | |
} | |
} | |
sds <- c(ranSd1,ranSd) | |
means <- unlist(lapply(1:modality,function(i){ | |
eval(parse(text = paste(paste0("ranMean",(i))))) | |
})) | |
sizes <- (ranProps/100)*nPatients | |
patients <- 1 | |
groups <- lapply(1:length(sizes),function(i){ | |
if(i==1){ | |
sprintf("Patient%04d",1:sizes[i]) | |
}else{ | |
sprintf("Patient%04d",((sum(sizes[1:(i-1)])+1):(sizes[i]+sum(sizes[1:(i-1)])))) | |
} | |
}) | |
scenario <-c() | |
if(modality==1){ | |
scenario = "unimodal-gaussian" | |
}else if(modality>1&&is.null(gammaParms)){ | |
scenario = "multimodal-gaussian" | |
}else{ | |
scenario <- "multimodal-gamma+gaussian" | |
} | |
return(list("modus"=modality,"scenario"=scenario,"means"=means,"foldChanges"=ranFCs,"sds"=sds,"proportions"=ranProps,"sizes"=sizes,"groups"=groups)) | |
} | |
valData <- lapply(1:length(scenarios), function(i) { | |
if(scenarios[i]==1){ | |
gP <- drawGeneParams(nPatients = params@nPatients, | |
modality = 1, | |
proportions = params@proportions, | |
means = params@means$`1`, | |
sdParms = params@sd) | |
} | |
if((scenarios[i]>1)&&(scenarios[i]%%2==0)){ | |
gP <- drawGeneParams(nPatients = params@nPatients, | |
modality = (floor(scenarios[i]/2))+1, | |
proportions = params@proportions, | |
means = eval(parse(text = paste(paste0("params@means$`",((floor(scenarios[i]/2))+1),"`")))), | |
sdParms = params@sd, | |
foldChanges = eval(parse(text = paste(paste0("params@foldChanges$`",((floor(scenarios[i]/2))+1),"`$gauss"))))) | |
} | |
if((scenarios[i]>1)&&(scenarios[i]%%2!=0)){ | |
gP <- drawGeneParams(nPatients = params@nPatients, | |
modality = (floor(scenarios[i]/2))+1, | |
proportions = params@proportions, | |
sdParms = params@sd, | |
foldChanges = eval(parse(text = paste(paste0("params@foldChanges$`",((floor(scenarios[i]/2))+1),"`$gamma")))), | |
gammaParms = params@gamma) | |
} | |
return(gP) | |
}) | |
names(valData)<- genes | |
if(verbose){ message("Finished creating validation data.") } | |
return(valData) | |
} |