diff --git a/R/Dcenter.R b/R/Dcenter.R index 1d03a62..9b57af7 100644 --- a/R/Dcenter.R +++ b/R/Dcenter.R @@ -24,5 +24,6 @@ Dcenter <- function(x) { # normalise a ahat <- a - matrix(rowMeans(a), nrow = n, ncol = n) - matrix(colMeans(a), nrow = n, ncol = n, byrow = TRUE) + mean(a) + #return(ahat[!upper.tri(ahat)]) return(ahat) } diff --git a/R/dpm.R b/R/dpm.R index 56acd57..14e4eaf 100644 --- a/R/dpm.R +++ b/R/dpm.R @@ -2,16 +2,26 @@ #' #' @param data is a matrix containing samples in rows, variables in columns #' -#' @return a matrix with score for every edge +#' @return a nvar*nvar matrix containing reg.DPM scores for all links (edges) where nvar is the number of variables #' #' @import corpcor #' #' @examples -#' ## Generate non-linear data -#' data <- nldata(100,0.2)$data +#' Example1: +#' ## Generate non-linear data with 100 samples (from the network discussed in the paper) where the gaussian noise with standard deviation of sigma=0.2 is added to the generated data +#' data <- nldata(nsamp=100,sigma=0.2)$data #' -#'## run DPM -#' dpm(data) +#'## run regularized DPM +#' res <-reg.dpm(data) +#' head(res) +#' +#' Example2: +#' ##load DREAM4 data (insilico_size10_1_knockouts) +#' data(dream4) +#' +#'## Run regularized DPM +#' res <- reg.dpm(dream4) +#' head(res) #' #' @export @@ -20,14 +30,10 @@ ############################### dpm <- function(data) { - # data is a matrix containing samples in rows, variables in columns - # compute the double-centred vectors of each variable and concatenate them into the columns of matrix d d <- apply(as.matrix(data), 2, Dcenter) - # compute the precision matrix of d (cor2pcor is a function from corpcor Library) res <- corpcor::cor2pcor(cov(d)) - return(res) } diff --git a/R/dream4_data.R b/R/dream4_data.R new file mode 100644 index 0000000..3f6dc66 --- /dev/null +++ b/R/dream4_data.R @@ -0,0 +1,16 @@ +#' One of the expression data (insilico_size10_1_knockouts) from DREAM4 in-silico challenge +#' +#' +#' @docType data +#' +#' @usage data(dream4) +#' +#' @format gene expression data as a matrix where columns correspond to 10 genes and rows correspond to samples (10 knockout). +#' +#' @keywords datasets +#' +#' @examples +#' data(dream4) +#' reg.dpm(dream4) +#' +"dream4" diff --git a/R/dream4_gs.R b/R/dream4_gs.R new file mode 100644 index 0000000..66d3f05 --- /dev/null +++ b/R/dream4_gs.R @@ -0,0 +1,14 @@ +#' The gold standard of insilico_size10 data from DREAM4 in-silico challenge +#' +#' @docType data +#' +#' @usage data(dream4gs) +#' +#' @format the adjacency matrix where columns and rows correspond 10 genes and the entry 1 in the matrix indicates a link between two corresponding genes. +#' +#' @keywords datasets +#' +#' @examples +#' data(dream4gs) +#' +"dream4gs" diff --git a/R/fisher_test.R b/R/fisher_test.R index f27faf6..d5c72fc 100644 --- a/R/fisher_test.R +++ b/R/fisher_test.R @@ -1,21 +1,28 @@ -#' Function to compute fisher z transformation pvalues +#' Function to compute fisher z-transformation pvalues for the links #' -#' @param p a -#' @param alpha a -#' @param nsamp is the number of required samples -#' @param nnodes is the number of nodes in the simulated graph +#' @param score_matrix score matrix obtained from DPM or reg.DPM. +#' @param alpha a significance level +#' @param nsamp is the number of samples (number of rows of input matrix for (reg-)DPM ). Note that we need nsamp such that: nsamp*(nsamp+1)> 2*(nvar-5) +#' @param nvar is the number of variables (number of columns of input matrix for (reg-)DPM ) #' -#' @return a list contains the pvalues (pval) and correspending decisions of having a significant link (isnotnull) for all links +#' @return a list containing +#' - the list of significant links as a data frame. Each line of the data frame corresponds to a link. +#' The first and second columns are the corresponding nodes of the link, the third column is the score of the link and the forth column is the corresponding pvalue. +#' - the threshold that all the links with scores more than it are significant at defined significance level. #' #' @examples -#' ## Generate non-linear data -#' data <- nldata(100,0.2)$data +#' ## Load DREAM4 data (insilico_size10_1_knockouts) +#' data(dream4) #' #'## Run regularized DPM #' res <- reg.dpm(data) #' -#' ## Compute pvalues -#' fisher_test(res, 0.05,) +#' ## Get the list of significant links at significance level alpha=0.05 +#' fisher_test(res,alpha=0.05,10,10)$LinkList +#' +#' ##Get the threshold at significance level alpha=0.05 +#' fisher_test(score_matrix=res,alpha=0.05,nsamp=10,nvar=10)$threshold +#' #' #' @export @@ -23,15 +30,22 @@ ### Function to compute fisher z transformation pvalues ### ########################################################### -fisher_test <- function(p, alpha, nsamp, nnodes){ - z = sqrt(nsamp-(nnodes-2)-3) * 0.5 * abs(log((1 + p) / (1 - p))) - pval = 2 * pnorm(-z) - pval_tr = qnorm(1-alpha/2) - isnotnull = (z > pval_tr ) - temp = exp( 2 * pval_tr / (sqrt(nsamp-(nnodes-2)-3)) ) - threshold = (temp -1)/(temp + 1) - return(list(pval=pval, isnotnull=isnotnull,threshold=threshold)) +fisher_test <- function(score_matrix, alpha, nsamp, nvar){ + nsamp <- (nsamp*(nsamp+1))/2 + if (nsamp-(nvar-2)-3 <= 0 ) stop("the test requires nsamp such that: nsamp*(nsamp+1)> 2*(nvar-5)") + diag(score_matrix) <- NA + score_matrix[upper.tri(score_matrix)] <- NA + linkList <- reshape2::melt(score_matrix, na.rm=TRUE) + colnames(linkList) <- c("Node1", "Node2", "Score") + p <- linkList["Score"] + z <- sqrt(nsamp-(nvar-2)-3) * abs(atan(p)) + pval <- 2 * pnorm(-as.matrix(z)) + colnames(pval) <- "Pvalue" + linkList <- cbind(linkList,pval) + linkList <- linkList[linkList$Pvalue 2*(nvar-5)} -\item{nnodes}{is the number of nodes in the simulated graph} +\item{nvar}{is the number of variables (number of columns of input matrix for (reg-)DPM )} } \value{ -a list contains the pvalues (pval) and correspending decisions of having a significant link (isnotnull) for all links +a list containing +- the list of significant links as a data frame. Each line of the data frame corresponds to a link. +The first and second columns are the corresponding nodes of the link, the third column is the score of the link and the forth column is the corresponding pvalue. +- the threshold that all the links with scores more than it are significant at defined significance level. } \description{ -Function to compute fisher z transformation pvalues +Function to compute fisher z-transformation pvalues for the links } \examples{ -## Generate non-linear data -data <- nldata(100,0.2)$data +## Load DREAM4 data (insilico_size10_1_knockouts) +data(dream4) ## Run regularized DPM res <- reg.dpm(data) -## Compute pvalues -fisher_test(res, 0.05,) +## Get the list of significant links at significance level alpha=0.05 +fisher_test(res,alpha=0.05,10,10)$LinkList + +##Get the threshold at significance level alpha=0.05 +fisher_test(score_matrix=res,alpha=0.05,nsamp=10,nvar=10)$threshold + } diff --git a/man/get_link.Rd b/man/get_link.Rd index 2e85f34..aa975df 100644 --- a/man/get_link.Rd +++ b/man/get_link.Rd @@ -20,21 +20,22 @@ List of links in a data frame. Each line of the data frame corresponds to a link Converts the score matrix (obtained from DPM or reg-DPM) to a sorted list of non-directed links (links with higher score first). } \examples{ -## Generate linear data with 10 nodes and 100 samples -data <- rdata(nnodes=10,nsamp=100)$data -rownames(data) <- paste("Sample", 1:100, sep="") -colnames(data) <- paste("Node", 1:10, sep="") +##load DREAM4 data (insilico_size10_1_knockouts) +data(dream4) -## Run reg_DPM -score_mat <- reg.dpm(data) +## Run regularized DPM +score_mat <- reg.dpm(dream4) -## Get ranking of link +## Get ranking of links links <- get_link(score_mat) head(linkList) -## Get only top ranked links +## Get only the top ranked links links <- get_link(score_mat,top_ranked=10) +head(linkList) ## Get only the links with a score higher than a threshold links <- get_link(score_mat,threshold = 0.2) +head(linkList) + } diff --git a/man/reg.dpm.Rd b/man/reg.dpm.Rd index ff5040a..d877cd6 100644 --- a/man/reg.dpm.Rd +++ b/man/reg.dpm.Rd @@ -7,19 +7,28 @@ reg.dpm(data) } \arguments{ -\item{data}{is a matrix containing samples in rows, variables in columns} +\item{data}{is a matrix containing samples in rows and variables in columns} } \value{ -a matrix with score for every edge +a nvar*nvar matrix containing reg.DPM scores for all links (edges) where nvar is the number of variables } \description{ Function to compute regularized DPM } \examples{ -## Generate non-linear data with 100 samples and the standard deviation of 0.2 of the noise added to the generated data +example1: +## Generate non-linear data with 100 samples (from the network discussed in the paper) where the gaussian noise with standard deviation of sigma=0.2 is added to the generated data data <- nldata(nsamp=100,sigma=0.2)$data ## run regularized DPM reg.dpm(data) +example2: +##load DREAM4 data +data(dream4) + +## Run regularized DPM +res <- reg.dpm(data) + + }