update R package

ghanbari · Sep 6, 2017 · 339bb56 · 339bb56
1 parent a5a0db4
commit 339bb56
Show file tree

Hide file tree

Showing 16 changed files with 281 additions and 111 deletions.
diff --git a/R/Dcenter.R b/R/Dcenter.R
@@ -24,5 +24,6 @@ Dcenter <- function(x) {
   # normalise a
   ahat <- a - matrix(rowMeans(a), nrow = n, ncol = n) - matrix(colMeans(a), nrow = n, ncol = n, byrow = TRUE) + mean(a)
 
+  #return(ahat[!upper.tri(ahat)])
   return(ahat)
 }
diff --git a/R/dpm.R b/R/dpm.R
@@ -2,16 +2,26 @@
 #'
 #' @param data is a matrix containing samples in rows, variables in columns
 #'
-#' @return a matrix with score for every edge
+#' @return a nvar*nvar matrix containing reg.DPM scores for all links (edges) where nvar is the number of variables
 #'
 #' @import corpcor
 #'
 #' @examples
-#' ## Generate non-linear data
-#' data <- nldata(100,0.2)$data
+#' Example1:
+#' ## Generate non-linear data with 100 samples (from the network discussed in the paper) where the gaussian noise with standard deviation of sigma=0.2 is added to the generated data
+#' data <- nldata(nsamp=100,sigma=0.2)$data
 #'
-#'## run DPM
-#' dpm(data)
+#'## run regularized DPM
+#' res <-reg.dpm(data)
+#' head(res)
+#'
+#' Example2:
+#' ##load DREAM4 data (insilico_size10_1_knockouts)
+#' data(dream4)
+#'
+#'## Run regularized DPM
+#' res <- reg.dpm(dream4)
+#' head(res)
 #'
 #' @export
 
@@ -20,14 +30,10 @@
 ###############################
 
 dpm <- function(data) {
-  # data is a matrix containing samples in rows, variables in columns
-
   # compute the double-centred vectors of each variable and concatenate them into the columns of matrix d
   d <- apply(as.matrix(data), 2, Dcenter)
-
   # compute the precision matrix of d (cor2pcor is a function from corpcor Library)
   res <- corpcor::cor2pcor(cov(d))
-
   return(res)
 }
 

diff --git a/R/dream4_data.R b/R/dream4_data.R
@@ -0,0 +1,16 @@
+#' One of the expression data (insilico_size10_1_knockouts) from DREAM4 in-silico challenge
+#'
+#'
+#' @docType data
+#'
+#' @usage data(dream4)
+#'
+#' @format gene expression data as a matrix where columns correspond to 10 genes and rows correspond to samples (10 knockout).
+#'
+#' @keywords datasets
+#'
+#' @examples
+#' data(dream4)
+#' reg.dpm(dream4)
+#'
+"dream4"
diff --git a/R/dream4_gs.R b/R/dream4_gs.R
@@ -0,0 +1,14 @@
+#' The gold standard of insilico_size10 data from DREAM4 in-silico challenge
+#'
+#' @docType data
+#'
+#' @usage data(dream4gs)
+#'
+#' @format the adjacency matrix where columns and rows correspond 10 genes and the entry 1 in the matrix indicates a link between two corresponding genes.
+#'
+#' @keywords datasets
+#'
+#' @examples
+#' data(dream4gs)
+#'
+"dream4gs"
diff --git a/R/fisher_test.R b/R/fisher_test.R
@@ -1,37 +1,51 @@
-#' Function to compute fisher z transformation pvalues
+#' Function to compute fisher z-transformation pvalues for the links
 #'
-#' @param p a
-#' @param alpha a
-#' @param nsamp is the number of required samples
-#' @param nnodes is the number of nodes in the simulated graph
+#' @param score_matrix score matrix obtained from DPM or reg.DPM.
+#' @param alpha a significance level
+#' @param nsamp is the number of samples (number of rows of input matrix for (reg-)DPM ). Note that we need nsamp such that: nsamp*(nsamp+1)> 2*(nvar-5)
+#' @param nvar is the number of variables (number of columns of input matrix for (reg-)DPM )
 #'
-#' @return a list contains the pvalues (pval) and correspending decisions of having a significant link (isnotnull) for all links
+#' @return a list containing
+#' - the list of significant links as a data frame. Each line of the data frame corresponds to a link.
+#' The first and second columns are the corresponding nodes of the link, the third column is the score of the link and the forth column is the corresponding pvalue.
+#' - the threshold that all the links with scores more than it are significant at defined significance level.
 #'
 #' @examples
-#' ## Generate non-linear data
-#' data <- nldata(100,0.2)$data
+#' ## Load DREAM4 data (insilico_size10_1_knockouts)
+#' data(dream4)
 #'
 #'## Run regularized DPM
 #' res <- reg.dpm(data)
 #'
-#' ## Compute pvalues
-#' fisher_test(res, 0.05,)
+#' ## Get the list of significant links at significance level alpha=0.05
+#' fisher_test(res,alpha=0.05,10,10)$LinkList
+#'
+#' ##Get the threshold at significance level alpha=0.05
+#' fisher_test(score_matrix=res,alpha=0.05,nsamp=10,nvar=10)$threshold
+#'
 #'
 #' @export
 
 ###########################################################
 ### Function to compute fisher z transformation pvalues ###
 ###########################################################
 
-fisher_test <- function(p, alpha, nsamp, nnodes){
-  z = sqrt(nsamp-(nnodes-2)-3) * 0.5 * abs(log((1 + p) / (1 - p)))
-  pval = 2 * pnorm(-z)
-  pval_tr = qnorm(1-alpha/2)
-  isnotnull = (z > pval_tr )
-  temp = exp( 2 * pval_tr / (sqrt(nsamp-(nnodes-2)-3)) )
-  threshold = (temp -1)/(temp + 1)
-  return(list(pval=pval, isnotnull=isnotnull,threshold=threshold))
+fisher_test <- function(score_matrix, alpha, nsamp, nvar){
+  nsamp <- (nsamp*(nsamp+1))/2
+  if (nsamp-(nvar-2)-3 <= 0 ) stop("the test requires nsamp such that: nsamp*(nsamp+1)> 2*(nvar-5)")
+  diag(score_matrix) <- NA
+  score_matrix[upper.tri(score_matrix)] <- NA
+  linkList <- reshape2::melt(score_matrix, na.rm=TRUE)
+  colnames(linkList) <- c("Node1", "Node2", "Score")
+  p <- linkList["Score"]
+  z <- sqrt(nsamp-(nvar-2)-3) * abs(atan(p))
+  pval <- 2 * pnorm(-as.matrix(z))
+  colnames(pval) <- "Pvalue"
+  linkList <- cbind(linkList,pval)
+  linkList <- linkList[linkList$Pvalue<alpha,]
+  temp <- exp( 2 * pval_tr / (sqrt(nsamp-(nvar-2)-3)) )
+  threshold <- (temp -1)/(temp + 1)
+  return(list(LinkList=linkList,threshold=threshold))
 }
 
 
-
diff --git a/R/get_link.R b/R/get_link.R
@@ -7,23 +7,24 @@
 #' @return List of links in a data frame. Each line of the data frame corresponds to a link. The first and second columns are the corresponding nodes of the link and the third column is the score of the link.
 #'
 #' @examples
-#'## Generate linear data with 10 nodes and 100 samples
-#' data <- rdata(nnodes=10,nsamp=100)$data
-#' rownames(data) <- paste("Sample", 1:100, sep="")
-#' colnames(data) <- paste("Node", 1:10, sep="")
+#'##load DREAM4 data (insilico_size10_1_knockouts)
+#' data(dream4)
 #'
-#' ## Run reg_DPM
-#' score_mat <- reg.dpm(data)
+#'## Run regularized DPM
+#' score_mat <- reg.dpm(dream4)
 #'
-#' ## Get ranking of link
+#' ## Get ranking of links
 #' links <- get_link(score_mat)
 #' head(linkList)
 #'
-#' ## Get only top ranked links
+#' ## Get only the top ranked links
 #' links <- get_link(score_mat,top_ranked=10)
+#' head(linkList)
 #'
 #' ## Get only the links with a score higher than a threshold
 #' links <- get_link(score_mat,threshold = 0.2)
+#' head(linkList)
+#'
 #' @export
 #'
 #'

diff --git a/R/reg.dpm.R b/R/reg.dpm.R
@@ -1,18 +1,27 @@
 #' Function to compute regularized DPM
 #'
-#' @param data is a matrix containing samples in rows, variables in columns
+#' @param data is a matrix containing samples in rows and variables in columns
 #'
-#' @return a matrix with score for every edge
+#' @return a nvar*nvar matrix containing reg.DPM scores for all links (edges) where nvar is the number of variables
 #'
 #' @import corpcor
 #'
 #' @examples
-#' ## Generate non-linear data with 100 samples and the standard deviation of 0.2 of the noise added to the generated data
+#' example1:
+#' ## Generate non-linear data with 100 samples (from the network discussed in the paper) where the gaussian noise with standard deviation of sigma=0.2 is added to the generated data
 #' data <- nldata(nsamp=100,sigma=0.2)$data
 #'
 #'## run regularized DPM
 #' reg.dpm(data)
 #'
+#' example2:
+#' ##load DREAM4 data
+#' data(dream4)
+#'
+#'## Run regularized DPM
+#' res <- reg.dpm(data)
+#'
+#'
 #' @export
 
 
@@ -21,13 +30,10 @@
 ###########################################
 
 reg.dpm <- function(data) {
-  # data is a matrix containing samples in rows, variables in columns
-
   # compute the double-centred vectors of each variable and concatenate them into the columns of matrix d
   d <- apply(as.matrix(data), 2, Dcenter)
-
   # compute the regularised precision matrix of d
   res <- corpcor::pcor.shrink(d)
-
+  res <- matrix(res,nrow(res),ncol(res))
   return(res)
 }