Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
update R package
  • Loading branch information
Ghanbari committed Sep 6, 2017
1 parent a5a0db4 commit 339bb56
Show file tree
Hide file tree
Showing 16 changed files with 281 additions and 111 deletions.
1 change: 1 addition & 0 deletions R/Dcenter.R
Expand Up @@ -24,5 +24,6 @@ Dcenter <- function(x) {
# normalise a
ahat <- a - matrix(rowMeans(a), nrow = n, ncol = n) - matrix(colMeans(a), nrow = n, ncol = n, byrow = TRUE) + mean(a)

#return(ahat[!upper.tri(ahat)])
return(ahat)
}
24 changes: 15 additions & 9 deletions R/dpm.R
Expand Up @@ -2,16 +2,26 @@
#'
#' @param data is a matrix containing samples in rows, variables in columns
#'
#' @return a matrix with score for every edge
#' @return a nvar*nvar matrix containing reg.DPM scores for all links (edges) where nvar is the number of variables
#'
#' @import corpcor
#'
#' @examples
#' ## Generate non-linear data
#' data <- nldata(100,0.2)$data
#' Example1:
#' ## Generate non-linear data with 100 samples (from the network discussed in the paper) where the gaussian noise with standard deviation of sigma=0.2 is added to the generated data
#' data <- nldata(nsamp=100,sigma=0.2)$data
#'
#'## run DPM
#' dpm(data)
#'## run regularized DPM
#' res <-reg.dpm(data)
#' head(res)
#'
#' Example2:
#' ##load DREAM4 data (insilico_size10_1_knockouts)
#' data(dream4)
#'
#'## Run regularized DPM
#' res <- reg.dpm(dream4)
#' head(res)
#'
#' @export

Expand All @@ -20,14 +30,10 @@
###############################

dpm <- function(data) {
# data is a matrix containing samples in rows, variables in columns

# compute the double-centred vectors of each variable and concatenate them into the columns of matrix d
d <- apply(as.matrix(data), 2, Dcenter)

# compute the precision matrix of d (cor2pcor is a function from corpcor Library)
res <- corpcor::cor2pcor(cov(d))

return(res)
}

Expand Down
16 changes: 16 additions & 0 deletions R/dream4_data.R
@@ -0,0 +1,16 @@
#' One of the expression data (insilico_size10_1_knockouts) from DREAM4 in-silico challenge
#'
#'
#' @docType data
#'
#' @usage data(dream4)
#'
#' @format gene expression data as a matrix where columns correspond to 10 genes and rows correspond to samples (10 knockout).
#'
#' @keywords datasets
#'
#' @examples
#' data(dream4)
#' reg.dpm(dream4)
#'
"dream4"
14 changes: 14 additions & 0 deletions R/dream4_gs.R
@@ -0,0 +1,14 @@
#' The gold standard of insilico_size10 data from DREAM4 in-silico challenge
#'
#' @docType data
#'
#' @usage data(dream4gs)
#'
#' @format the adjacency matrix where columns and rows correspond 10 genes and the entry 1 in the matrix indicates a link between two corresponding genes.
#'
#' @keywords datasets
#'
#' @examples
#' data(dream4gs)
#'
"dream4gs"
52 changes: 33 additions & 19 deletions R/fisher_test.R
@@ -1,37 +1,51 @@
#' Function to compute fisher z transformation pvalues
#' Function to compute fisher z-transformation pvalues for the links
#'
#' @param p a
#' @param alpha a
#' @param nsamp is the number of required samples
#' @param nnodes is the number of nodes in the simulated graph
#' @param score_matrix score matrix obtained from DPM or reg.DPM.
#' @param alpha a significance level
#' @param nsamp is the number of samples (number of rows of input matrix for (reg-)DPM ). Note that we need nsamp such that: nsamp*(nsamp+1)> 2*(nvar-5)
#' @param nvar is the number of variables (number of columns of input matrix for (reg-)DPM )
#'
#' @return a list contains the pvalues (pval) and correspending decisions of having a significant link (isnotnull) for all links
#' @return a list containing
#' - the list of significant links as a data frame. Each line of the data frame corresponds to a link.
#' The first and second columns are the corresponding nodes of the link, the third column is the score of the link and the forth column is the corresponding pvalue.
#' - the threshold that all the links with scores more than it are significant at defined significance level.
#'
#' @examples
#' ## Generate non-linear data
#' data <- nldata(100,0.2)$data
#' ## Load DREAM4 data (insilico_size10_1_knockouts)
#' data(dream4)
#'
#'## Run regularized DPM
#' res <- reg.dpm(data)
#'
#' ## Compute pvalues
#' fisher_test(res, 0.05,)
#' ## Get the list of significant links at significance level alpha=0.05
#' fisher_test(res,alpha=0.05,10,10)$LinkList
#'
#' ##Get the threshold at significance level alpha=0.05
#' fisher_test(score_matrix=res,alpha=0.05,nsamp=10,nvar=10)$threshold
#'
#'
#' @export

###########################################################
### Function to compute fisher z transformation pvalues ###
###########################################################

fisher_test <- function(p, alpha, nsamp, nnodes){
z = sqrt(nsamp-(nnodes-2)-3) * 0.5 * abs(log((1 + p) / (1 - p)))
pval = 2 * pnorm(-z)
pval_tr = qnorm(1-alpha/2)
isnotnull = (z > pval_tr )
temp = exp( 2 * pval_tr / (sqrt(nsamp-(nnodes-2)-3)) )
threshold = (temp -1)/(temp + 1)
return(list(pval=pval, isnotnull=isnotnull,threshold=threshold))
fisher_test <- function(score_matrix, alpha, nsamp, nvar){
nsamp <- (nsamp*(nsamp+1))/2
if (nsamp-(nvar-2)-3 <= 0 ) stop("the test requires nsamp such that: nsamp*(nsamp+1)> 2*(nvar-5)")
diag(score_matrix) <- NA
score_matrix[upper.tri(score_matrix)] <- NA
linkList <- reshape2::melt(score_matrix, na.rm=TRUE)
colnames(linkList) <- c("Node1", "Node2", "Score")
p <- linkList["Score"]
z <- sqrt(nsamp-(nvar-2)-3) * abs(atan(p))
pval <- 2 * pnorm(-as.matrix(z))
colnames(pval) <- "Pvalue"
linkList <- cbind(linkList,pval)
linkList <- linkList[linkList$Pvalue<alpha,]
temp <- exp( 2 * pval_tr / (sqrt(nsamp-(nvar-2)-3)) )
threshold <- (temp -1)/(temp + 1)
return(list(LinkList=linkList,threshold=threshold))
}



17 changes: 9 additions & 8 deletions R/get_link.R
Expand Up @@ -7,23 +7,24 @@
#' @return List of links in a data frame. Each line of the data frame corresponds to a link. The first and second columns are the corresponding nodes of the link and the third column is the score of the link.
#'
#' @examples
#'## Generate linear data with 10 nodes and 100 samples
#' data <- rdata(nnodes=10,nsamp=100)$data
#' rownames(data) <- paste("Sample", 1:100, sep="")
#' colnames(data) <- paste("Node", 1:10, sep="")
#'##load DREAM4 data (insilico_size10_1_knockouts)
#' data(dream4)
#'
#' ## Run reg_DPM
#' score_mat <- reg.dpm(data)
#'## Run regularized DPM
#' score_mat <- reg.dpm(dream4)
#'
#' ## Get ranking of link
#' ## Get ranking of links
#' links <- get_link(score_mat)
#' head(linkList)
#'
#' ## Get only top ranked links
#' ## Get only the top ranked links
#' links <- get_link(score_mat,top_ranked=10)
#' head(linkList)
#'
#' ## Get only the links with a score higher than a threshold
#' links <- get_link(score_mat,threshold = 0.2)
#' head(linkList)
#'
#' @export
#'
#'
Expand Down
20 changes: 13 additions & 7 deletions R/reg.dpm.R
@@ -1,18 +1,27 @@
#' Function to compute regularized DPM
#'
#' @param data is a matrix containing samples in rows, variables in columns
#' @param data is a matrix containing samples in rows and variables in columns
#'
#' @return a matrix with score for every edge
#' @return a nvar*nvar matrix containing reg.DPM scores for all links (edges) where nvar is the number of variables
#'
#' @import corpcor
#'
#' @examples
#' ## Generate non-linear data with 100 samples and the standard deviation of 0.2 of the noise added to the generated data
#' example1:
#' ## Generate non-linear data with 100 samples (from the network discussed in the paper) where the gaussian noise with standard deviation of sigma=0.2 is added to the generated data
#' data <- nldata(nsamp=100,sigma=0.2)$data
#'
#'## run regularized DPM
#' reg.dpm(data)
#'
#' example2:
#' ##load DREAM4 data
#' data(dream4)
#'
#'## Run regularized DPM
#' res <- reg.dpm(data)
#'
#'
#' @export


Expand All @@ -21,13 +30,10 @@
###########################################

reg.dpm <- function(data) {
# data is a matrix containing samples in rows, variables in columns

# compute the double-centred vectors of each variable and concatenate them into the columns of matrix d
d <- apply(as.matrix(data), 2, Dcenter)

# compute the regularised precision matrix of d
res <- corpcor::pcor.shrink(d)

res <- matrix(res,nrow(res),ncol(res))
return(res)
}

0 comments on commit 339bb56

Please sign in to comment.