From 339bb5649da1add91b6a76e358f82daa5a426e18 Mon Sep 17 00:00:00 2001 From: Ghanbari Date: Wed, 6 Sep 2017 20:33:34 +0200 Subject: [PATCH] update R package --- R/Dcenter.R | 1 + R/dpm.R | 24 +++++---- R/dream4_data.R | 16 ++++++ R/dream4_gs.R | 14 +++++ R/fisher_test.R | 52 +++++++++++------- R/get_link.R | 17 +++--- R/reg.dpm.R | 20 ++++--- README.md | 128 +++++++++++++++++++++++++++++++-------------- data/dream4.rda | Bin 0 -> 1038 bytes data/dream4gs.rda | Bin 0 -> 138 bytes man/dpm.Rd | 20 +++++-- man/dream4.Rd | 19 +++++++ man/dream4gs.Rd | 18 +++++++ man/fisher_test.Rd | 31 ++++++----- man/get_link.Rd | 17 +++--- man/reg.dpm.Rd | 15 ++++-- 16 files changed, 281 insertions(+), 111 deletions(-) create mode 100644 R/dream4_data.R create mode 100644 R/dream4_gs.R create mode 100644 data/dream4.rda create mode 100644 data/dream4gs.rda create mode 100644 man/dream4.Rd create mode 100644 man/dream4gs.Rd diff --git a/R/Dcenter.R b/R/Dcenter.R index 1d03a62..9b57af7 100644 --- a/R/Dcenter.R +++ b/R/Dcenter.R @@ -24,5 +24,6 @@ Dcenter <- function(x) { # normalise a ahat <- a - matrix(rowMeans(a), nrow = n, ncol = n) - matrix(colMeans(a), nrow = n, ncol = n, byrow = TRUE) + mean(a) + #return(ahat[!upper.tri(ahat)]) return(ahat) } diff --git a/R/dpm.R b/R/dpm.R index 56acd57..14e4eaf 100644 --- a/R/dpm.R +++ b/R/dpm.R @@ -2,16 +2,26 @@ #' #' @param data is a matrix containing samples in rows, variables in columns #' -#' @return a matrix with score for every edge +#' @return a nvar*nvar matrix containing reg.DPM scores for all links (edges) where nvar is the number of variables #' #' @import corpcor #' #' @examples -#' ## Generate non-linear data -#' data <- nldata(100,0.2)$data +#' Example1: +#' ## Generate non-linear data with 100 samples (from the network discussed in the paper) where the gaussian noise with standard deviation of sigma=0.2 is added to the generated data +#' data <- nldata(nsamp=100,sigma=0.2)$data #' -#'## run DPM -#' dpm(data) +#'## run regularized DPM +#' res <-reg.dpm(data) +#' head(res) +#' +#' Example2: +#' ##load DREAM4 data (insilico_size10_1_knockouts) +#' data(dream4) +#' +#'## Run regularized DPM +#' res <- reg.dpm(dream4) +#' head(res) #' #' @export @@ -20,14 +30,10 @@ ############################### dpm <- function(data) { - # data is a matrix containing samples in rows, variables in columns - # compute the double-centred vectors of each variable and concatenate them into the columns of matrix d d <- apply(as.matrix(data), 2, Dcenter) - # compute the precision matrix of d (cor2pcor is a function from corpcor Library) res <- corpcor::cor2pcor(cov(d)) - return(res) } diff --git a/R/dream4_data.R b/R/dream4_data.R new file mode 100644 index 0000000..3f6dc66 --- /dev/null +++ b/R/dream4_data.R @@ -0,0 +1,16 @@ +#' One of the expression data (insilico_size10_1_knockouts) from DREAM4 in-silico challenge +#' +#' +#' @docType data +#' +#' @usage data(dream4) +#' +#' @format gene expression data as a matrix where columns correspond to 10 genes and rows correspond to samples (10 knockout). +#' +#' @keywords datasets +#' +#' @examples +#' data(dream4) +#' reg.dpm(dream4) +#' +"dream4" diff --git a/R/dream4_gs.R b/R/dream4_gs.R new file mode 100644 index 0000000..66d3f05 --- /dev/null +++ b/R/dream4_gs.R @@ -0,0 +1,14 @@ +#' The gold standard of insilico_size10 data from DREAM4 in-silico challenge +#' +#' @docType data +#' +#' @usage data(dream4gs) +#' +#' @format the adjacency matrix where columns and rows correspond 10 genes and the entry 1 in the matrix indicates a link between two corresponding genes. +#' +#' @keywords datasets +#' +#' @examples +#' data(dream4gs) +#' +"dream4gs" diff --git a/R/fisher_test.R b/R/fisher_test.R index f27faf6..d5c72fc 100644 --- a/R/fisher_test.R +++ b/R/fisher_test.R @@ -1,21 +1,28 @@ -#' Function to compute fisher z transformation pvalues +#' Function to compute fisher z-transformation pvalues for the links #' -#' @param p a -#' @param alpha a -#' @param nsamp is the number of required samples -#' @param nnodes is the number of nodes in the simulated graph +#' @param score_matrix score matrix obtained from DPM or reg.DPM. +#' @param alpha a significance level +#' @param nsamp is the number of samples (number of rows of input matrix for (reg-)DPM ). Note that we need nsamp such that: nsamp*(nsamp+1)> 2*(nvar-5) +#' @param nvar is the number of variables (number of columns of input matrix for (reg-)DPM ) #' -#' @return a list contains the pvalues (pval) and correspending decisions of having a significant link (isnotnull) for all links +#' @return a list containing +#' - the list of significant links as a data frame. Each line of the data frame corresponds to a link. +#' The first and second columns are the corresponding nodes of the link, the third column is the score of the link and the forth column is the corresponding pvalue. +#' - the threshold that all the links with scores more than it are significant at defined significance level. #' #' @examples -#' ## Generate non-linear data -#' data <- nldata(100,0.2)$data +#' ## Load DREAM4 data (insilico_size10_1_knockouts) +#' data(dream4) #' #'## Run regularized DPM #' res <- reg.dpm(data) #' -#' ## Compute pvalues -#' fisher_test(res, 0.05,) +#' ## Get the list of significant links at significance level alpha=0.05 +#' fisher_test(res,alpha=0.05,10,10)$LinkList +#' +#' ##Get the threshold at significance level alpha=0.05 +#' fisher_test(score_matrix=res,alpha=0.05,nsamp=10,nvar=10)$threshold +#' #' #' @export @@ -23,15 +30,22 @@ ### Function to compute fisher z transformation pvalues ### ########################################################### -fisher_test <- function(p, alpha, nsamp, nnodes){ - z = sqrt(nsamp-(nnodes-2)-3) * 0.5 * abs(log((1 + p) / (1 - p))) - pval = 2 * pnorm(-z) - pval_tr = qnorm(1-alpha/2) - isnotnull = (z > pval_tr ) - temp = exp( 2 * pval_tr / (sqrt(nsamp-(nnodes-2)-3)) ) - threshold = (temp -1)/(temp + 1) - return(list(pval=pval, isnotnull=isnotnull,threshold=threshold)) +fisher_test <- function(score_matrix, alpha, nsamp, nvar){ + nsamp <- (nsamp*(nsamp+1))/2 + if (nsamp-(nvar-2)-3 <= 0 ) stop("the test requires nsamp such that: nsamp*(nsamp+1)> 2*(nvar-5)") + diag(score_matrix) <- NA + score_matrix[upper.tri(score_matrix)] <- NA + linkList <- reshape2::melt(score_matrix, na.rm=TRUE) + colnames(linkList) <- c("Node1", "Node2", "Score") + p <- linkList["Score"] + z <- sqrt(nsamp-(nvar-2)-3) * abs(atan(p)) + pval <- 2 * pnorm(-as.matrix(z)) + colnames(pval) <- "Pvalue" + linkList <- cbind(linkList,pval) + linkList <- linkList[linkList$PvalueVH8O1iU}%`hFaQ7m0BT{RFqljLm_`YJ005ZK0G_5J z01ZYNOwp!|sWjRR8USiwbi`=T01W^DCK_k}001>G35^DT00E!?0B8ULU=0S7CIA2? zm>K{80ibAr01XWQ!~hx%0000q000000002c000000009(&;V!v15FwL4H9USdQBP` zs0<(gVlo&pwQ51kZGZzp^yd;00E|&8UWBVWND$MgCNM$5Caf-5CCS8vh(EO zvQ(2%m5^3KwK=k6L@e$Af&$66M_6f`*1@L63fBOD35k&q3m8N5`~uK0P+ zc>*#oS%Rwi&ZPl#Lc3@hhn8Wk1R28w6jtTgop+9b-!#Nvu4ZS3)8fe&7z(zcsNFlh`0$st# zu#De?CDyYvB|J_~LKIj**)?RG3VoXd2E)-H3hb#=H2P&e+-Dms=PO~UPJ_<_7cNi?hST{I4ztQaJQhY;~$2$qWJ8;Z^a zd-)t^CRpqwd6=8WO!BJ&vEM25D%p3-qhd^Ntc^@Y01h5t695DUOUvUYv93cIdGtDG z2&V&;FP(bH0y91($X+ih3bLj+w_CgInQut-q7o?4-v%;=4w@_^+5?)%07nS`f}!Jh zDmDOC#E=vTw=eUfV{805lwv3J@pgDcHV`dzqP z=NK!~!C)oSu{2s$L83X+C&}D^U@McNt);(JpVsYEgJmR-oILu~^Q(uHk}iR=KBZNT zokbLheg#Yf2Mv#yI4~jYcgP=ly#pH6+bA1XAcJc5pM;M=xEz6|zn%0*d0xVX+fmhT zsbB>`Wvsr92aaqBWRu(>7u^(dYNT&)C I6*PIqphJ1a$N&HU literal 0 HcmV?d00001 diff --git a/data/dream4gs.rda b/data/dream4gs.rda new file mode 100644 index 0000000000000000000000000000000000000000..359d44968f1e1cc220abf27190a1168b2f7aca83 GIT binary patch literal 138 zcmV;50CoRDT4*^jL0KkKSqfAlmH+^7e}MdV5D*X$6cB^}5I`of7yv*4AOLb2K$>9; z5DgDdNQj}eG6q1=rcF?KX(}9TQCowpC>k*=9PI&q&?JrT0hAg-V-YN~Ik3%#cB;}Q s_^)Fv(VR@{;UYtx8;T1AyivXQx%79t59Jwv34`%>BvXY60Hs1HVCLvD2><{9 literal 0 HcmV?d00001 diff --git a/man/dpm.Rd b/man/dpm.Rd index 2741b56..f0ca119 100644 --- a/man/dpm.Rd +++ b/man/dpm.Rd @@ -10,16 +10,26 @@ dpm(data) \item{data}{is a matrix containing samples in rows, variables in columns} } \value{ -a matrix with score for every edge +a nvar*nvar matrix containing reg.DPM scores for all links (edges) where nvar is the number of variables } \description{ Function to compute DPM } \examples{ -## Generate non-linear data -data <- nldata(100,0.2)$data +Example1: +## Generate non-linear data with 100 samples (from the network discussed in the paper) where the gaussian noise with standard deviation of sigma=0.2 is added to the generated data +data <- nldata(nsamp=100,sigma=0.2)$data -## run DPM -dpm(data) +## run regularized DPM +res <-reg.dpm(data) +head(res) + +Example2: +##load DREAM4 data (insilico_size10_1_knockouts) +data(dream4) + +## Run regularized DPM +res <- reg.dpm(dream4) +head(res) } diff --git a/man/dream4.Rd b/man/dream4.Rd new file mode 100644 index 0000000..6dc086e --- /dev/null +++ b/man/dream4.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dream4_data.R +\docType{data} +\name{dream4} +\alias{dream4} +\title{One of the expression data (insilico_size10_1_knockouts) from DREAM4 in-silico challenge} +\format{gene expression data as a matrix where columns correspond to 10 genes and rows correspond to samples (10 knockout).} +\usage{ +data(dream4) +} +\description{ +One of the expression data (insilico_size10_1_knockouts) from DREAM4 in-silico challenge +} +\examples{ +data(dream4) +reg.dpm(dream4) + +} +\keyword{datasets} diff --git a/man/dream4gs.Rd b/man/dream4gs.Rd new file mode 100644 index 0000000..52623bf --- /dev/null +++ b/man/dream4gs.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dream4_gs.R +\docType{data} +\name{dream4gs} +\alias{dream4gs} +\title{The gold standard of insilico_size10 data from DREAM4 in-silico challenge} +\format{the adjacency matrix where columns and rows correspond 10 genes and the entry 1 in the matrix indicates a link between two corresponding genes.} +\usage{ +data(dream4gs) +} +\description{ +The gold standard of insilico_size10 data from DREAM4 in-silico challenge +} +\examples{ +data(dream4gs) + +} +\keyword{datasets} diff --git a/man/fisher_test.Rd b/man/fisher_test.Rd index 4d29372..6d4a87f 100644 --- a/man/fisher_test.Rd +++ b/man/fisher_test.Rd @@ -2,33 +2,40 @@ % Please edit documentation in R/fisher_test.R \name{fisher_test} \alias{fisher_test} -\title{Function to compute fisher z transformation pvalues} +\title{Function to compute fisher z-transformation pvalues for the links} \usage{ -fisher_test(p, alpha, nsamp, nnodes) +fisher_test(score_matrix, alpha, nsamp, nvar) } \arguments{ -\item{p}{a} +\item{score_matrix}{score matrix obtained from DPM or reg.DPM.} -\item{alpha}{a} +\item{alpha}{a significance level} -\item{nsamp}{is the number of required samples} +\item{nsamp}{is the number of samples (number of rows of input matrix for (reg-)DPM ). Note that we need nsamp such that: nsamp*(nsamp+1)> 2*(nvar-5)} -\item{nnodes}{is the number of nodes in the simulated graph} +\item{nvar}{is the number of variables (number of columns of input matrix for (reg-)DPM )} } \value{ -a list contains the pvalues (pval) and correspending decisions of having a significant link (isnotnull) for all links +a list containing +- the list of significant links as a data frame. Each line of the data frame corresponds to a link. +The first and second columns are the corresponding nodes of the link, the third column is the score of the link and the forth column is the corresponding pvalue. +- the threshold that all the links with scores more than it are significant at defined significance level. } \description{ -Function to compute fisher z transformation pvalues +Function to compute fisher z-transformation pvalues for the links } \examples{ -## Generate non-linear data -data <- nldata(100,0.2)$data +## Load DREAM4 data (insilico_size10_1_knockouts) +data(dream4) ## Run regularized DPM res <- reg.dpm(data) -## Compute pvalues -fisher_test(res, 0.05,) +## Get the list of significant links at significance level alpha=0.05 +fisher_test(res,alpha=0.05,10,10)$LinkList + +##Get the threshold at significance level alpha=0.05 +fisher_test(score_matrix=res,alpha=0.05,nsamp=10,nvar=10)$threshold + } diff --git a/man/get_link.Rd b/man/get_link.Rd index 2e85f34..aa975df 100644 --- a/man/get_link.Rd +++ b/man/get_link.Rd @@ -20,21 +20,22 @@ List of links in a data frame. Each line of the data frame corresponds to a link Converts the score matrix (obtained from DPM or reg-DPM) to a sorted list of non-directed links (links with higher score first). } \examples{ -## Generate linear data with 10 nodes and 100 samples -data <- rdata(nnodes=10,nsamp=100)$data -rownames(data) <- paste("Sample", 1:100, sep="") -colnames(data) <- paste("Node", 1:10, sep="") +##load DREAM4 data (insilico_size10_1_knockouts) +data(dream4) -## Run reg_DPM -score_mat <- reg.dpm(data) +## Run regularized DPM +score_mat <- reg.dpm(dream4) -## Get ranking of link +## Get ranking of links links <- get_link(score_mat) head(linkList) -## Get only top ranked links +## Get only the top ranked links links <- get_link(score_mat,top_ranked=10) +head(linkList) ## Get only the links with a score higher than a threshold links <- get_link(score_mat,threshold = 0.2) +head(linkList) + } diff --git a/man/reg.dpm.Rd b/man/reg.dpm.Rd index ff5040a..d877cd6 100644 --- a/man/reg.dpm.Rd +++ b/man/reg.dpm.Rd @@ -7,19 +7,28 @@ reg.dpm(data) } \arguments{ -\item{data}{is a matrix containing samples in rows, variables in columns} +\item{data}{is a matrix containing samples in rows and variables in columns} } \value{ -a matrix with score for every edge +a nvar*nvar matrix containing reg.DPM scores for all links (edges) where nvar is the number of variables } \description{ Function to compute regularized DPM } \examples{ -## Generate non-linear data with 100 samples and the standard deviation of 0.2 of the noise added to the generated data +example1: +## Generate non-linear data with 100 samples (from the network discussed in the paper) where the gaussian noise with standard deviation of sigma=0.2 is added to the generated data data <- nldata(nsamp=100,sigma=0.2)$data ## run regularized DPM reg.dpm(data) +example2: +##load DREAM4 data +data(dream4) + +## Run regularized DPM +res <- reg.dpm(data) + + }