Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
adding R package
  • Loading branch information
Ghanbari committed Aug 31, 2017
0 parents commit e5dd152
Show file tree
Hide file tree
Showing 19 changed files with 586 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
.Rproj.user
.Rhistory
18 changes: 18 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,18 @@
Package: DPM
Type: Package
Title: Distance Precision Matrix
Version: 0.1.0
Author: Mahsa Ghanbari, Julia lasserre
Maintainer: Mahsa Ghanbari <Masha.ghanbari@mdc-berlin.de>
Description: It computes DPM and regularized DPM
License: MIT + file LICENSE
Imports:
corpcor
Suggests:
mvtnorm,
pcalg,
qpgraph,
reshape2
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.0.1
21 changes: 21 additions & 0 deletions DPM.Rproj
@@ -0,0 +1,21 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace,vignette
10 changes: 10 additions & 0 deletions NAMESPACE
@@ -0,0 +1,10 @@
# Generated by roxygen2: do not edit by hand

export(Dcenter)
export(dpm)
export(fisher_test)
export(get_link)
export(nldata)
export(rdata)
export(reg.dpm)
import(corpcor)
28 changes: 28 additions & 0 deletions R/Dcenter.R
@@ -0,0 +1,28 @@
#' Function to compute double-centred distance matrices
#'
#' @param x is a vector containing the values of a variable
#'
#' @return Dcentered vactor for x
#'
#' @examples
#' Dcenter(c(1,2,3,4))
#'
#' @export

############################################################
### Function to compute double-centred distance matrices ###
############################################################

Dcenter <- function(x) {
# x is a vector containing the values of a variable

n <- length(x) # number of points in dimension of x

# compute distance matrix a
a <- outer(x, x, function(x1, x2) abs(x1 - x2))

# normalise a
ahat <- a - matrix(rowMeans(a), nrow = n, ncol = n) - matrix(colMeans(a), nrow = n, ncol = n, byrow = TRUE) + mean(a)

return(ahat)
}
35 changes: 35 additions & 0 deletions R/dpm.R
@@ -0,0 +1,35 @@
#' Function to compute DPM
#'
#' @param data is a matrix containing samples in rows, variables in columns
#'
#' @return a matrix with score for every edge
#'
#' @import corpcor
#'
#' @examples
#' ## Generate non-linear data
#' data <- nldata(100,0.2)$data
#'
#'## run DPM
#' dpm(data)
#'
#' @export

###############################
### Function to compute DPM ###
###############################

dpm <- function(data) {
# data is a matrix containing samples in rows, variables in columns

# compute the double-centred vectors of each variable and concatenate them into the columns of matrix d
d <- apply(as.matrix(data), 2, Dcenter)

# compute the precision matrix of d (cor2pcor is a function from corpcor Library)
res <- corpcor::cor2pcor(cov(d))

return(res)
}



37 changes: 37 additions & 0 deletions R/fisher_test.R
@@ -0,0 +1,37 @@
#' Function to compute fisher z transformation pvalues
#'
#' @param p a
#' @param alpha a
#' @param nsamp is the number of required samples
#' @param nnodes is the number of nodes in the simulated graph
#'
#' @return a list contains the pvalues (pval) and correspending decisions of having a significant link (isnotnull) for all links
#'
#' @examples
#' ## Generate non-linear data
#' data <- nldata(100,0.2)$data
#'
#'## Run regularized DPM
#' res <- reg.dpm(data)
#'
#' ## Compute pvalues
#' fisher_test(res, 0.05,)
#'
#' @export

###########################################################
### Function to compute fisher z transformation pvalues ###
###########################################################

fisher_test <- function(p, alpha, nsamp, nnodes){
z = sqrt(nsamp-(nnodes-2)-3) * 0.5 * abs(log((1 + p) / (1 - p)))
pval = 2 * pnorm(-z)
pval_tr = qnorm(1-alpha/2)
isnotnull = (z > pval_tr )
temp = exp( 2 * pval_tr / (sqrt(nsamp-(nnodes-2)-3)) )
threshold = (temp -1)/(temp + 1)
return(list(pval=pval, isnotnull=isnotnull,threshold=threshold))
}



49 changes: 49 additions & 0 deletions R/get_link.R
@@ -0,0 +1,49 @@
#' Converts the score matrix (obtained from DPM or reg-DPM) to a sorted list of non-directed links (links with higher score first).
#'
#' @param score_matrix score matrix obtained from DPM or regDPM.
#' @param top_ranked number of top links with higher score to report. The default value is NULL, i.e. all the links are reported.
#' @param threshold only links with a score above the threshold are reported. The default value is 0, i.e. all the links are reported.
#'
#' @return List of links in a data frame. Each line of the data frame corresponds to a link. The first and second columns are the corresponding nodes of the link and the third column is the score of the link.
#'
#' @examples
#'## Generate linear data with 10 nodes and 100 samples
#' data <- rdata(nnodes=10,nsamp=100)$data
#' rownames(data) <- paste("Sample", 1:100, sep="")
#' colnames(data) <- paste("Node", 1:10, sep="")
#'
#' ## Run reg_DPM
#' score_mat <- reg.dpm(data)
#'
#' ## Get ranking of link
#' links <- get_link(score_mat)
#' head(linkList)
#'
#' ## Get only top ranked links
#' links <- get_link(score_mat,top_ranked=10)
#'
#' ## Get only the links with a score higher than a threshold
#' links <- get_link(score_mat,threshold = 0.2)
#' @export
#'
#'
####################################################################################
### Function to converts the score matrix to a sorted list of non-directed links####
####################################################################################

get_link <- function(score_matrix, top_ranked=NULL, threshold=0) {
diag(score_matrix) <- NA
score_matrix[upper.tri(score_matrix)] <- NA
linkList <- reshape2::melt(abs(score_matrix), na.rm=TRUE)
colnames(linkList) <- c("Node1", "Node2", "score")
linkList <- linkList[linkList$score>=threshold,]
linkList <- linkList[order(linkList$score, decreasing=TRUE),]

if(!is.null(top_ranked)) {
linkList <- linkList[1:min(nrow(linkList), top_ranked),]
}

rownames(linkList) <- NULL

return(linkList)
}
69 changes: 69 additions & 0 deletions R/nldata.R
@@ -0,0 +1,69 @@
#' Function to generate simulated nonlinear data
#'
#' @param nsamp is the number of required samples
#' @param sigma is the standard deviation of the noise added to the generated data
#'
#' @return a list of a matrix containing samples in rows, variables in columns and the adjacency matrix of the network
#'
#' @examples
#' ## Generate non-linear data with 100 samples and the standard deviation of 0.2 for the noise added to the generated data
#' dataplus <- nldata(nsamp=100,sigma=0.2)
#'
#' ## data
#' data <- dataplus$data
#'
#' ## gold standard
#' gs <- dataplus$gs
#'
#'
#' @export


#######################################################
### Function to generate simulated non-linear data ####
#######################################################

nldata <- function(nsamp,sigma) {
# nsamp is the number of required samples
# sigma is the standard deviation of the noise added to the generated data

# graph used to generate non-linear data
gs <- matrix(c(1,0,1,1,0,0,0,0,0,0,0,
0,1,0,0,1,0,0,0,0,0,1,
1,0,1,0,0,1,1,0,0,0,0,
1,0,0,1,0,0,0,1,0,0,0,
0,1,0,0,1,0,0,1,1,0,0,
0,0,1,0,0,1,1,0,0,0,0,
0,0,1,0,0,1,1,0,0,0,0,
0,0,0,1,1,0,0,1,0,1,0,
0,0,0,0,1,0,0,0,1,0,1,
0,0,0,0,0,0,0,1,0,1,0,
0,1,0,0,0,0,0,0,1,0,1), nrow=11, ncol=11)

# build data containers and fill the first two nodes with random data
netdata = data.frame(A = runif(nsamp,-2,2),
B = runif(nsamp,-2,2),
C = rep(0, nsamp),
D = rep(0, nsamp),
E = rep(0, nsamp),
F = rep(0, nsamp),
G = rep(0, nsamp),
H = rep(0, nsamp),
I = rep(0, nsamp),
J = rep(0, nsamp),
K = rep(0, nsamp))

# build the rest of the data using non linear equations
netdata$C = 5 * netdata$A^2 -3 + rnorm(nsamp, 0, sigma)
netdata$D = 3 * netdata$A^3 - 5*netdata$A + 5 + rnorm(nsamp, 0, sigma)
netdata$E = (2*netdata$B+0.5)^2 - 4 + rnorm(nsamp, 0, sigma)
netdata$F = 2 * log(abs(netdata$C-6)) + 1 + rnorm(nsamp, 0, sigma)
netdata$G = 0.2 * (netdata$C-6)^2 + 0.4 * abs(netdata$F)^(1/2) - 14 + rnorm(nsamp, 0, sigma)
netdata$H = 0.4 * (netdata$D/2)^2 - 0.3 * (netdata$E/5)^3 - 1.5 * netdata$E - 10 + rnorm(nsamp, 0, sigma)
netdata$I = 0.4 * (netdata$E-5)^(2) - 0.5 * (netdata$E-5) - 20 + rnorm(nsamp, 0, sigma)
netdata$J = 0.2 * (netdata$H/10)^4 + 1.3 * (netdata$H/10)^3 + 0.4 *(netdata$H/10)^2 - 5 + rnorm(nsamp, 0, sigma)
netdata$K = 0.6 * (netdata$I/5)^2 - 0.8 * netdata$B^(3) - 3 + rnorm(nsamp, 0, sigma)

return(list(data=as.matrix(netdata),gs=gs))
}

71 changes: 71 additions & 0 deletions R/rdata.R
@@ -0,0 +1,71 @@
#' Function to generate simulated nonlinear data
#'
#' @param nnodes is the number of nodes in the simulated graph
#' @param nsamp is the number of required samples
#' @param prob is the probability
#'
#' @return a matrix containing samples in rows, variables in columns
#'
#' @examples
#' ## Generate linear data with 5 nodes and 100 samples
#' dataplus <- rdata(nnodes=5,nsamp=100)
#'
#' ##data
#' data <- dataplus$data
#'
#' ## undirected gold standard
#' gs <- dataplus$adj
#'
#' ## directed gold standard
#' gs <- dataplus$dag
#'
#' @export

###################################################
### Function to generate simulated linear data ####
###################################################

rdata <- function(nnodes, nsamp, prob=0.2) {
# nnodes is the number of nodes in the simulated graph
# nsamp is the number of required samples

# required libraries
#library("qpgraph")
#library("pcalg")

# create a random graph
g <- pcalg::randomDAG(nnodes, prob=prob, lB=1, uB=1)

# add edges if necessary, so that all nodes are connected to at least one other node
a <- as(g,"matrix")
if (sum(a[1,2:nnodes]) == 0) { # if the first node has no children
a[1,sample(2:nnodes,1)] = 1 # add a child
}
for (i in 2:(nnodes-1)) {
if ((sum(a[i,(i+1):nnodes]) == 0) & (sum(a[1:(i-1),i]) == 0)) { # if neither any parents nor any children
if (runif(1) <= (nnodes-i)/(nnodes-1)) { # draw a coin
a[i,sample((i+1):nnodes,1)] = 1 # heads, add a child
} else {
a[sample(1:(i-1)),i] = 1 # tails, add a parent
}
}
}
if (sum(a[1:(nnodes-1),nnodes]) == 0) { # if the last node has no parents
a[sample(1:(nnodes-1),1),nnodes] = 1 # add a parent
}
g = pcalg::getGraph(a)
g = as(g,"graphNEL")

# create the undirected graph
undirected_g <- a + t(a)

# create a covariance matrix based on this graph
Sigma<-qpgraph::qpG2Sigma(undirected_g, rho=0.5)

# generate Gaussian data with 0 mean using the covariance matrix
data<-mvtnorm::rmvnorm(nsamp,mean = rep(0, nrow(Sigma)),sigma=as.matrix(Sigma))
colnames(data)<-as.character(1:nnodes)

return(list(data=data, covmat=Sigma, adj=undirected_g, dag=g))
}

33 changes: 33 additions & 0 deletions R/reg.dpm.R
@@ -0,0 +1,33 @@
#' Function to compute regularized DPM
#'
#' @param data is a matrix containing samples in rows, variables in columns
#'
#' @return a matrix with score for every edge
#'
#' @import corpcor
#'
#' @examples
#' ## Generate non-linear data with 100 samples and the standard deviation of 0.2 of the noise added to the generated data
#' data <- nldata(nsamp=100,sigma=0.2)$data
#'
#'## run regularized DPM
#' reg.dpm(data)
#'
#' @export


###########################################
### Function to compute regularized DPM ###
###########################################

reg.dpm <- function(data) {
# data is a matrix containing samples in rows, variables in columns

# compute the double-centred vectors of each variable and concatenate them into the columns of matrix d
d <- apply(as.matrix(data), 2, Dcenter)

# compute the regularised precision matrix of d
res <- corpcor::pcor.shrink(d)

return(res)
}

0 comments on commit e5dd152

Please sign in to comment.