Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
adding R package
- Loading branch information
Ghanbari
committed
Aug 31, 2017
0 parents
commit e5dd152
Showing
19 changed files
with
586 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.Rproj.user | ||
.Rhistory |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
Package: DPM | ||
Type: Package | ||
Title: Distance Precision Matrix | ||
Version: 0.1.0 | ||
Author: Mahsa Ghanbari, Julia lasserre | ||
Maintainer: Mahsa Ghanbari <Masha.ghanbari@mdc-berlin.de> | ||
Description: It computes DPM and regularized DPM | ||
License: MIT + file LICENSE | ||
Imports: | ||
corpcor | ||
Suggests: | ||
mvtnorm, | ||
pcalg, | ||
qpgraph, | ||
reshape2 | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 6.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: Sweave | ||
LaTeX: pdfLaTeX | ||
|
||
AutoAppendNewline: Yes | ||
StripTrailingWhitespace: Yes | ||
|
||
BuildType: Package | ||
PackageUseDevtools: Yes | ||
PackageInstallArgs: --no-multiarch --with-keep.source | ||
PackageRoxygenize: rd,collate,namespace,vignette |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(Dcenter) | ||
export(dpm) | ||
export(fisher_test) | ||
export(get_link) | ||
export(nldata) | ||
export(rdata) | ||
export(reg.dpm) | ||
import(corpcor) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#' Function to compute double-centred distance matrices | ||
#' | ||
#' @param x is a vector containing the values of a variable | ||
#' | ||
#' @return Dcentered vactor for x | ||
#' | ||
#' @examples | ||
#' Dcenter(c(1,2,3,4)) | ||
#' | ||
#' @export | ||
|
||
############################################################ | ||
### Function to compute double-centred distance matrices ### | ||
############################################################ | ||
|
||
Dcenter <- function(x) { | ||
# x is a vector containing the values of a variable | ||
|
||
n <- length(x) # number of points in dimension of x | ||
|
||
# compute distance matrix a | ||
a <- outer(x, x, function(x1, x2) abs(x1 - x2)) | ||
|
||
# normalise a | ||
ahat <- a - matrix(rowMeans(a), nrow = n, ncol = n) - matrix(colMeans(a), nrow = n, ncol = n, byrow = TRUE) + mean(a) | ||
|
||
return(ahat) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#' Function to compute DPM | ||
#' | ||
#' @param data is a matrix containing samples in rows, variables in columns | ||
#' | ||
#' @return a matrix with score for every edge | ||
#' | ||
#' @import corpcor | ||
#' | ||
#' @examples | ||
#' ## Generate non-linear data | ||
#' data <- nldata(100,0.2)$data | ||
#' | ||
#'## run DPM | ||
#' dpm(data) | ||
#' | ||
#' @export | ||
|
||
############################### | ||
### Function to compute DPM ### | ||
############################### | ||
|
||
dpm <- function(data) { | ||
# data is a matrix containing samples in rows, variables in columns | ||
|
||
# compute the double-centred vectors of each variable and concatenate them into the columns of matrix d | ||
d <- apply(as.matrix(data), 2, Dcenter) | ||
|
||
# compute the precision matrix of d (cor2pcor is a function from corpcor Library) | ||
res <- corpcor::cor2pcor(cov(d)) | ||
|
||
return(res) | ||
} | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#' Function to compute fisher z transformation pvalues | ||
#' | ||
#' @param p a | ||
#' @param alpha a | ||
#' @param nsamp is the number of required samples | ||
#' @param nnodes is the number of nodes in the simulated graph | ||
#' | ||
#' @return a list contains the pvalues (pval) and correspending decisions of having a significant link (isnotnull) for all links | ||
#' | ||
#' @examples | ||
#' ## Generate non-linear data | ||
#' data <- nldata(100,0.2)$data | ||
#' | ||
#'## Run regularized DPM | ||
#' res <- reg.dpm(data) | ||
#' | ||
#' ## Compute pvalues | ||
#' fisher_test(res, 0.05,) | ||
#' | ||
#' @export | ||
|
||
########################################################### | ||
### Function to compute fisher z transformation pvalues ### | ||
########################################################### | ||
|
||
fisher_test <- function(p, alpha, nsamp, nnodes){ | ||
z = sqrt(nsamp-(nnodes-2)-3) * 0.5 * abs(log((1 + p) / (1 - p))) | ||
pval = 2 * pnorm(-z) | ||
pval_tr = qnorm(1-alpha/2) | ||
isnotnull = (z > pval_tr ) | ||
temp = exp( 2 * pval_tr / (sqrt(nsamp-(nnodes-2)-3)) ) | ||
threshold = (temp -1)/(temp + 1) | ||
return(list(pval=pval, isnotnull=isnotnull,threshold=threshold)) | ||
} | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#' Converts the score matrix (obtained from DPM or reg-DPM) to a sorted list of non-directed links (links with higher score first). | ||
#' | ||
#' @param score_matrix score matrix obtained from DPM or regDPM. | ||
#' @param top_ranked number of top links with higher score to report. The default value is NULL, i.e. all the links are reported. | ||
#' @param threshold only links with a score above the threshold are reported. The default value is 0, i.e. all the links are reported. | ||
#' | ||
#' @return List of links in a data frame. Each line of the data frame corresponds to a link. The first and second columns are the corresponding nodes of the link and the third column is the score of the link. | ||
#' | ||
#' @examples | ||
#'## Generate linear data with 10 nodes and 100 samples | ||
#' data <- rdata(nnodes=10,nsamp=100)$data | ||
#' rownames(data) <- paste("Sample", 1:100, sep="") | ||
#' colnames(data) <- paste("Node", 1:10, sep="") | ||
#' | ||
#' ## Run reg_DPM | ||
#' score_mat <- reg.dpm(data) | ||
#' | ||
#' ## Get ranking of link | ||
#' links <- get_link(score_mat) | ||
#' head(linkList) | ||
#' | ||
#' ## Get only top ranked links | ||
#' links <- get_link(score_mat,top_ranked=10) | ||
#' | ||
#' ## Get only the links with a score higher than a threshold | ||
#' links <- get_link(score_mat,threshold = 0.2) | ||
#' @export | ||
#' | ||
#' | ||
#################################################################################### | ||
### Function to converts the score matrix to a sorted list of non-directed links#### | ||
#################################################################################### | ||
|
||
get_link <- function(score_matrix, top_ranked=NULL, threshold=0) { | ||
diag(score_matrix) <- NA | ||
score_matrix[upper.tri(score_matrix)] <- NA | ||
linkList <- reshape2::melt(abs(score_matrix), na.rm=TRUE) | ||
colnames(linkList) <- c("Node1", "Node2", "score") | ||
linkList <- linkList[linkList$score>=threshold,] | ||
linkList <- linkList[order(linkList$score, decreasing=TRUE),] | ||
|
||
if(!is.null(top_ranked)) { | ||
linkList <- linkList[1:min(nrow(linkList), top_ranked),] | ||
} | ||
|
||
rownames(linkList) <- NULL | ||
|
||
return(linkList) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#' Function to generate simulated nonlinear data | ||
#' | ||
#' @param nsamp is the number of required samples | ||
#' @param sigma is the standard deviation of the noise added to the generated data | ||
#' | ||
#' @return a list of a matrix containing samples in rows, variables in columns and the adjacency matrix of the network | ||
#' | ||
#' @examples | ||
#' ## Generate non-linear data with 100 samples and the standard deviation of 0.2 for the noise added to the generated data | ||
#' dataplus <- nldata(nsamp=100,sigma=0.2) | ||
#' | ||
#' ## data | ||
#' data <- dataplus$data | ||
#' | ||
#' ## gold standard | ||
#' gs <- dataplus$gs | ||
#' | ||
#' | ||
#' @export | ||
|
||
|
||
####################################################### | ||
### Function to generate simulated non-linear data #### | ||
####################################################### | ||
|
||
nldata <- function(nsamp,sigma) { | ||
# nsamp is the number of required samples | ||
# sigma is the standard deviation of the noise added to the generated data | ||
|
||
# graph used to generate non-linear data | ||
gs <- matrix(c(1,0,1,1,0,0,0,0,0,0,0, | ||
0,1,0,0,1,0,0,0,0,0,1, | ||
1,0,1,0,0,1,1,0,0,0,0, | ||
1,0,0,1,0,0,0,1,0,0,0, | ||
0,1,0,0,1,0,0,1,1,0,0, | ||
0,0,1,0,0,1,1,0,0,0,0, | ||
0,0,1,0,0,1,1,0,0,0,0, | ||
0,0,0,1,1,0,0,1,0,1,0, | ||
0,0,0,0,1,0,0,0,1,0,1, | ||
0,0,0,0,0,0,0,1,0,1,0, | ||
0,1,0,0,0,0,0,0,1,0,1), nrow=11, ncol=11) | ||
|
||
# build data containers and fill the first two nodes with random data | ||
netdata = data.frame(A = runif(nsamp,-2,2), | ||
B = runif(nsamp,-2,2), | ||
C = rep(0, nsamp), | ||
D = rep(0, nsamp), | ||
E = rep(0, nsamp), | ||
F = rep(0, nsamp), | ||
G = rep(0, nsamp), | ||
H = rep(0, nsamp), | ||
I = rep(0, nsamp), | ||
J = rep(0, nsamp), | ||
K = rep(0, nsamp)) | ||
|
||
# build the rest of the data using non linear equations | ||
netdata$C = 5 * netdata$A^2 -3 + rnorm(nsamp, 0, sigma) | ||
netdata$D = 3 * netdata$A^3 - 5*netdata$A + 5 + rnorm(nsamp, 0, sigma) | ||
netdata$E = (2*netdata$B+0.5)^2 - 4 + rnorm(nsamp, 0, sigma) | ||
netdata$F = 2 * log(abs(netdata$C-6)) + 1 + rnorm(nsamp, 0, sigma) | ||
netdata$G = 0.2 * (netdata$C-6)^2 + 0.4 * abs(netdata$F)^(1/2) - 14 + rnorm(nsamp, 0, sigma) | ||
netdata$H = 0.4 * (netdata$D/2)^2 - 0.3 * (netdata$E/5)^3 - 1.5 * netdata$E - 10 + rnorm(nsamp, 0, sigma) | ||
netdata$I = 0.4 * (netdata$E-5)^(2) - 0.5 * (netdata$E-5) - 20 + rnorm(nsamp, 0, sigma) | ||
netdata$J = 0.2 * (netdata$H/10)^4 + 1.3 * (netdata$H/10)^3 + 0.4 *(netdata$H/10)^2 - 5 + rnorm(nsamp, 0, sigma) | ||
netdata$K = 0.6 * (netdata$I/5)^2 - 0.8 * netdata$B^(3) - 3 + rnorm(nsamp, 0, sigma) | ||
|
||
return(list(data=as.matrix(netdata),gs=gs)) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#' Function to generate simulated nonlinear data | ||
#' | ||
#' @param nnodes is the number of nodes in the simulated graph | ||
#' @param nsamp is the number of required samples | ||
#' @param prob is the probability | ||
#' | ||
#' @return a matrix containing samples in rows, variables in columns | ||
#' | ||
#' @examples | ||
#' ## Generate linear data with 5 nodes and 100 samples | ||
#' dataplus <- rdata(nnodes=5,nsamp=100) | ||
#' | ||
#' ##data | ||
#' data <- dataplus$data | ||
#' | ||
#' ## undirected gold standard | ||
#' gs <- dataplus$adj | ||
#' | ||
#' ## directed gold standard | ||
#' gs <- dataplus$dag | ||
#' | ||
#' @export | ||
|
||
################################################### | ||
### Function to generate simulated linear data #### | ||
################################################### | ||
|
||
rdata <- function(nnodes, nsamp, prob=0.2) { | ||
# nnodes is the number of nodes in the simulated graph | ||
# nsamp is the number of required samples | ||
|
||
# required libraries | ||
#library("qpgraph") | ||
#library("pcalg") | ||
|
||
# create a random graph | ||
g <- pcalg::randomDAG(nnodes, prob=prob, lB=1, uB=1) | ||
|
||
# add edges if necessary, so that all nodes are connected to at least one other node | ||
a <- as(g,"matrix") | ||
if (sum(a[1,2:nnodes]) == 0) { # if the first node has no children | ||
a[1,sample(2:nnodes,1)] = 1 # add a child | ||
} | ||
for (i in 2:(nnodes-1)) { | ||
if ((sum(a[i,(i+1):nnodes]) == 0) & (sum(a[1:(i-1),i]) == 0)) { # if neither any parents nor any children | ||
if (runif(1) <= (nnodes-i)/(nnodes-1)) { # draw a coin | ||
a[i,sample((i+1):nnodes,1)] = 1 # heads, add a child | ||
} else { | ||
a[sample(1:(i-1)),i] = 1 # tails, add a parent | ||
} | ||
} | ||
} | ||
if (sum(a[1:(nnodes-1),nnodes]) == 0) { # if the last node has no parents | ||
a[sample(1:(nnodes-1),1),nnodes] = 1 # add a parent | ||
} | ||
g = pcalg::getGraph(a) | ||
g = as(g,"graphNEL") | ||
|
||
# create the undirected graph | ||
undirected_g <- a + t(a) | ||
|
||
# create a covariance matrix based on this graph | ||
Sigma<-qpgraph::qpG2Sigma(undirected_g, rho=0.5) | ||
|
||
# generate Gaussian data with 0 mean using the covariance matrix | ||
data<-mvtnorm::rmvnorm(nsamp,mean = rep(0, nrow(Sigma)),sigma=as.matrix(Sigma)) | ||
colnames(data)<-as.character(1:nnodes) | ||
|
||
return(list(data=data, covmat=Sigma, adj=undirected_g, dag=g)) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#' Function to compute regularized DPM | ||
#' | ||
#' @param data is a matrix containing samples in rows, variables in columns | ||
#' | ||
#' @return a matrix with score for every edge | ||
#' | ||
#' @import corpcor | ||
#' | ||
#' @examples | ||
#' ## Generate non-linear data with 100 samples and the standard deviation of 0.2 of the noise added to the generated data | ||
#' data <- nldata(nsamp=100,sigma=0.2)$data | ||
#' | ||
#'## run regularized DPM | ||
#' reg.dpm(data) | ||
#' | ||
#' @export | ||
|
||
|
||
########################################### | ||
### Function to compute regularized DPM ### | ||
########################################### | ||
|
||
reg.dpm <- function(data) { | ||
# data is a matrix containing samples in rows, variables in columns | ||
|
||
# compute the double-centred vectors of each variable and concatenate them into the columns of matrix d | ||
d <- apply(as.matrix(data), 2, Dcenter) | ||
|
||
# compute the regularised precision matrix of d | ||
res <- corpcor::pcor.shrink(d) | ||
|
||
return(res) | ||
} |
Oops, something went wrong.