Added puc calculation

loosolab · Mar 7, 2017 · 3360aff · 3360aff
1 parent 562bec5
commit 3360aff
Show file tree

Hide file tree

Showing 3 changed files with 55 additions and 18 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,5 +8,6 @@ License: MIT
 Encoding: UTF-8
 LazyData: true
 Imports: rPython,
-    entropy
+    entropy,
+    BiocParallel
 RoxygenNote: 6.0.1
diff --git a/R/pid.R b/R/pid.R
@@ -1,20 +1,3 @@
-#' For two sources, calculates partial information decompositions for all targets
-#'
-#' @param targets A discretized matrix, with each target in a column.
-#' @param x1 The discretized first source variable.
-#' @param x2 The discretized second source variable.
-#'
-#' @return A matrix with a row for each target and four columns corresponding to the PID components.
-#'
-#' @export
-tPID <- function(targets, x1, x2) {
-  results <- bplapply(1:ncol(targets), pid, x1, x2)
-
-  if (class(results) == "list") {
-
-  }
-}
-
 #' Provides a partial information decomposition of the source variables S = {X1, X2} about a target variable Z.
 #'
 #' @param z The discretized target variable

diff --git a/R/puc.R b/R/puc.R
@@ -0,0 +1,53 @@
+#' Pairwise proportional unique contributions
+#'
+#' This function calculates pairwise proportional contributions between all column pairs.
+#'
+#' @param data Discretized data matrix. Observations in rows, variables (features) in columns.
+#'
+#' @return A matrix of dimension \code{ncol(data) x ncol(data)} with pairwise PUC scores.
+#'
+#' @export
+calcPUC <- function(data) {
+  data <- as.matrix(data)
+  d <- ncol(data)
+
+  init <- array(0, dim = c(d, d))
+  co <- combn(1:d, 2)
+
+  # Function to calculate puc values, to be called from within next loop
+  .call_pid <- function(z, ind1, ind2, rev=FALSE) {
+    p <- pid(z=z, x1 = data[, ind1], x2 = data[, ind2])
+    mi1 <- (p$unique_x1 + p$redundancy)
+    mi2 <- (p$unique_x2 + p$redundancy)
+    c( p$unique_x1 / mi1, p$unique_x2 / mi2)
+  }
+
+  # Efficient calculation of PUC values for all gene triplets
+  puc_values_list <- BiocParallel::bplapply(1:d, function(c, init=NULL) {
+    z <- data[, c]
+
+    for(e in 1:ncol(co)) {
+      p <- .call_pid(z=z, ind1 = co[1,e], ind2 = co[2,e])
+
+      init[co[1,e], co[2,e]] <- p[1]
+      init[co[2,e], co[1,e]] <- p[2]
+    }
+    init
+  }, init=init)
+
+  puc_values <- array(as.numeric(unlist(puc_values_list)), dim=c(d, d, d), dimnames = list(colnames(data), colnames(data), colnames(data)))
+
+  # Aggregate puc values into pairwise puc scores
+  pairwise_puc_scores_list <- BiocParallel::bplapply(1:ncol(co), function(c) {
+    self <- c(co[1,c], co[2,c])
+    t1 <- puc_values[co[1,c], co[2,c], ]
+    t2 <- puc_values[co[2,c], co[1,c], ]
+    sum(t1[!t1 %in% self]) + sum(t2[!t2 %in% self])
+  })
+
+  pairwise_puc <- matrix(0, nrow = d, ncol = d, dimnames = list(colnames(data), colnames(data)))
+  pairwise_puc[upper.tri(pairwise_puc, diag=FALSE)] <- unlist(pairwise_puc_scores_list)
+  pairwise_puc[lower.tri(pairwise_puc, diag=F)] <- t(pairwise_puc)[lower.tri(pairwise_puc, diag = FALSE)]
+  pairwise_puc
+}
+