diff --git a/DESCRIPTION b/DESCRIPTION index a4a135e..ecead68 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,5 +8,6 @@ License: MIT Encoding: UTF-8 LazyData: true Imports: rPython, - entropy + entropy, + BiocParallel RoxygenNote: 6.0.1 diff --git a/R/pid.R b/R/pid.R index e580402..8b67028 100644 --- a/R/pid.R +++ b/R/pid.R @@ -1,20 +1,3 @@ -#' For two sources, calculates partial information decompositions for all targets -#' -#' @param targets A discretized matrix, with each target in a column. -#' @param x1 The discretized first source variable. -#' @param x2 The discretized second source variable. -#' -#' @return A matrix with a row for each target and four columns corresponding to the PID components. -#' -#' @export -tPID <- function(targets, x1, x2) { - results <- bplapply(1:ncol(targets), pid, x1, x2) - - if (class(results) == "list") { - - } -} - #' Provides a partial information decomposition of the source variables S = {X1, X2} about a target variable Z. #' #' @param z The discretized target variable diff --git a/R/puc.R b/R/puc.R new file mode 100644 index 0000000..85909ad --- /dev/null +++ b/R/puc.R @@ -0,0 +1,53 @@ +#' Pairwise proportional unique contributions +#' +#' This function calculates pairwise proportional contributions between all column pairs. +#' +#' @param data Discretized data matrix. Observations in rows, variables (features) in columns. +#' +#' @return A matrix of dimension \code{ncol(data) x ncol(data)} with pairwise PUC scores. +#' +#' @export +calcPUC <- function(data) { + data <- as.matrix(data) + d <- ncol(data) + + init <- array(0, dim = c(d, d)) + co <- combn(1:d, 2) + + # Function to calculate puc values, to be called from within next loop + .call_pid <- function(z, ind1, ind2, rev=FALSE) { + p <- pid(z=z, x1 = data[, ind1], x2 = data[, ind2]) + mi1 <- (p$unique_x1 + p$redundancy) + mi2 <- (p$unique_x2 + p$redundancy) + c( p$unique_x1 / mi1, p$unique_x2 / mi2) + } + + # Efficient calculation of PUC values for all gene triplets + puc_values_list <- BiocParallel::bplapply(1:d, function(c, init=NULL) { + z <- data[, c] + + for(e in 1:ncol(co)) { + p <- .call_pid(z=z, ind1 = co[1,e], ind2 = co[2,e]) + + init[co[1,e], co[2,e]] <- p[1] + init[co[2,e], co[1,e]] <- p[2] + } + init + }, init=init) + + puc_values <- array(as.numeric(unlist(puc_values_list)), dim=c(d, d, d), dimnames = list(colnames(data), colnames(data), colnames(data))) + + # Aggregate puc values into pairwise puc scores + pairwise_puc_scores_list <- BiocParallel::bplapply(1:ncol(co), function(c) { + self <- c(co[1,c], co[2,c]) + t1 <- puc_values[co[1,c], co[2,c], ] + t2 <- puc_values[co[2,c], co[1,c], ] + sum(t1[!t1 %in% self]) + sum(t2[!t2 %in% self]) + }) + + pairwise_puc <- matrix(0, nrow = d, ncol = d, dimnames = list(colnames(data), colnames(data))) + pairwise_puc[upper.tri(pairwise_puc, diag=FALSE)] <- unlist(pairwise_puc_scores_list) + pairwise_puc[lower.tri(pairwise_puc, diag=F)] <- t(pairwise_puc)[lower.tri(pairwise_puc, diag = FALSE)] + pairwise_puc +} +