diff --git a/dc.py b/dc.py
index ba37a69..00ac6d6 100644
--- a/dc.py
+++ b/dc.py
@@ -5,55 +5,20 @@
 """
 from collections import defaultdict
 from math import sqrt
+
 import numpy as np
 from scipy.spatial.distance import pdist, squareform
 
+from dcor import dcor
+
 
 def dc(X, Y):
     prob_X, marg_X, prob_Y, marg_Y = distributions(X, Y)
-    dXtoY = dCor(prob_X, marg_Y)
-    dYtoX = dCor(prob_Y, marg_X)
+    dXtoY = dcor(prob_X, marg_Y)
+    dYtoX = dcor(prob_Y, marg_X)
     return (dXtoY, dYtoX)
 
 
-def dcov(X, Y):
-    n = X.shape[0]
-    XY = np.multiply(X, Y)
-    cov = sqrt(XY.sum()) / n
-    return cov
-
-
-def dvar(X):
-    return sqrt(np.sum(X ** 2 / X.shape[0] ** 2))
-
-
-def cent_dist(X):
-    M = squareform(pdist(X))    # distance matrix
-    rmean = M.mean(axis=1)
-    cmean = M.mean(axis=0)
-    gmean = rmean.mean()
-    Rmean = np.tile(rmean, (M.shape[0], 1)).transpose()
-    Cmean = np.tile(cmean, (M.shape[1], 1))
-    Gmean = np.tile(gmean, M.shape)
-    CM = M - Rmean - Cmean + Gmean
-    return CM
-
-
-def dCor(X, Y):
-    A = cent_dist(X)
-    B = cent_dist(Y)
-
-    dcov_AB = dcov(A, B)
-    dvar_A = dvar(A)
-    dvar_B = dvar(B)
-
-    dcor = 0.0
-    if dvar_A > 0.0 and dvar_B > 0.0:
-        dcor = dcov_AB / sqrt(dvar_A * dvar_B)
-
-    return dcor
-
-
 def distributions(X, Y):
     N = len(X)
     unq_X = set(map(tuple, X))