#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""This module implements the "Entropic Causal Inference" paper."""
import numpy as np
from sklearn.ensemble import IsolationForest


class CausalPair(object):
    def __init__(self, data):
        self.data = data
        self.nofsamples = data.shape[0]
        self.X = data[0]
        self.Y = data[1]
        self.Xmin = np.min(data[0])
        self.Xmax = np.max(data[0])
        self.Ymin = np.min(data[1])
        self.Ymax = np.max(data[1])

def quantize_data(pair):
    # Find the number of states n based on X and Y first. We select n as
    # large as possible while keeping, on average, around 10 samples per
    # state.
    p = pair.nofsamples
    # We also need the number of unique values so we can tell when an input
    # is a discrete variable. A variable is deemed discrete if its number of
    # unique values is less than 1/5th of the number of samples.
    uniqueX = pair.X.unique()
    uniqueY = pair.Y.unique()
    n = int(decide_quantization(p, uniqueX, uniqueY))
    deltaX = (pair.Xmax - pair.Xmin) / n
    rulerX = [pair.Xmin + i * deltaX for i in range(0, n - 1)]
    rulerX.append(pair.Xmax)
    deltaY = (pair.Ymax - pair.Ymin) / n
    rulerY = [pair.Ymin + i * deltaY for i in range(0, n - 1)]
    rulerY.append(pair.Ymax)
    Xq = np.digitize(pair.X, bins=rulerX)
    Yq = np.digitize(pair.Y, bins=rulerY)
    return Xq, Yq, n, p

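# A small worked illustration of the binning above (assumed toy values, not
# from the original): with Xmin = 0, Xmax = 10 and n = 5, deltaX = 2 and
# rulerX = [0, 2, 4, 6, 10], so np.digitize maps a sample x = 4.3 to state 3
# and the quantized labels Xq take values in 1..n, which
# estimate_conditionals() below shifts to 0-based row/column indices.
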
def decide_quantization(p, uniqueX, uniqueY):
    noUniqueX = len(uniqueX)
    noUniqueY = len(uniqueY)
    discreteX = 0
    discreteY = 0
    if 5 * noUniqueX < p:
        discreteX = 1
    if 5 * noUniqueY < p:
        discreteY = 1
    # 256 is chosen as the upper limit on the number of states. Exactly one of
    # the four terms below is non-zero, depending on which of the two
    # variables are discrete.
    n = np.min([256,
                discreteX * discreteY * np.max([noUniqueX, noUniqueY])
                + discreteX * (1 - discreteY) * np.max([noUniqueX, p / 10])
                + discreteY * (1 - discreteX) * np.max([noUniqueY, p / 10])
                + (1 - discreteY) * (1 - discreteX) * p / 10])
    return n

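# For example (assumed numbers, not from the original code): with p = 1000
# samples of two continuous variables, the unique-value counts are close to
# 1000, neither variable is flagged discrete, only the last term is non-zero,
# and n = min(256, p / 10) = 100, i.e. roughly 10 samples per state. If X were
# instead discrete with 7 unique values and Y continuous, the second term
# would give n = min(256, max(7, p / 10)) = 100.
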
def remove_outliers(df):
    outliers_fraction = 0.005
    p = df.shape[0]
    rng = np.random.RandomState(42)
    classifier = IsolationForest(max_samples=p,
                                 contamination=outliers_fraction,
                                 random_state=rng)
    classifier.fit(df)
    # IsolationForest.predict returns +1 for inliers and -1 for outliers;
    # map these to 1 and 0 and keep only the inliers.
    labels = 0.5 * classifier.predict(df) + 0.5
    df = df[labels == 1]
    return df

def estimate_conditionals(Xq, Yq, n, p):
    # Mxy is the conditional probability transition matrix of X given Y:
    # Mxy(i, j) = P(X=i | Y=j). Myx is the analogous matrix for Y given X.
    Mxy = np.zeros((n, n))
    Myx = np.zeros((n, n))
    for i in range(0, p):
        x = Xq[i]
        y = Yq[i]
        Mxy[x - 1, y - 1] += 1
        Myx[y - 1, x - 1] += 1
    u = Mxy.sum(axis=0)  # column sums: also the marginal for Y
    v = Myx.sum(axis=0)  # marginal for X
    for i in range(0, n):
        if u[i] != 0:
            Mxy[:, i] = Mxy[:, i].astype(float) / u[i]
        if v[i] != 0:
            Myx[:, i] = Myx[:, i].astype(float) / v[i]
    return Mxy, Myx, u / sum(u), v / sum(v)

def remove_zero_columns(M):
    # Drop columns that are entirely zero (states that never occur).
    t = (M == 0)
    v = np.all(t, axis=0)
    return M[:, ~v]

def entropy_minimizer(Myx, Mxy, n):
    # Remove all-zero columns first.
    Mxy = remove_zero_columns(Mxy)
    Myx = remove_zero_columns(Myx)
    nYX = Myx.shape[1]
    nXY = Mxy.shape[1]
    flag = 1
    eYX = []
    while flag:
        # Greedy step: choose the smallest of the per-column maxima ...
        e = np.min(np.max(Myx, axis=0))
        eYX.append(e)
        # ... and subtract it from the maximal entry of every column.
        Myx[np.argmax(Myx, axis=0), range(nYX)] -= e
        # Stop once the accumulated mass reaches 1 (up to numerical tolerance).
        flag = sum(eYX) < 1 - 10**-9
    flag = 1
    eXY = []
    while flag:
        # Same greedy procedure for the other direction.
        e = np.min(np.max(Mxy, axis=0))
        eXY.append(e)
        Mxy[np.argmax(Mxy, axis=0), range(nXY)] -= e
        flag = sum(eXY) < 1 - 10**-9
    return eYX, eXY  # eYX is the exogenous distribution for X -> Y

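# A worked illustration of one run of the greedy loop above (assumed toy
# numbers, not from the original). Take a 2x2 conditional matrix
# [[0.7, 0.4], [0.3, 0.6]], in which each column sums to 1:
#   step 1: column maxima are 0.7 and 0.6, so e = 0.6; subtracting it from
#           each column's maximum leaves [[0.1, 0.4], [0.3, 0.0]]
#   step 2: column maxima are 0.3 and 0.4, so e = 0.3, leaving
#           [[0.1, 0.1], [0.0, 0.0]]
#   step 3: e = 0.1 and the accumulated mass reaches 1, so the loop stops
# The constructed distribution is [0.6, 0.3, 0.1], whose entropy (about 1.30
# bits) is the exogenous entropy reported for that direction.
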
def calc_entropy(eYX):
    return sum([-np.log2(i ** i) for i in eYX])

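# Note on calc_entropy: -np.log2(p ** p) equals -p * log2(p), and 0 ** 0
# evaluates to 1, so zero-probability entries contribute 0 to the sum instead
# of raising log(0) warnings. The return value is the Shannon entropy
# H = -sum_i p_i * log2(p_i) in bits; for example,
# calc_entropy([0.5, 0.25, 0.25]) returns 1.5.
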
def entropic(df):
    # if not df:
    #     df = read_file(file_name)
    # Mildly clean up the data by removing outliers here, otherwise a few
    # points can throw off the whole quantization. Uses an isolation forest
    # fit by scikit-learn.
    # df = remove_outliers(df)
    pair = CausalPair(df)
    Xq, Yq, n, p = quantize_data(pair)
    # Some columns of the conditional probability tables will be zero; this is
    # fine, and they are dropped later.
    Mxy, Myx, pY, pX = estimate_conditionals(Xq, Yq, n, p)
    eYX, eXY = entropy_minimizer(Myx, Mxy, n)
    hYX = calc_entropy(eYX)
    hXY = calc_entropy(eXY)
    hX = calc_entropy(pX)
    hY = calc_entropy(pY)
    # Total entropy of the X -> Y model and of the Y -> X model, respectively.
    return hX + hYX, hY + hXY

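# A minimal usage sketch, not part of the original module. Assumptions: the
# input is a two-column pandas DataFrame whose integer column labels are
# 0 (candidate cause X) and 1 (candidate effect Y), and the direction with the
# smaller total entropy is the one preferred under the entropic causal
# inference framework.
if __name__ == "__main__":
    import pandas as pd

    rng = np.random.RandomState(0)
    x = rng.randint(0, 8, size=2000)      # candidate cause, 8 discrete states
    noise = rng.randint(0, 3, size=2000)  # low-entropy exogenous noise
    y = (x ** 2 + noise) % 11             # candidate effect: a noisy function of x
    df = pd.DataFrame({0: x, 1: y})

    score_xy, score_yx = entropic(df)     # (H(X) + H(E), H(Y) + H(E'))
    print("total entropy of the X -> Y model: %.3f bits" % score_xy)
    print("total entropy of the Y -> X model: %.3f bits" % score_yx)
    print("inferred direction:", "X -> Y" if score_xy < score_yx else "Y -> X")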