Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""This module abstracts information measures."""
import abc
from enum import Enum
import numpy as np
from scipy.stats import chi2_contingency
from entropy import entropy
from sc import sc
class DMType(Enum):
NHST = 1 # Null Hypothesis Significance Testing
INFO = 2 # Information-theoretic
class DependenceMeasure(abc.ABC):
@property
@abc.abstractmethod
def type(self):
pass
@staticmethod
@abc.abstractmethod
def measure(seq1, seq2=None):
pass
class Entropy(DependenceMeasure):
type = DMType.INFO
def measure(seq1, seq2=None):
return entropy(seq1)
class StochasticComplexity(DependenceMeasure):
type = DMType.INFO
def measure(seq1, seq2=None):
return sc(seq1)
class ChiSquaredTest(DependenceMeasure):
type = DMType.NHST
@staticmethod
def contingency_table(seq1, seq2):
dom_seq1 = list(set(seq1))
dom_seq2 = list(set(seq2))
ndom_seq1 = len(dom_seq1)
ndom_seq2 = len(dom_seq2)
indices1 = dict(zip(dom_seq1, range(ndom_seq1)))
indices2 = dict(zip(dom_seq2, range(ndom_seq2)))
table = np.zeros((ndom_seq1, ndom_seq2))
for k, v1 in enumerate(seq1):
v2 = seq2[k]
i, j = indices1[v1], indices2[v2]
table[i, j] += 1
return table
@staticmethod
def nhst(seq1, seq2):
assert len(seq1) == len(seq2), "samples are not of the same size"
table = ChiSquaredTest.contingency_table(seq1, seq2)
chi2, p_value, _, _ = chi2_contingency(table, correction=True)
return chi2, p_value
@staticmethod
def measure(seq1, seq2=None):
chi2, p_value = ChiSquaredTest.nhst(seq1, seq2)
# we want to minimise the dependence between seq1 and seq2 in ANM
# that is, maximise the independence between seq1 and seq2 in ANM
# H0: seq1 and seq2 are independent
# H0 becomes true if p-value is greater than a threshold
# thus we want to maximise p-value, or minimise the negative of p-value
p_value *= -1
# as chi2 gets smaller, p-value increases (check the chi2 plot in wiki)
# if the p-value is too small, we reject H0 anyway
# in such a case, we want to minimise chi2
if p_value < 10 ** -16:
p_value = chi2
return p_value
if __name__ == "__main__":
print(ChiSquaredTest.measure(np.random.choice(
[1, 2, 3], 10), np.random.choice([1, 2], 10)))
print(Entropy.measure(np.random.choice([1, 2, 3], 10)))
print(StochasticComplexity.measure(np.random.choice([1, 2, 3], 10)))