Skip to content
Permalink
d54f9b9bdc
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
108 lines (85 sloc) 2.78 KB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Computes the stochastic complexity of multinomial distribution.
http://www.sciencedirect.com/science/article/pii/S0020019007000944
"""
from __future__ import division
from collections import Counter
from decimal import Decimal
from math import ceil, e, factorial, log, pi, sqrt
from scipy.special import gammaln
__author__ = "Kailash Budhathoki"
__email__ = "kbudhath@mpi-inf.mpg.de"
__copyright__ = "Copyright (c) 2017"
__license__ = "MIT"
log2 = lambda n: log(n or 1, 2)
fact = lambda n: factorial(n)
def composition(n):
half = int(n / 2)
for i in range(half + 1):
r1, r2 = i, n - i
yield r1, r2
def multinomial_with_recurrence(L, n):
total = 1.0
b = 1.0
d = 10
bound = int(ceil(2 + sqrt(2 * n * d * log(10)))) # using equation (38)
for k in range(1, bound + 1):
b = (n - k + 1) / n * b
total += b
log_old_sum = log2(1.0)
log_total = log2(total)
log_n = log2(n)
for j in range(3, L + 1):
log_x = log_n + log_old_sum - log_total - log2(j - 2)
x = 2 ** log_x
# log_one_plus_x = (x + 8 * x / (2 + x) + x / (1 + x)) / 6
log_one_plus_x = log2(1 + x)
# one_plus_x = 1 + n * 2 ** log_old_sum / (2 ** log_total * (j - 2))
# log_one_plus_x = log2(one_plus_x)
log_new_sum = log_total + log_one_plus_x
log_old_sum = log_total
log_total = log_new_sum
# print log_total,
if L == 1:
log_total = log2(1.0)
return log_total
def stochastic_complexity(X, L=None):
freqs = Counter(X)
n = len(X)
L = L or len(freqs)
loglikelihood = 0.0
for freq in freqs.itervalues():
loglikelihood += freq * (log2(n) - log2(freq))
log_normalising_term = multinomial_with_recurrence(L, n)
sc = loglikelihood + log_normalising_term
return sc
def stochastic_complexity_slow(X):
freqs = Counter(X)
n, K = len(X), len(freqs)
likelihood = 1.0
for freq in freqs.itervalues():
likelihood *= (freq / n) ** freq
prev1 = 1.0
prev2 = 0.0
n_fact = fact(n)
for r1, r2 in composition(n):
prev2 += (n_fact / (fact(r1) * fact(r2))) * \
((r1 / n) ** r1) * ((r2 / n) ** r2)
for k in range(1, K - 1):
temp = prev2 + n * prev1 / k
prev1, prev2 = prev2, temp
loglikelihood = 0.0
for freq in freqs.itervalues():
loglikelihood += freq * (log2(n) - log2(freq))
sc = loglikelihood + log2(prev2)
return sc
def approx_normalizing_term(m, n):
res = ((m - 1) / 2) * log(n / (2 * pi)) + \
(m/2) * log(pi) - gammaln(m/2)
res = res * log(e, 2)
return res
if __name__ == "__main__":
L = 10
for n in xrange(10000,11000):
print multinomial_with_recurrence(L, n), approx_normalizing_term(L, n)