diff --git a/stat_tests.py b/stat_tests.py
deleted file mode 100644
index d3e491f..0000000
--- a/stat_tests.py
+++ /dev/null
@@ -1,712 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import numpy as np
-import scipy as sp
-import scipy.special
-import scipy.stats as st
-import itertools as it
-
-
-def binomial_sign_test(*args):
-    """
-    Performs a binomial sign test for two dependent samples.
-    Tests the hypothesis that the two dependent samples represent two different populations.
-
-    Parameters
-    ----------
-    sample1, sample2 : array_like
-        The sample measurements for each group.
-
-    Returns
-    -------
-    B-value : float
-        The computed B-value of the test (the larger of the two sign counts).
-    p-value : float
-        The associated p-value from the binomial distribution.
-
-    References
-    ----------
-    D.J. Sheskin, Handbook of Parametric and Nonparametric Statistical Procedures. CRC Press, 2003, Test 19: The Binomial Sign Test for Two Dependent Samples.
-    """
-    k = len(args)
-    if k != 2:
-        raise ValueError('The test needs two samples')
-    n = len(args[0])
-
-    d_plus = 0
-    d_minus = 0
-    for i in range(n):
-        # Zero differences are eliminated
-        if args[0][i] < args[1][i]:
-            d_plus = d_plus + 1
-        elif args[0][i] > args[1][i]:
-            d_minus = d_minus + 1
-
-    x = max(d_plus, d_minus)
-    n = d_plus + d_minus
-
-    # Two-tailed p-value: twice the upper tail P(X >= x), capped at 1
-    p_value = min(1, 2 * st.binom.sf(x - 1, n, 0.5))
-
-    return x, p_value
-
-
-def friedman_test(*args):
-    """
-    Performs a Friedman ranking test.
-    Tests the hypothesis that in a set of k dependent samples (where k >= 2) at least two of the groups represent populations with different median values.
-
-    Parameters
-    ----------
-    sample1, sample2, ... : array_like
-        The sample measurements for each group.
-
-    Returns
-    -------
-    F-value : float
-        The computed F-value of the test (Iman-Davenport correction).
-    p-value : float
-        The associated p-value from the F-distribution.
-    rankings : array_like
-        The average ranking of each group.
-    pivots : array_like
-        The pivotal quantities for each group.
-
-    References
-    ----------
-    M. Friedman, The use of ranks to avoid the assumption of normality implicit in the analysis of variance, Journal of the American Statistical Association 32 (1937) 674–701.
-    D.J. Sheskin, Handbook of Parametric and Nonparametric Statistical Procedures. CRC Press, 2003, Test 25: The Friedman Two-Way Analysis of Variance by Ranks.
-    """
-    k = len(args)
-    if k < 2:
-        raise ValueError('Less than 2 levels')
-    n = len(args[0])
-    if len(set([len(v) for v in args])) != 1:
-        raise ValueError('Unequal number of samples')
-
-    rankings = []
-    for i in range(n):
-        row = [col[i] for col in args]
-        row_sort = sorted(row)
-        # Mid-ranks are assigned in case of ties
-        rankings.append(
-            [row_sort.index(v) + 1 + (row_sort.count(v) - 1) / 2. for v in row])
-
-    rankings_avg = [np.mean([case[j] for case in rankings]) for j in range(k)]
-    rankings_cmp = [r / np.sqrt(k * (k + 1) / (6. * n)) for r in rankings_avg]
-
-    chi2 = ((12 * n) / float((k * (k + 1)))) * \
-        ((sum(r**2 for r in rankings_avg)) - ((k * (k + 1)**2) / float(4)))
-    iman_davenport = ((n - 1) * chi2) / float((n * (k - 1) - chi2))
-
-    p_value = 1 - st.f.cdf(iman_davenport, k - 1, (k - 1) * (n - 1))
-
-    return iman_davenport, p_value, rankings_avg, rankings_cmp
-
-
-def friedman_aligned_ranks_test(*args):
-    """
-    Performs a Friedman aligned ranks test.
-    Tests the hypothesis that in a set of k dependent samples (where k >= 2) at least two of the groups represent populations with different median values.
-    Unlike the Friedman test, the observations are first aligned by subtracting each block's average value and are then ranked jointly across all blocks, which is useful when the number of samples is small.
-
-    Parameters
-    ----------
-    sample1, sample2, ... : array_like
-        The sample measurements for each group.
-
-    Returns
-    -------
-    Chi2-value : float
-        The computed Chi2-value of the test.
-    p-value : float
-        The associated p-value from the Chi2-distribution.
-    rankings : array_like
-        The average ranking of each group.
-    pivots : array_like
-        The pivotal quantities for each group.
-
-    References
-    ----------
-    J.L. Hodges, E.L. Lehmann, Rank methods for combination of independent experiments in analysis of variance, Annals of Mathematical Statistics 33 (1962) 482–497.
-    """
-    k = len(args)
-    if k < 2:
-        raise ValueError('Less than 2 levels')
-    n = len(args[0])
-    if len(set([len(v) for v in args])) != 1:
-        raise ValueError('Unequal number of samples')
-
-    aligned_observations = []
-    for i in range(n):
-        # Align each block by subtracting its average value
-        loc = np.mean([col[i] for col in args])
-        aligned_observations.extend([col[i] - loc for col in args])
-
-    aligned_observations_sort = sorted(aligned_observations)
-
-    aligned_ranks = []
-    for i in range(n):
-        row = []
-        for j in range(k):
-            v = aligned_observations[i * k + j]
-            row.append(aligned_observations_sort.index(v) + 1 +
-                       (aligned_observations_sort.count(v) - 1) / 2.)
-        aligned_ranks.append(row)
-
-    rankings_avg = [np.mean([case[j] for case in aligned_ranks])
-                    for j in range(k)]
-    rankings_cmp = [r / np.sqrt(k * (n * k + 1) / 6.) for r in rankings_avg]
-
-    r_i = [np.sum(case) for case in aligned_ranks]
-    r_j = [np.sum([case[j] for case in aligned_ranks]) for j in range(k)]
-    T = (k - 1) * (sum(v**2 for v in r_j) - (k * n**2 / 4.) * (k * n + 1)**2) / \
-        float(((k * n * (k * n + 1) * (2 * k * n + 1)) / 6.) -
-              (1. / float(k)) * sum(v**2 for v in r_i))
-
-    p_value = 1 - st.chi2.cdf(T, k - 1)
-
-    return T, p_value, rankings_avg, rankings_cmp
-
-
-def quade_test(*args):
-    """
-    Performs a Quade ranking test.
-    Tests the hypothesis that in a set of k dependent samples (where k >= 2) at least two of the groups represent populations with different median values.
-    Unlike the Friedman test, each sample's ranking is weighted by the range of its observations, so the samples that discriminate most between the groups carry the most weight.
-
-    Parameters
-    ----------
-    sample1, sample2, ... : array_like
-        The sample measurements for each group.
-
-    Returns
-    -------
-    F-value : float
-        The computed F-value of the test.
-    p-value : float
-        The associated p-value from the F-distribution.
-    rankings : array_like
-        The average ranking of each group.
-    pivots : array_like
-        The pivotal quantities for each group.
-
-    References
-    ----------
-    D. Quade, Using weighted rankings in the analysis of complete blocks with additive block effects, Journal of the American Statistical Association 74 (1979) 680–683.
-    """
-    k = len(args)
-    if k < 2:
-        raise ValueError('Less than 2 levels')
-    n = len(args[0])
-    if len(set([len(v) for v in args])) != 1:
-        raise ValueError('Unequal number of samples')
-
-    rankings = []
-    ranges = []
-    for i in range(n):
-        row = [col[i] for col in args]
-        ranges.append(max(row) - min(row))
-        row_sort = sorted(row)
-        rankings.append(
-            [row_sort.index(v) + 1 + (row_sort.count(v) - 1) / 2. for v in row])
-
-    # Rank the samples by the range of their observations
-    ranges_sort = sorted(ranges)
-    ranking_cases = [ranges_sort.index(
-        v) + 1 + (ranges_sort.count(v) - 1) / 2. for v in ranges]
-
-    S = []
-    W = []
-    for i in range(n):
-        S.append([ranking_cases[i] * (r - (k + 1) / 2.)
-                  for r in rankings[i]])
-        W.append([ranking_cases[i] * r for r in rankings[i]])
-
-    Sj = [np.sum([row[j] for row in S]) for j in range(k)]
-    Wj = [np.sum([row[j] for row in W]) for j in range(k)]
-
-    rankings_avg = [w / (n * (n + 1) / 2.) for w in Wj]
-    rankings_cmp = [r / np.sqrt(k * (k + 1) * (2 * n + 1)
-                                * (k - 1) / (18. * n * (n + 1))) for r in rankings_avg]
-
-    A = sum(S[i][j]**2 for i in range(n) for j in range(k))
-    B = sum(s**2 for s in Sj) / float(n)
-    F = (n - 1) * B / (A - B)
-
-    p_value = 1 - st.f.cdf(F, k - 1, (k - 1) * (n - 1))
-
-    return F, p_value, rankings_avg, rankings_cmp
-
-
-def bonferroni_dunn_test(ranks, control=None):
-    """
-    Performs a Bonferroni-Dunn post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the ranking of the control method is different from each of the other methods.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-    control : string, optional
-        The name of the control method (one vs all); defaults to the group with the minimum ranking.
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    O.J. Dunn, Multiple comparisons among means, Journal of the American Statistical Association 56 (1961) 52–64.
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    if not control:
-        control_i = values.index(min(values))
-    else:
-        control_i = keys.index(control)
-
-    comparisons = [keys[control_i] + " vs " + keys[i]
-                   for i in range(k) if i != control_i]
-    z_values = [abs(values[control_i] - values[i])
-                for i in range(k) if i != control_i]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
-    adj_p_values = [min((k - 1) * p_value, 1) for p_value in p_values]
-
-    return comparisons, z_values, p_values, adj_p_values
-
-
-def holm_test(ranks, control=None):
-    """
-    Performs a Holm post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the ranking of the control method is different from each of the other methods.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-    control : string, optional
-        The name of the control method (one vs all); defaults to the group with the minimum ranking.
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    S. Holm, A simple sequentially rejective multiple test procedure, Scandinavian Journal of Statistics 6 (1979) 65–70.
- """ - k = len(ranks) - values = ranks.values() - keys = ranks.keys() - if not control: - control_i = values.index(min(values)) - else: - control_i = keys.index(control) - - comparisons = [keys[control_i] + " vs " + keys[i] - for i in range(k) if i != control_i] - z_values = [abs(values[control_i] - values[i]) - for i in range(k) if i != control_i] - p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values] - # Sort values by p_value so that p_0 < p_1 - p_values, z_values, comparisons = map( - list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0]))) - adj_p_values = [min(max((k - (j + 1)) * p_values[j] - for j in range(i + 1)), 1) for i in range(k - 1)] - - return comparisons, z_values, p_values, adj_p_values - - -def hochberg_test(ranks, control=None): - """ - Performs a Hochberg post-hoc test using the pivot quantities obtained by a ranking test. - Tests the hypothesis that the ranking of the control method is different to each of the other methods. - - Parameters - ---------- - pivots : dictionary_like - A dictionary with format 'groupname':'pivotal quantity' - control : string optional - The name of the control method, default the group with minimum ranking - - Returns - ---------- - Comparions : array-like - Strings identifier of each comparison with format 'group_i vs group_j' - Z-values : array-like - The computed Z-value statistic for each comparison. - p-values : array-like - The associated p-value from the Z-distribution wich depends on the index of the comparison - Adjusted p-values : array-like - The associated adjusted p-values wich can be compared with a significance level - - References - ---------- - Y. Hochberg, A sharper Bonferroni procedure for multiple tests of significance, Biometrika 75 (1988) 800–803. - """ - k = len(ranks) - values = ranks.values() - keys = ranks.keys() - if not control: - control_i = values.index(min(values)) - else: - control_i = keys.index(control) - - comparisons = [keys[control_i] + " vs " + keys[i] - for i in range(k) if i != control_i] - z_values = [abs(values[control_i] - values[i]) - for i in range(k) if i != control_i] - p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values] - # Sort values by p_value so that p_0 < p_1 - p_values, z_values, comparisons = map( - list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0]))) - adj_p_values = [min(max((k - j) * p_values[j - 1] - for j in range(k - 1, i, -1)), 1) for i in range(k - 1)] - - return comparisons, z_values, p_values, adj_p_values - - -def li_test(ranks, control=None): - """ - Performs a Li post-hoc test using the pivot quantities obtained by a ranking test. - Tests the hypothesis that the ranking of the control method is different to each of the other methods. - - Parameters - ---------- - pivots : dictionary_like - A dictionary with format 'groupname':'pivotal quantity' - control : string optional - The name of the control method, default the group with minimum ranking - - Returns - ---------- - Comparions : array-like - Strings identifier of each comparison with format 'group_i vs group_j' - Z-values : array-like - The computed Z-value statistic for each comparison. - p-values : array-like - The associated p-value from the Z-distribution wich depends on the index of the comparison - Adjusted p-values : array-like - The associated adjusted p-values wich can be compared with a significance level - - References - ---------- - J. 
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    if not control:
-        control_i = values.index(min(values))
-    else:
-        control_i = keys.index(control)
-
-    comparisons = [keys[control_i] + " vs " + keys[i]
-                   for i in range(k) if i != control_i]
-    z_values = [abs(values[control_i] - values[i])
-                for i in range(k) if i != control_i]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
-    adj_p_values = [p_values[i] /
-                    (p_values[i] + 1 - p_values[-1]) for i in range(k - 1)]
-
-    return comparisons, z_values, p_values, adj_p_values
-
-
-def finner_test(ranks, control=None):
-    """
-    Performs a Finner post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the ranking of the control method is different from each of the other methods.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-    control : string, optional
-        The name of the control method; defaults to the group with the minimum ranking.
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    H. Finner, On a monotonicity problem in step-down multiple test procedures, Journal of the American Statistical Association 88 (1993) 920–923.
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    if not control:
-        control_i = values.index(min(values))
-    else:
-        control_i = keys.index(control)
-
-    comparisons = [keys[control_i] + " vs " + keys[i]
-                   for i in range(k) if i != control_i]
-    z_values = [abs(values[control_i] - values[i])
-                for i in range(k) if i != control_i]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
-    adj_p_values = [min(max(1 - (1 - p_values[j])**((k - 1) / float(j + 1))
-                            for j in range(i + 1)), 1) for i in range(k - 1)]
-
-    return comparisons, z_values, p_values, adj_p_values
-
-
-def nemenyi_multitest(ranks):
-    """
-    Performs a Nemenyi post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the rankings of each pair of groups are different.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    P.B. Nemenyi, Distribution-free multiple comparisons, Ph.D. thesis, Princeton University, 1963.
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    versus = list(it.combinations(range(k), 2))
-
-    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
-    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
-    m = int(k * (k - 1) / 2.)
-    adj_p_values = [min(m * p_value, 1) for p_value in p_values]
-
-    return comparisons, z_values, p_values, adj_p_values
-
-
-def holm_multitest(ranks):
-    """
-    Performs a Holm post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the rankings of each pair of groups are different.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    S. Holm, A simple sequentially rejective multiple test procedure, Scandinavian Journal of Statistics 6 (1979) 65–70.
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    versus = list(it.combinations(range(k), 2))
-
-    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
-    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
-    m = int(k * (k - 1) / 2.)
-    adj_p_values = [min(max((m - j) * p_values[j]
-                            for j in range(i + 1)), 1) for i in range(m)]
-
-    return comparisons, z_values, p_values, adj_p_values
-
-
-def hochberg_multitest(ranks):
-    """
-    Performs a Hochberg post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the rankings of each pair of groups are different.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    Y. Hochberg, A sharper Bonferroni procedure for multiple tests of significance, Biometrika 75 (1988) 800–803.
- """ - k = len(ranks) - values = ranks.values() - keys = ranks.keys() - versus = list(it.combinations(range(k), 2)) - - comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus] - z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus] - p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values] - # Sort values by p_value so that p_0 < p_1 - p_values, z_values, comparisons = map( - list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0]))) - m = int(k * (k - 1) / 2.) - adj_p_values = [max((m + 1 - j) * p_values[j - 1] - for j in range(m, i, -1))for i in range(m)] - - return comparisons, z_values, p_values, adj_p_values - - -def finner_multitest(ranks): - """ - Performs a Finner post-hoc test using the pivot quantities obtained by a ranking test. - Tests the hypothesis that the ranking of each pair of groups are different. - - Parameters - ---------- - pivots : dictionary_like - A dictionary with format 'groupname':'pivotal quantity' - - Returns - ---------- - Comparions : array-like - Strings identifier of each comparison with format 'group_i vs group_j' - Z-values : array-like - The computed Z-value statistic for each comparison. - p-values : array-like - The associated p-value from the Z-distribution wich depends on the index of the comparison - Adjusted p-values : array-like - The associated adjusted p-values wich can be compared with a significance level - - References - ---------- - H. Finner, On a monotonicity problem in step-down multiple test procedures, Journal of the American Statistical Association 88 (1993) 920–923. - """ - k = len(ranks) - values = ranks.values() - keys = ranks.keys() - versus = list(it.combinations(range(k), 2)) - - comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus] - z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus] - p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values] - # Sort values by p_value so that p_0 < p_1 - p_values, z_values, comparisons = map( - list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0]))) - m = int(k * (k - 1) / 2.) - adj_p_values = [min(max(1 - (1 - p_values[j])**(m / float(j + 1)) - for j in range(i + 1)), 1) for i in range(m)] - - return comparisons, z_values, p_values, adj_p_values - - -def _S(k): - """ - Helper function for the Shaffer test. - It obtains the number of independent test hypotheses when using an All vs All strategy using the number of groups to be compared. - """ - if k == 0 or k == 1: - return {0} - else: - result = set() - for j in reversed(range(1, k + 1)): - tmp = S(k - j) - for s in tmp: - result = result.union({sp.special.binom(j, 2) + s}) - return list(result) - - -def shaffer_multitest(ranks): - """ - Performs a Shaffer post-hoc test using the pivot quantities obtained by a ranking test. - Tests the hypothesis that the ranking of each pair of groups are different. - - Parameters - ---------- - pivots : dictionary_like - A dictionary with format 'groupname':'pivotal quantity' - - Returns - ---------- - Comparions : array-like - Strings identifier of each comparison with format 'group_i vs group_j' - Z-values : array-like - The computed Z-value statistic for each comparison. - p-values : array-like - The associated p-value from the Z-distribution wich depends on the index of the comparison - Adjusted p-values : array-like - The associated adjusted p-values wich can be compared with a significance level - - References - ---------- - J. 
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    versus = list(it.combinations(range(k), 2))
-
-    m = int(k * (k - 1) / 2.)
-    # Recover the number of groups from m = k * (k - 1) / 2 and build S(k)
-    A = _S(int((1 + np.sqrt(1 + 8 * m)) / 2))
-    # t[i]: maximum number of hypotheses that can still be true once the
-    # i hypotheses with the smallest p-values have been rejected
-    t = [max([a for a in A if a <= m - i]) for i in range(m)]
-
-    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
-    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda pair: pair[0])))
-    adj_p_values = [min(max(t[j] * p_values[j]
-                            for j in range(i + 1)), 1) for i in range(m)]
-
-    return comparisons, z_values, p_values, adj_p_values
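
Reviewer note: for anyone who wants to smoke-test the module this patch deletes, the sketch below runs the usual omnibus-then-post-hoc pipeline. It assumes the pre-deletion stat_tests.py is still importable, and the method names and scores are invented for illustration.

    import stat_tests as stac

    # Made-up accuracies of three methods on five data sets
    method_a = [0.80, 0.77, 0.91, 0.68, 0.74]
    method_b = [0.75, 0.71, 0.88, 0.62, 0.70]
    method_c = [0.78, 0.74, 0.90, 0.66, 0.71]

    # Omnibus test: Iman-Davenport F statistic, its p-value, the average
    # rank of each method, and the pivotal quantities for the post-hoc step
    stat, p, avg_ranks, pivots = stac.friedman_test(method_a, method_b, method_c)

    # One-vs-all post-hoc comparisons with Holm's step-down adjustment;
    # the control defaults to the group with the smallest pivotal quantity
    comparisons, z, p_vals, adj_p = stac.holm_test(dict(zip(['A', 'B', 'C'], pivots)))
    for name, ap in zip(comparisons, adj_p):
        print(name, 'adjusted p =', round(ap, 4))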
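The two-tailed p-value of binomial_sign_test can be cross-checked against SciPy's exact binomial test (scipy.stats.binomtest, available since SciPy 1.7): at success probability 0.5, the doubled upper tail capped at 1 coincides with the exact two-sided p-value. A sketch with invented paired measurements:

    import scipy.stats as st
    from stat_tests import binomial_sign_test

    before = [12, 14, 9, 10, 8, 15, 11]
    after = [15, 13, 12, 14, 8, 18, 11]

    x, p = binomial_sign_test(before, after)

    # Reference computation: drop the ties, then run an exact two-sided
    # binomial test on the remaining signs
    wins = sum(a < b for a, b in zip(before, after))
    losses = sum(a > b for a, b in zip(before, after))
    p_ref = st.binomtest(min(wins, losses), wins + losses, 0.5).pvalue
    assert abs(p - p_ref) < 1e-12  # both give 0.375 for this data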
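Similarly, the repaired _S recursion can be checked against the known values of Shaffer's S(k): the number of simultaneously true pairwise hypotheses is a sum of binom(g, 2) over the group sizes g of a partition of the k groups, so for k = 4 (six pairwise hypotheses) the only possible counts are 0, 1, 2, 3 and 6.

    from stat_tests import _S

    # S(4): one equal pair gives 1, two equal pairs give 2, an equal
    # triple gives 3, all four groups equal give 6
    assert sorted(_S(4)) == [0, 1, 2, 3, 6]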