diff --git a/stat_tests.py b/stat_tests.py
deleted file mode 100644
index d3e491f..0000000
--- a/stat_tests.py
+++ /dev/null
@@ -1,712 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import numpy as np
-import scipy as sp
-import scipy.special
-import scipy.stats as st
-import itertools as it
-
-
-def binomial_sign_test(*args):
-    """
-    Performs a binomial sign test for two dependent samples.
-    Tests the hypothesis that the two dependent samples represent two different populations.
-
-    Parameters
-    ----------
-    sample1, sample2 : array_like
-        The sample measurements for each group.
-
-    Returns
-    -------
-    B-value : float
-        The computed B-value of the test (the larger of the two sign counts).
-    p-value : float
-        The associated p-value from the binomial distribution.
-
-    References
-    ----------
-    D.J. Sheskin, Handbook of Parametric and Nonparametric Statistical Procedures. CRC Press, 2003, Test 19: The Binomial Sign Test for Two Dependent Samples.
-    """
-    k = len(args)
-    if k != 2:
-        raise ValueError('The test needs two samples')
-    n = len(args[0])
-
-    d_plus = 0
-    d_minus = 0
-    for i in range(n):
-        # Zero differences are eliminated
-        if args[0][i] < args[1][i]:
-            d_plus = d_plus + 1
-        elif args[0][i] > args[1][i]:
-            d_minus = d_minus + 1
-
-    x = max(d_plus, d_minus)
-    n = d_plus + d_minus
-
-    # Two-tailed p-value: twice the upper tail P(X >= x), capped at 1
-    p_value = min(1, 2 * st.binom.sf(x - 1, n, 0.5))
-
-    return x, p_value
-
-
-def friedman_test(*args):
-    """
-    Performs a Friedman ranking test.
-    Tests the hypothesis that in a set of k dependent samples (where k >= 2) at least two of the groups represent populations with different median values.
-
-    Parameters
-    ----------
-    sample1, sample2, ... : array_like
-        The sample measurements for each group.
-
-    Returns
-    -------
-    F-value : float
-        The computed F-value of the test (Iman-Davenport correction).
-    p-value : float
-        The associated p-value from the F-distribution.
-    rankings : array_like
-        The average ranking of each group.
-    pivots : array_like
-        The pivotal quantities for each group.
-
-    References
-    ----------
-    M. Friedman, The use of ranks to avoid the assumption of normality implicit in the analysis of variance, Journal of the American Statistical Association 32 (1937) 674–701.
-    D.J. Sheskin, Handbook of Parametric and Nonparametric Statistical Procedures. CRC Press, 2003, Test 25: The Friedman Two-Way Analysis of Variance by Ranks.
-    """
-    k = len(args)
-    if k < 2:
-        raise ValueError('Less than 2 levels')
-    n = len(args[0])
-    if len(set([len(v) for v in args])) != 1:
-        raise ValueError('Unequal number of samples')
-
-    rankings = []
-    for i in range(n):
-        row = [col[i] for col in args]
-        row_sort = sorted(row)
-        # Mid-ranks are assigned in case of ties
-        rankings.append(
-            [row_sort.index(v) + 1 + (row_sort.count(v) - 1) / 2. for v in row])
-
-    rankings_avg = [np.mean([case[j] for case in rankings]) for j in range(k)]
-    rankings_cmp = [r / np.sqrt(k * (k + 1) / (6. * n)) for r in rankings_avg]
-
-    chi2 = ((12 * n) / float((k * (k + 1)))) * \
-        ((sum(r**2 for r in rankings_avg)) - ((k * (k + 1)**2) / float(4)))
-    iman_davenport = ((n - 1) * chi2) / float((n * (k - 1) - chi2))
-
-    p_value = 1 - st.f.cdf(iman_davenport, k - 1, (k - 1) * (n - 1))
-
-    return iman_davenport, p_value, rankings_avg, rankings_cmp
-
-
-def friedman_aligned_ranks_test(*args):
-    """
-    Performs a Friedman aligned ranks test.
-    Tests the hypothesis that in a set of k dependent samples (where k >= 2) at least two of the groups represent populations with different median values.
-    Unlike the Friedman test, the observations are first aligned by subtracting each block's average value and are then ranked jointly across all blocks, which is useful when the number of samples is small.
-
-    Parameters
-    ----------
-    sample1, sample2, ... : array_like
-        The sample measurements for each group.
-
-    Returns
-    -------
-    Chi2-value : float
-        The computed Chi2-value of the test.
-    p-value : float
-        The associated p-value from the Chi2-distribution.
-    rankings : array_like
-        The average ranking of each group.
-    pivots : array_like
-        The pivotal quantities for each group.
-
-    References
-    ----------
-    J.L. Hodges, E.L. Lehmann, Rank methods for combination of independent experiments in analysis of variance, Annals of Mathematical Statistics 33 (1962) 482–497.
-    """
-    k = len(args)
-    if k < 2:
-        raise ValueError('Less than 2 levels')
-    n = len(args[0])
-    if len(set([len(v) for v in args])) != 1:
-        raise ValueError('Unequal number of samples')
-
-    aligned_observations = []
-    for i in range(n):
-        # Align each block by subtracting its average value
-        loc = np.mean([col[i] for col in args])
-        aligned_observations.extend([col[i] - loc for col in args])
-
-    aligned_observations_sort = sorted(aligned_observations)
-
-    aligned_ranks = []
-    for i in range(n):
-        row = []
-        for j in range(k):
-            v = aligned_observations[i * k + j]
-            row.append(aligned_observations_sort.index(v) + 1 +
-                       (aligned_observations_sort.count(v) - 1) / 2.)
-        aligned_ranks.append(row)
-
-    rankings_avg = [np.mean([case[j] for case in aligned_ranks])
-                    for j in range(k)]
-    rankings_cmp = [r / np.sqrt(k * (n * k + 1) / 6.) for r in rankings_avg]
-
-    r_i = [np.sum(case) for case in aligned_ranks]
-    r_j = [np.sum([case[j] for case in aligned_ranks]) for j in range(k)]
-    T = (k - 1) * (sum(v**2 for v in r_j) - (k * n**2 / 4.) * (k * n + 1)**2) / \
-        float(((k * n * (k * n + 1) * (2 * k * n + 1)) / 6.) -
-              (1. / float(k)) * sum(v**2 for v in r_i))
-
-    p_value = 1 - st.chi2.cdf(T, k - 1)
-
-    return T, p_value, rankings_avg, rankings_cmp
-
-
-def quade_test(*args):
-    """
-    Performs a Quade ranking test.
-    Tests the hypothesis that in a set of k dependent samples (where k >= 2) at least two of the groups represent populations with different median values.
-    Unlike the Friedman test, each sample's ranking is weighted by the range of its observations, so the samples that discriminate most between the groups carry the most weight.
-
-    Parameters
-    ----------
-    sample1, sample2, ... : array_like
-        The sample measurements for each group.
-
-    Returns
-    -------
-    F-value : float
-        The computed F-value of the test.
-    p-value : float
-        The associated p-value from the F-distribution.
-    rankings : array_like
-        The average ranking of each group.
-    pivots : array_like
-        The pivotal quantities for each group.
-
-    References
-    ----------
-    D. Quade, Using weighted rankings in the analysis of complete blocks with additive block effects, Journal of the American Statistical Association 74 (1979) 680–683.
-    """
-    k = len(args)
-    if k < 2:
-        raise ValueError('Less than 2 levels')
-    n = len(args[0])
-    if len(set([len(v) for v in args])) != 1:
-        raise ValueError('Unequal number of samples')
-
-    rankings = []
-    ranges = []
-    for i in range(n):
-        row = [col[i] for col in args]
-        ranges.append(max(row) - min(row))
-        row_sort = sorted(row)
-        rankings.append(
-            [row_sort.index(v) + 1 + (row_sort.count(v) - 1) / 2. for v in row])
-
-    # Rank the samples by the range of their observations
-    ranges_sort = sorted(ranges)
-    ranking_cases = [ranges_sort.index(
-        v) + 1 + (ranges_sort.count(v) - 1) / 2. for v in ranges]
-
-    S = []
-    W = []
-    for i in range(n):
-        S.append([ranking_cases[i] * (r - (k + 1) / 2.)
-                  for r in rankings[i]])
-        W.append([ranking_cases[i] * r for r in rankings[i]])
-
-    Sj = [np.sum([row[j] for row in S]) for j in range(k)]
-    Wj = [np.sum([row[j] for row in W]) for j in range(k)]
-
-    rankings_avg = [w / (n * (n + 1) / 2.) for w in Wj]
-    rankings_cmp = [r / np.sqrt(k * (k + 1) * (2 * n + 1)
-                                * (k - 1) / (18. * n * (n + 1))) for r in rankings_avg]
-
-    A = sum(S[i][j]**2 for i in range(n) for j in range(k))
-    B = sum(s**2 for s in Sj) / float(n)
-    F = (n - 1) * B / (A - B)
-
-    p_value = 1 - st.f.cdf(F, k - 1, (k - 1) * (n - 1))
-
-    return F, p_value, rankings_avg, rankings_cmp
-
-
-def bonferroni_dunn_test(ranks, control=None):
-    """
-    Performs a Bonferroni-Dunn post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the ranking of the control method is different from each of the other methods.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-    control : string, optional
-        The name of the control method (one vs all); defaults to the group with the minimum ranking.
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    O.J. Dunn, Multiple comparisons among means, Journal of the American Statistical Association 56 (1961) 52–64.
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    if not control:
-        control_i = values.index(min(values))
-    else:
-        control_i = keys.index(control)
-
-    comparisons = [keys[control_i] + " vs " + keys[i]
-                   for i in range(k) if i != control_i]
-    z_values = [abs(values[control_i] - values[i])
-                for i in range(k) if i != control_i]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
-    adj_p_values = [min((k - 1) * p_value, 1) for p_value in p_values]
-
-    return comparisons, z_values, p_values, adj_p_values
-
-
-def holm_test(ranks, control=None):
-    """
-    Performs a Holm post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the ranking of the control method is different from each of the other methods.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-    control : string, optional
-        The name of the control method (one vs all); defaults to the group with the minimum ranking.
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    S. Holm, A simple sequentially rejective multiple test procedure, Scandinavian Journal of Statistics 6 (1979) 65–70.
- """ - k = len(ranks) - values = ranks.values() - keys = ranks.keys() - if not control: - control_i = values.index(min(values)) - else: - control_i = keys.index(control) - - comparisons = [keys[control_i] + " vs " + keys[i] - for i in range(k) if i != control_i] - z_values = [abs(values[control_i] - values[i]) - for i in range(k) if i != control_i] - p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values] - # Sort values by p_value so that p_0 < p_1 - p_values, z_values, comparisons = map( - list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0]))) - adj_p_values = [min(max((k - (j + 1)) * p_values[j] - for j in range(i + 1)), 1) for i in range(k - 1)] - - return comparisons, z_values, p_values, adj_p_values - - -def hochberg_test(ranks, control=None): - """ - Performs a Hochberg post-hoc test using the pivot quantities obtained by a ranking test. - Tests the hypothesis that the ranking of the control method is different to each of the other methods. - - Parameters - ---------- - pivots : dictionary_like - A dictionary with format 'groupname':'pivotal quantity' - control : string optional - The name of the control method, default the group with minimum ranking - - Returns - ---------- - Comparions : array-like - Strings identifier of each comparison with format 'group_i vs group_j' - Z-values : array-like - The computed Z-value statistic for each comparison. - p-values : array-like - The associated p-value from the Z-distribution wich depends on the index of the comparison - Adjusted p-values : array-like - The associated adjusted p-values wich can be compared with a significance level - - References - ---------- - Y. Hochberg, A sharper Bonferroni procedure for multiple tests of significance, Biometrika 75 (1988) 800–803. - """ - k = len(ranks) - values = ranks.values() - keys = ranks.keys() - if not control: - control_i = values.index(min(values)) - else: - control_i = keys.index(control) - - comparisons = [keys[control_i] + " vs " + keys[i] - for i in range(k) if i != control_i] - z_values = [abs(values[control_i] - values[i]) - for i in range(k) if i != control_i] - p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values] - # Sort values by p_value so that p_0 < p_1 - p_values, z_values, comparisons = map( - list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0]))) - adj_p_values = [min(max((k - j) * p_values[j - 1] - for j in range(k - 1, i, -1)), 1) for i in range(k - 1)] - - return comparisons, z_values, p_values, adj_p_values - - -def li_test(ranks, control=None): - """ - Performs a Li post-hoc test using the pivot quantities obtained by a ranking test. - Tests the hypothesis that the ranking of the control method is different to each of the other methods. - - Parameters - ---------- - pivots : dictionary_like - A dictionary with format 'groupname':'pivotal quantity' - control : string optional - The name of the control method, default the group with minimum ranking - - Returns - ---------- - Comparions : array-like - Strings identifier of each comparison with format 'group_i vs group_j' - Z-values : array-like - The computed Z-value statistic for each comparison. - p-values : array-like - The associated p-value from the Z-distribution wich depends on the index of the comparison - Adjusted p-values : array-like - The associated adjusted p-values wich can be compared with a significance level - - References - ---------- - J. 
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    if not control:
-        control_i = values.index(min(values))
-    else:
-        control_i = keys.index(control)
-
-    comparisons = [keys[control_i] + " vs " + keys[i]
-                   for i in range(k) if i != control_i]
-    z_values = [abs(values[control_i] - values[i])
-                for i in range(k) if i != control_i]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
-    adj_p_values = [p_values[i] /
-                    (p_values[i] + 1 - p_values[-1]) for i in range(k - 1)]
-
-    return comparisons, z_values, p_values, adj_p_values
-
-
-def finner_test(ranks, control=None):
-    """
-    Performs a Finner post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the ranking of the control method is different from each of the other methods.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-    control : string, optional
-        The name of the control method; defaults to the group with the minimum ranking.
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    H. Finner, On a monotonicity problem in step-down multiple test procedures, Journal of the American Statistical Association 88 (1993) 920–923.
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    if not control:
-        control_i = values.index(min(values))
-    else:
-        control_i = keys.index(control)
-
-    comparisons = [keys[control_i] + " vs " + keys[i]
-                   for i in range(k) if i != control_i]
-    z_values = [abs(values[control_i] - values[i])
-                for i in range(k) if i != control_i]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
-    adj_p_values = [min(max(1 - (1 - p_values[j])**((k - 1) / float(j + 1))
-                            for j in range(i + 1)), 1) for i in range(k - 1)]
-
-    return comparisons, z_values, p_values, adj_p_values
-
-
-def nemenyi_multitest(ranks):
-    """
-    Performs a Nemenyi post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the rankings of each pair of groups are different.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    P.B. Nemenyi, Distribution-free multiple comparisons, Ph.D. thesis, Princeton University, 1963.
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    versus = list(it.combinations(range(k), 2))
-
-    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
-    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
-    m = int(k * (k - 1) / 2.)
-    adj_p_values = [min(m * p_value, 1) for p_value in p_values]
-
-    return comparisons, z_values, p_values, adj_p_values
-
-
-def holm_multitest(ranks):
-    """
-    Performs a Holm post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the rankings of each pair of groups are different.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    S. Holm, A simple sequentially rejective multiple test procedure, Scandinavian Journal of Statistics 6 (1979) 65–70.
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    versus = list(it.combinations(range(k), 2))
-
-    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
-    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
-    m = int(k * (k - 1) / 2.)
-    adj_p_values = [min(max((m - j) * p_values[j]
-                            for j in range(i + 1)), 1) for i in range(m)]
-
-    return comparisons, z_values, p_values, adj_p_values
-
-
-def hochberg_multitest(ranks):
-    """
-    Performs a Hochberg post-hoc test using the pivotal quantities obtained by a ranking test.
-    Tests the hypothesis that the rankings of each pair of groups are different.
-
-    Parameters
-    ----------
-    ranks : dictionary_like
-        A dictionary with format 'groupname':'pivotal quantity'
-
-    Returns
-    -------
-    Comparisons : array-like
-        String identifiers of each comparison, with format 'group_i vs group_j'.
-    Z-values : array-like
-        The computed Z-value statistic for each comparison.
-    p-values : array-like
-        The associated p-values from the Z-distribution, sorted in ascending order.
-    Adjusted p-values : array-like
-        The associated adjusted p-values, which can be compared directly with a significance level.
-
-    References
-    ----------
-    Y. Hochberg, A sharper Bonferroni procedure for multiple tests of significance, Biometrika 75 (1988) 800–803.
- """ - k = len(ranks) - values = ranks.values() - keys = ranks.keys() - versus = list(it.combinations(range(k), 2)) - - comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus] - z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus] - p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values] - # Sort values by p_value so that p_0 < p_1 - p_values, z_values, comparisons = map( - list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0]))) - m = int(k * (k - 1) / 2.) - adj_p_values = [max((m + 1 - j) * p_values[j - 1] - for j in range(m, i, -1))for i in range(m)] - - return comparisons, z_values, p_values, adj_p_values - - -def finner_multitest(ranks): - """ - Performs a Finner post-hoc test using the pivot quantities obtained by a ranking test. - Tests the hypothesis that the ranking of each pair of groups are different. - - Parameters - ---------- - pivots : dictionary_like - A dictionary with format 'groupname':'pivotal quantity' - - Returns - ---------- - Comparions : array-like - Strings identifier of each comparison with format 'group_i vs group_j' - Z-values : array-like - The computed Z-value statistic for each comparison. - p-values : array-like - The associated p-value from the Z-distribution wich depends on the index of the comparison - Adjusted p-values : array-like - The associated adjusted p-values wich can be compared with a significance level - - References - ---------- - H. Finner, On a monotonicity problem in step-down multiple test procedures, Journal of the American Statistical Association 88 (1993) 920–923. - """ - k = len(ranks) - values = ranks.values() - keys = ranks.keys() - versus = list(it.combinations(range(k), 2)) - - comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus] - z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus] - p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values] - # Sort values by p_value so that p_0 < p_1 - p_values, z_values, comparisons = map( - list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0]))) - m = int(k * (k - 1) / 2.) - adj_p_values = [min(max(1 - (1 - p_values[j])**(m / float(j + 1)) - for j in range(i + 1)), 1) for i in range(m)] - - return comparisons, z_values, p_values, adj_p_values - - -def _S(k): - """ - Helper function for the Shaffer test. - It obtains the number of independent test hypotheses when using an All vs All strategy using the number of groups to be compared. - """ - if k == 0 or k == 1: - return {0} - else: - result = set() - for j in reversed(range(1, k + 1)): - tmp = S(k - j) - for s in tmp: - result = result.union({sp.special.binom(j, 2) + s}) - return list(result) - - -def shaffer_multitest(ranks): - """ - Performs a Shaffer post-hoc test using the pivot quantities obtained by a ranking test. - Tests the hypothesis that the ranking of each pair of groups are different. - - Parameters - ---------- - pivots : dictionary_like - A dictionary with format 'groupname':'pivotal quantity' - - Returns - ---------- - Comparions : array-like - Strings identifier of each comparison with format 'group_i vs group_j' - Z-values : array-like - The computed Z-value statistic for each comparison. - p-values : array-like - The associated p-value from the Z-distribution wich depends on the index of the comparison - Adjusted p-values : array-like - The associated adjusted p-values wich can be compared with a significance level - - References - ---------- - J. 
-    """
-    k = len(ranks)
-    values = list(ranks.values())
-    keys = list(ranks.keys())
-    versus = list(it.combinations(range(k), 2))
-
-    m = int(k * (k - 1) / 2.)
-    # Recover the number of groups from m = k * (k - 1) / 2 and build S(k)
-    A = _S(int((1 + np.sqrt(1 + 8 * m)) / 2))
-    # t[i]: maximum number of hypotheses that can still be true once the
-    # i hypotheses with the smallest p-values have been rejected
-    t = [max([a for a in A if a <= m - i]) for i in range(m)]
-
-    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
-    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
-    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
-    # Sort values by p_value so that p_0 < p_1
-    p_values, z_values, comparisons = map(
-        list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda pair: pair[0])))
-    adj_p_values = [min(max(t[j] * p_values[j]
-                            for j in range(i + 1)), 1) for i in range(m)]
-
-    return comparisons, z_values, p_values, adj_p_values
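
Reviewer note: for anyone who wants to smoke-test the module this patch deletes, the sketch below runs the usual omnibus-then-post-hoc pipeline. It assumes the pre-deletion stat_tests.py is still importable, and the method names and scores are invented for illustration.

    import stat_tests as stac

    # Made-up accuracies of three methods on five data sets
    method_a = [0.80, 0.77, 0.91, 0.68, 0.74]
    method_b = [0.75, 0.71, 0.88, 0.62, 0.70]
    method_c = [0.78, 0.74, 0.90, 0.66, 0.71]

    # Omnibus test: Iman-Davenport F statistic, its p-value, the average
    # rank of each method, and the pivotal quantities for the post-hoc step
    stat, p, avg_ranks, pivots = stac.friedman_test(method_a, method_b, method_c)

    # One-vs-all post-hoc comparisons with Holm's step-down adjustment;
    # the control defaults to the group with the smallest pivotal quantity
    comparisons, z, p_vals, adj_p = stac.holm_test(dict(zip(['A', 'B', 'C'], pivots)))
    for name, ap in zip(comparisons, adj_p):
        print(name, 'adjusted p =', round(ap, 4))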
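The two-tailed p-value of binomial_sign_test can be cross-checked against SciPy's exact binomial test (scipy.stats.binomtest, available since SciPy 1.7): at success probability 0.5, the doubled upper tail capped at 1 coincides with the exact two-sided p-value. A sketch with invented paired measurements:

    import scipy.stats as st
    from stat_tests import binomial_sign_test

    before = [12, 14, 9, 10, 8, 15, 11]
    after = [15, 13, 12, 14, 8, 18, 11]

    x, p = binomial_sign_test(before, after)

    # Reference computation: drop the ties, then run an exact two-sided
    # binomial test on the remaining signs
    wins = sum(a < b for a, b in zip(before, after))
    losses = sum(a > b for a, b in zip(before, after))
    p_ref = st.binomtest(min(wins, losses), wins + losses, 0.5).pvalue
    assert abs(p - p_ref) < 1e-12  # both give 0.375 for this data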
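Similarly, the repaired _S recursion can be checked against the known values of Shaffer's S(k): the number of simultaneously true pairwise hypotheses is a sum of binom(g, 2) over the group sizes g of a partition of the k groups, so for k = 4 (six pairwise hypotheses) the only possible counts are 0, 1, 2, 3 and 6.

    from stat_tests import _S

    # S(4): one equal pair gives 1, two equal pairs give 2, an equal
    # triple gives 3, all four groups equal give 6
    assert sorted(_S(4)) == [0, 1, 2, 3, 6]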