File: permutations.py

package info (click to toggle)
python-mne 0.19.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 100,440 kB
  • sloc: python: 120,243; pascal: 1,861; makefile: 225; sh: 15
file content (157 lines) | stat: -rw-r--r-- 6,294 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""T-test with permutations."""

# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#
# License: Simplified BSD

from math import sqrt
import numpy as np

from ..utils import check_random_state, verbose, logger
from ..parallel import parallel_func


def _max_stat(X, X2, perms, dof_scaling):
    """Aux function for permutation_t_test (for parallel comp)."""
    n_samples = len(X)
    mus = np.dot(perms, X) / float(n_samples)
    stds = np.sqrt(X2[None, :] - mus * mus) * dof_scaling  # std with splitting
    max_abs = np.max(np.abs(mus) / (stds / sqrt(n_samples)), axis=1)  # t-max
    return max_abs


@verbose
def permutation_t_test(X, n_permutations=10000, tail=0, n_jobs=1,
                       seed=None, verbose=None):
    """One sample/paired sample permutation test based on a t-statistic.

    This function can perform the test on one variable or
    simultaneously on multiple variables. When applying the test to multiple
    variables, the "tmax" method is used for adjusting the p-values of each
    variable for multiple comparisons. Like Bonferroni correction, this method
    adjusts p-values in a way that controls the family-wise error rate.
    However, the permutation method will be more
    powerful than Bonferroni correction when different variables in the test
    are correlated (see [1]_).

    Parameters
    ----------
    X : array, shape (n_samples, n_tests)
        Samples (observations) by number of tests (variables).
    n_permutations : int | 'all'
        Number of permutations. If n_permutations is 'all' all possible
        permutations are tested. It's the exact test, that
        can be untractable when the number of samples is big (e.g. > 20).
        If n_permutations >= 2**n_samples then the exact test is performed.
    tail : -1 or 0 or 1 (default = 0)
        If tail is 1, the alternative hypothesis is that the
        mean of the data is greater than 0 (upper tailed test).  If tail is 0,
        the alternative hypothesis is that the mean of the data is different
        than 0 (two tailed test).  If tail is -1, the alternative hypothesis
        is that the mean of the data is less than 0 (lower tailed test).
    %(n_jobs)s
    %(seed)s
    %(verbose)s

    Returns
    -------
    T_obs : array of shape [n_tests]
        T-statistic observed for all variables

    p_values : array of shape [n_tests]
        P-values for all the tests (aka variables)

    H0 : array of shape [n_permutations]
        T-statistic obtained by permutations and t-max trick for multiple
        comparison.

    Notes
    -----
    If ``n_permutations >= 2 ** (n_samples - (tail == 0))``,
    ``n_permutations`` and ``seed`` will be ignored since an exact test
    (full permutation test) will be performed.

    References
    ----------
    .. [1] Nichols, T. E. & Holmes, A. P. (2002). Nonparametric permutation
       tests for functional neuroimaging: a primer with examples.
       Human Brain Mapping, 15, 1-25.
    """
    from .cluster_level import _get_1samp_orders
    n_samples, n_tests = X.shape
    X2 = np.mean(X ** 2, axis=0)  # precompute moments
    mu0 = np.mean(X, axis=0)
    dof_scaling = sqrt(n_samples / (n_samples - 1.0))
    std0 = np.sqrt(X2 - mu0 ** 2) * dof_scaling  # get std with var splitting
    T_obs = np.mean(X, axis=0) / (std0 / sqrt(n_samples))
    rng = check_random_state(seed)
    orders, _, extra = _get_1samp_orders(n_samples, n_permutations, tail, rng)
    perms = 2 * np.array(orders) - 1  # from 0, 1 -> 1, -1
    logger.info('Permuting %d times%s...' % (len(orders), extra))
    parallel, my_max_stat, n_jobs = parallel_func(_max_stat, n_jobs)
    max_abs = np.concatenate(parallel(my_max_stat(X, X2, p, dof_scaling)
                                      for p in np.array_split(perms, n_jobs)))
    max_abs = np.concatenate((max_abs, [np.abs(T_obs).max()]))
    H0 = np.sort(max_abs)
    if tail == 0:
        p_values = (H0 >= np.abs(T_obs[:, np.newaxis])).mean(-1)
    elif tail == 1:
        p_values = (H0 >= T_obs[:, np.newaxis]).mean(-1)
    elif tail == -1:
        p_values = (-H0 <= T_obs[:, np.newaxis]).mean(-1)
    return T_obs, p_values, H0


def bootstrap_confidence_interval(arr, ci=.95, n_bootstraps=2000,
                                  stat_fun='mean', random_state=None):
    """Get confidence intervals from non-parametric bootstrap.

    Parameters
    ----------
    arr : ndarray, shape (n_samples, ...)
        The input data on which to calculate the confidence interval.
    ci : float
        Level of the confidence interval between 0 and 1.
    n_bootstraps : int
        Number of bootstraps
    stat_fun : str | callable
        Can be "mean", "median", or a callable operating along `axis=0`.
    random_state : int | float | array_like | None
        The seed at which to initialize the bootstrap.

    Returns
    -------
    cis : ndarray, shape (2, ...)
        Containing the lower boundary of the CI at `cis[0, ...]` and the upper
        boundary of the CI at `cis[1, ...]`.


    """
    if stat_fun == "mean":
        def stat_fun(x):
            return x.mean(axis=0)
    elif stat_fun == 'median':
        def stat_fun(x):
            return np.median(x, axis=0)
    elif not callable(stat_fun):
        raise ValueError("stat_fun must be 'mean', 'median' or callable.")
    n_trials = arr.shape[0]
    indices = np.arange(n_trials, dtype=int)  # BCA would be cool to have too
    rng = check_random_state(random_state)
    boot_indices = rng.choice(indices, replace=True,
                              size=(n_bootstraps, len(indices)))
    stat = np.array([stat_fun(arr[inds]) for inds in boot_indices])
    ci = (((1 - ci) / 2) * 100, ((1 - ((1 - ci) / 2))) * 100)
    ci_low, ci_up = np.percentile(stat, ci, axis=0)
    return np.array([ci_low, ci_up])


def _ci(arr, ci=.95, method="bootstrap", n_bootstraps=2000, random_state=None):
    """Calculate confidence interval. Aux function for plot_compare_evokeds."""
    if method == "bootstrap":
        return bootstrap_confidence_interval(arr, ci=ci,
                                             n_bootstraps=n_bootstraps,
                                             random_state=random_state)
    else:
        from . import _parametric_ci
        return _parametric_ci(arr, ci=ci)