File: test_power.py

package info (click to toggle)
python-skbio 0.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 16,556 kB
  • ctags: 7,222
  • sloc: python: 42,085; ansic: 670; makefile: 180; sh: 10
file content (510 lines) | stat: -rw-r--r-- 21,831 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------

from unittest import TestCase, main

import numpy as np
import numpy.testing as npt
import pandas as pd
from scipy.stats import kruskal

from skbio.stats.power import (subsample_power,
                               subsample_paired_power,
                               _check_nans,
                               confidence_bound,
                               _calculate_power,
                               _compare_distributions,
                               _calculate_power_curve,
                               _check_subsample_power_inputs,
                               _identify_sample_groups,
                               _draw_paired_samples,
                               _get_min_size,
                               paired_subsamples
                               )


class PowerAnalysisTest(TestCase):

    def setUp(self):

        # Defines a testing functions
        def test_meta(ids, meta, cat, div):
            """Checks thhe div metric with a kruskal wallis"""
            out = [meta.loc[id_, div] for id_ in ids]
            return kruskal(*out)[1]

        def meta_f(x):
            """Applies `test_meta` to a result"""
            return test_meta(x, self.meta, 'INT', 'DIV')

        def f(x):
            """returns the p value of a kruskal wallis test"""
            return kruskal(*x)[1]

        self.test_meta = test_meta
        self.f = f
        self.meta_f = meta_f
        self.num_p = 1

        # Sets the random seed
        np.random.seed(5)
        # Sets up the distributions of data for use
        self.s1 = np.arange(0, 10, 1)
        # Sets up two distributions which will never be equal by a rank-sum
        # test.
        self.samps = [np.ones((10))/10., np.ones((10))]
        self.pop = [np.arange(0, 10, 0.1), np.arange(0, 20, 0.2)]
        # Sets up a vector of alpha values
        self.alpha = np.power(10, np.array([-1, -1.301, -2, -3])).round(3)
        # Sets up a vector of samples
        self.num_samps = np.arange(10, 100, 10)
        # Sets up a mapping file
        meta = {'GW': {'INT': 'N', 'ABX': np.nan, 'DIV': 19.5, 'AGE': '30s',
                       'SEX': 'M'},
                'CB': {'INT': 'Y', 'ABX': np.nan, 'DIV': 42.7, 'AGE': '30s',
                       'SEX': 'M'},
                'WM': {'INT': 'N', 'ABX': 'N', 'DIV': 27.5, 'AGE': '20s',
                       'SEX': 'F'},
                'MH': {'INT': 'Y', 'ABX': 'N', 'DIV': 62.3, 'AGE': '30s',
                       'SEX': 'F'},
                'CD': {'INT': 'Y', 'ABX': 'Y', 'DIV': 36.4, 'AGE': '40s',
                       'SEX': 'F'},
                'LF': {'INT': 'Y', 'ABX': 'N', 'DIV': 50.2, 'AGE': '20s',
                       'SEX': 'M'},
                'PP': {'INT': 'N', 'ABX': 'Y', 'DIV': 10.8, 'AGE': '30s',
                       'SEX': 'F'},
                'MM': {'INT': 'N', 'ABX': 'N', 'DIV': 55.6, 'AGE': '40s',
                       'SEX': 'F'},
                'SR': {'INT': 'N', 'ABX': 'Y', 'DIV': 2.2, 'AGE': '20s',
                       'SEX': 'M'},
                'TS': {'INT': 'N', 'ABX': 'Y', 'DIV': 16.1, 'AGE': '40s',
                       'SEX': 'M'},
                'PC': {'INT': 'Y', 'ABX': 'N', 'DIV': 82.6, 'AGE': '40s',
                       'SEX': 'M'},
                'NR': {'INT': 'Y', 'ABX': 'Y', 'DIV': 15.7, 'AGE': '20s',
                       'SEX': 'F'}}
        self.meta = pd.DataFrame.from_dict(meta, orient='index')
        self.meta_pairs = {0: [['GW', 'SR', 'TS'], ['CB', 'LF', 'PC']],
                           1: [['MM', 'PP', 'WM'], ['CD', 'MH', 'NR']]}
        self.pair_index = np.array([0, 0, 0, 1, 1, 1])
        self.counts = np.array([5, 15, 25, 35, 45])
        self.powers = [np.array([[0.105, 0.137, 0.174, 0.208, 0.280],
                                 [0.115, 0.135, 0.196, 0.204, 0.281],
                                 [0.096, 0.170, 0.165, 0.232, 0.256],
                                 [0.122, 0.157, 0.202, 0.250, 0.279],
                                 [0.132, 0.135, 0.173, 0.203, 0.279]]),
                       np.array([[0.157, 0.345, 0.522, 0.639, 0.739],
                                 [0.159, 0.374, 0.519, 0.646, 0.757],
                                 [0.161, 0.339, 0.532, 0.634, 0.745],
                                 [0.169, 0.372, 0.541, 0.646, 0.762],
                                 [0.163, 0.371, 0.522, 0.648, 0.746]]),
                       np.array([[0.276, 0.626, 0.865, 0.927, 0.992],
                                 [0.267, 0.667, 0.848, 0.937, 0.978],
                                 [0.236, 0.642, 0.850, 0.935, 0.977],
                                 [0.249, 0.633, 0.828, 0.955, 0.986],
                                 [0.249, 0.663, 0.869, 0.951, 0.985]])]
        self.power_alpha = 0.1
        self.effects = np.array([0.15245, 0.34877, 0.55830])
        self.bounds = np.array([0.01049, 0.00299, 0.007492])
        self.labels = np.array(['Age', 'Intervenption', 'Antibiotics'])
        self.cats = np.array(['AGE', 'INT', 'ABX'])
        self.cat = "AGE"
        self.control_cats = ['INT', 'ABX']

    def test_subsample_power_defaults(self):
        test_p, test_c = subsample_power(self.f, self.pop,
                                         num_iter=10, num_runs=5)
        self.assertEqual(test_p.shape, (5, 4))
        npt.assert_array_equal(np.array([10, 20, 30, 40]), test_c)

    def test_subsample_power_counts(self):
        test_p, test_c = subsample_power(self.f,
                                         samples=self.pop,
                                         num_iter=10,
                                         num_runs=2,
                                         min_counts=5)
        self.assertEqual(test_p.shape, (2, 5))
        npt.assert_array_equal(np.arange(5, 50, 10), test_c)

    def test_subsample_power_matches(self):
        test_p, test_c = subsample_power(self.f,
                                         samples=self.pop,
                                         num_iter=10,
                                         num_runs=5,
                                         draw_mode="matched")
        self.assertEqual(test_p.shape, (5, 4))
        npt.assert_array_equal(np.array([10, 20, 30, 40]), test_c)

    def test_subsample_power_multi_p(self):
        test_p, test_c = subsample_power(lambda x: np.array([0.5, 0.5]),
                                         samples=self.pop,
                                         num_iter=10,
                                         num_runs=5)
        self.assertEqual(test_p.shape, (5, 4, 2))
        npt.assert_array_equal(np.array([10, 20, 30, 40]), test_c)

    def test_subsample_paired_power(self):
        known_c = np.array([1, 2, 3, 4])
        # Sets up the handling values
        cat = 'INT'
        control_cats = ['SEX']

        # Tests for the control cats
        test_p, test_c = subsample_paired_power(self.meta_f,
                                                meta=self.meta,
                                                cat=cat,
                                                control_cats=control_cats,
                                                counts_interval=1,
                                                num_iter=10,
                                                num_runs=2)
        # Test the output shapes are sane
        self.assertEqual(test_p.shape, (2, 4))
        npt.assert_array_equal(known_c, test_c)

    def test_subsample_paired_power_multi_p(self):
        def f(x):
            return np.array([0.5, 0.5, 0.005])
        cat = 'INT'
        control_cats = ['SEX']
        # Tests for the control cats
        test_p, test_c = subsample_paired_power(f,
                                                meta=self.meta,
                                                cat=cat,
                                                control_cats=control_cats,
                                                counts_interval=1,
                                                num_iter=10,
                                                num_runs=2)
        self.assertEqual(test_p.shape, (2, 4, 3))

    def test_check_nans_str(self):
        self.assertTrue(_check_nans('string'))

    def test_check_nans_num(self):
        self.assertTrue(_check_nans(4.2))

    def test__check_nans_nan(self):
        self.assertFalse(_check_nans(np.nan))

    def test__check_nans_clean_list(self):
        self.assertTrue(_check_nans(['foo', 'bar'], switch=True))

    def test__check_nans_list_nan(self):
        self.assertFalse(_check_nans(['foo', np.nan], switch=True))

    def test__check_str_error(self):
        with self.assertRaises(TypeError):
            _check_nans(self.f)

    def test__get_min_size_strict(self):
        known = 5
        test = _get_min_size(self.meta, 'INT', ['ABX', 'SEX'], ['Y', 'N'],
                             True)
        self.assertEqual(test, known)

    def test__get_min_size_relaxed(self):
        known = 5
        test = _get_min_size(self.meta, 'INT', ['ABX', 'SEX'], ['Y', 'N'],
                             False)
        self.assertEqual(known, test)

    def test_confidence_bound_default(self):
        # Sets the know confidence bound
        known = 2.2830070
        test = confidence_bound(self.s1)
        npt.assert_almost_equal(test, known, 3)

    def test_confidence_bound_df(self):
        known = 2.15109
        test = confidence_bound(self.s1, df=15)
        npt.assert_almost_equal(known, test, 3)

    def test_confidence_bound_alpha(self):
        known = 3.2797886
        test = confidence_bound(self.s1, alpha=0.01)
        npt.assert_almost_equal(known, test, 3)

    def test_confidence_bound_nan(self):
        # Sets the value to test
        samples = np.array([[4, 3.2, 3.05],
                            [2, 2.8, 2.95],
                            [5, 2.9, 3.07],
                            [1, 3.1, 2.93],
                            [3, np.nan, 3.00]])
        # Sets the know value
        known = np.array([2.2284, 0.2573, 0.08573])
        # Tests the function
        test = confidence_bound(samples, axis=0)
        npt.assert_almost_equal(known, test, 3)

    def test_confidence_bound_axis_none(self):
        # Sets the value to test
        samples = np.array([[4, 3.2, 3.05],
                            [2, 2.8, 2.95],
                            [5, 2.9, 3.07],
                            [1, 3.1, 2.93],
                            [3, np.nan, 3.00]])
        # Sest the known value
        known = 0.52852
        # Tests the output
        test = confidence_bound(samples, axis=None)
        npt.assert_almost_equal(known, test, 3)

    def test__calculate_power(self):
        # Sets up the values to test
        crit = 0.025
        # Sets the known value
        known = 0.5
        # Calculates the test value
        test = _calculate_power(self.alpha, crit)
        # Checks the test value
        npt.assert_almost_equal(known, test)

    def test__calculate_power_n(self):
        crit = 0.025
        known = np.array([0.5, 0.5])
        alpha = np.vstack((self.alpha, self.alpha))
        test = _calculate_power(alpha, crit)
        npt.assert_almost_equal(known, test)

    def test__compare_distributions_sample_counts_error(self):
        with self.assertRaises(ValueError):
            _compare_distributions(self.f, [self.pop[0][:5], self.pop[1]], 1,
                                   counts=25)

    def test__compare_distributions_all_mode(self):
        known = np.ones((100))*0.0026998
        test = _compare_distributions(self.f, self.samps, 1, num_iter=100)
        npt.assert_allclose(known, test, 5)

    def test__compare_distributions_matched_mode(self):
        # Sets the known value
        known_mean = 0.162195
        known_std = 0.121887
        known_shape = (100,)
        # Tests the sample value
        test = _compare_distributions(self.f, self.pop, self.num_p,
                                      mode='matched', num_iter=100)
        npt.assert_allclose(known_mean, test.mean(), rtol=0.1, atol=0.02)
        npt.assert_allclose(known_std, test.std(), rtol=0.1, atol=0.02)
        self.assertEqual(known_shape, test.shape)

    def test__compare_distributions_draw_mode(self):
        draw_mode = 'Ultron'
        with self.assertRaises(ValueError):
            _check_subsample_power_inputs(self.f, self.pop, draw_mode,
                                          self.num_p)

    def test__compare_distributions_multiple_returns(self):
        known = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])

        def f(x):
            return np.array([1, 2, 3])

        test = _compare_distributions(f, self.pop, 3, mode='matched',
                                      num_iter=3)
        npt.assert_array_equal(known, test)

    def test_check_subsample_power_inputs_matched_mode(self):
        with self.assertRaises(ValueError):
            _check_subsample_power_inputs(self.f,
                                          samples=[np.ones((2)), np.ones((5))],
                                          draw_mode="matched")

    def test_check_subsample_power_inputs_counts(self):
        with self.assertRaises(ValueError):
            _check_subsample_power_inputs(self.f,
                                          samples=[np.ones((3)), np.ones((5))],
                                          min_counts=5,
                                          counts_interval=1000,
                                          max_counts=7)

    def test_check_subsample_power_inputs_ratio(self):
        with self.assertRaises(ValueError):
            _check_subsample_power_inputs(self.f,
                                          self.samps,
                                          ratio=np.array([1, 2, 3]))

    def test_check_subsample_power_inputs_test(self):
        # Defines a test function
        def test(x):
            return 'Hello World!'
        with self.assertRaises(TypeError):
            _check_subsample_power_inputs(test, self.samps)

    def test_check_sample_power_inputs(self):
        # Defines the know returns
        known_num_p = 1
        known_ratio = np.ones((2))
        known_counts = np.arange(2, 10, 2)
        # Runs the code for the returns
        test_ratio, test_num_p, test_counts = \
            _check_subsample_power_inputs(self.f,
                                          self.samps,
                                          counts_interval=2,
                                          max_counts=10)
        # Checks the returns are sane
        self.assertEqual(known_num_p, test_num_p)
        npt.assert_array_equal(known_ratio, test_ratio)
        npt.assert_array_equal(known_counts, test_counts)

    def test__calculate_power_curve_ratio_error(self):
        with self.assertRaises(ValueError):
            _calculate_power_curve(self.f, self.pop, self.num_samps,
                                   ratio=np.array([0.1, 0.2, 0.3]),
                                   num_iter=100)

    def test__calculate_power_curve_default(self):
        # Sets the known output
        known = np.array([0.509, 0.822, 0.962, 0.997, 1.000, 1.000, 1.000,
                          1.000, 1.000])
        # Generates the test values
        test = _calculate_power_curve(self.f,
                                      self.pop,
                                      self.num_samps,
                                      num_iter=100)
        # Checks the samples returned sanely
        npt.assert_allclose(test, known, rtol=0.1, atol=0.01)

    def test__calculate_power_curve_alpha(self):
        # Sets the know output
        known = np.array([0.31, 0.568, 0.842, 0.954, 0.995, 1.000, 1.000,
                          1.000, 1.000])

        # Generates the test values
        test = _calculate_power_curve(self.f,
                                      self.pop,
                                      self.num_samps,
                                      alpha=0.01,
                                      num_iter=100)

        # Checks the samples returned sanely
        npt.assert_allclose(test, known, rtol=0.1, atol=0.1)

    def test__calculate_power_curve_ratio(self):
        # Sets the know output
        known = np.array([0.096, 0.333, 0.493, 0.743, 0.824, 0.937, 0.969,
                          0.996, 0.998])

        # Generates the test values
        test = _calculate_power_curve(self.f,
                                      self.pop,
                                      self.num_samps,
                                      ratio=np.array([0.25, 0.75]),
                                      num_iter=100)

        # Checks the samples returned sanely
        npt.assert_allclose(test, known, rtol=0.1, atol=0.1)

    def test_paired_subsamples_default(self):
        # Sets the known np.array set
        known_array = [{'MM', 'SR', 'TS', 'GW', 'PP', 'WM'},
                       {'CD', 'LF', 'PC', 'CB', 'MH', 'NR'}]

        # Gets the test value
        cat = 'INT'
        control_cats = ['SEX', 'AGE']
        test_array = paired_subsamples(self.meta, cat, control_cats)
        self.assertEqual(known_array[0], set(test_array[0]))
        self.assertEqual(known_array[1], set(test_array[1]))

    def test_paired_subsamples_break(self):
        # Sets known np.array set
        known_array = [np.array([]), np.array([])]
        # Gets the test value
        cat = 'ABX'
        control_cats = ['SEX', 'AGE', 'INT']
        test_array = paired_subsamples(self.meta, cat, control_cats)
        npt.assert_array_equal(known_array, test_array)

    def test_paired_subsample_undefined(self):
        known_array = np.zeros((2, 0))
        cat = 'INT'
        order = ['Y', 'N']
        control_cats = ['AGE', 'ABX', 'SEX']
        test_array = paired_subsamples(self.meta, cat, control_cats,
                                       order=order)
        npt.assert_array_equal(test_array, known_array)

    def test_paired_subsample_fewer(self):
        # Set known value
        known_array = {'PP', 'MH', 'CD', 'PC', 'TS', 'MM'}
        # Sets up test values
        cat = 'AGE'
        order = ['30s', '40s']
        control_cats = ['ABX']
        test_array = paired_subsamples(self.meta, cat, control_cats,
                                       order=order)
        for v in test_array[0]:
            self.assertTrue(v in known_array)
        for v in test_array[1]:
            self.assertTrue(v in known_array)

    def test_paired_subsamples_not_strict(self):
        known_array = [{'WM', 'MM', 'GW', 'SR', 'TS'},
                       {'LF', 'PC', 'CB', 'NR', 'CD'}]

        # Gets the test values
        cat = 'INT'
        control_cats = ['ABX', 'AGE']
        test_array = paired_subsamples(self.meta, cat, control_cats,
                                       strict_match=False)
        self.assertEqual(set(test_array[0]), known_array[0])
        self.assertEqual(set(test_array[1]), known_array[1])

    def test__identify_sample_groups(self):
        # Defines the know values
        known_pairs = {0: [['MM'], ['CD']],
                       1: [['SR'], ['LF']],
                       2: [['TS'], ['PC']],
                       3: [['GW'], ['CB']],
                       4: [['PP'], ['MH']],
                       5: [['WM'], ['NR']]}
        known_index = np.array([0, 1, 2, 3, 4, 5])
        test_pairs, test_index = _identify_sample_groups(self.meta,
                                                         'INT',
                                                         ['SEX', 'AGE'],
                                                         order=['N', 'Y'],
                                                         strict_match=True)
        self.assertEqual(known_pairs.keys(), test_pairs.keys())
        self.assertEqual(sorted(known_pairs.values()),
                         sorted(test_pairs.values()))
        npt.assert_array_equal(known_index, test_index)

    def test__identify_sample_groups_not_strict(self):
        # Defines the know values
        known_pairs = {1: [np.array(['PP'], dtype=object),
                           np.array(['CD', 'NR'], dtype=object)],
                       0: [np.array(['MM', 'WM'], dtype=object),
                           np.array(['MH'], dtype=object)],
                       2: [np.array(['GW'], dtype=object),
                           np.array(['CB'], dtype=object)]}
        known_index = np.array([0, 1, 2])
        test_pairs, test_index = _identify_sample_groups(self.meta,
                                                         'INT',
                                                         ['SEX', 'ABX'],
                                                         order=['N', 'Y'],
                                                         strict_match=False)
        self.assertEqual(known_pairs.keys(), test_pairs.keys())

        for k in known_pairs:
            for i in range(2):
                npt.assert_array_equal(known_pairs[k][i], test_pairs[k][i])
        npt.assert_array_equal(known_index, test_index)

    def test__draw_paired_samples(self):
        num_samps = 3
        known_sets = [{'GW', 'SR', 'TS', 'MM', 'PP', 'WM'},
                      {'CB', 'LF', 'PC', 'CD', 'MH', 'NR'}]
        test_samps = _draw_paired_samples(self.meta_pairs, self.pair_index,
                                          num_samps)
        for i, t in enumerate(test_samps):
            self.assertTrue(set(t).issubset(known_sets[i]))


if __name__ == '__main__':
    main()