File: test_rarefaction.py

package info (click to toggle)
python-cogent 1.5.3-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 16,424 kB
  • ctags: 24,343
  • sloc: python: 134,200; makefile: 100; ansic: 17; sh: 10
file content (173 lines) | stat: -rw-r--r-- 6,986 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/env python
#file test_rarefaction.py
from numpy import array
from cogent.util.unit_test import TestCase, main
from cogent.maths.stats.rarefaction import (subsample,
                                            naive_histogram,
                                            wrap_numpy_histogram,
                                            rarefaction,
                                            subsample_freq_dist_nonzero,
                                            subsample_random,
                                            subsample_multinomial)

# Module-level metadata: authorship, copyright, license, version, maintainer
# contact and release status for this test module.
__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Rob Knight"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Production"

class TopLevelTests(TestCase):
    """Tests of top-level functions"""

    def _check_rarefaction_curve(self, curve, stride, zero_positions,
                                 expected_final):
        """Checks the invariants shared by the rarefaction curves tested here.

        curve: list of count vectors, one per rarefaction step.
        stride: number of counts each step is expected to add.
        zero_positions: indices that must stay at zero in every step.
        expected_final: count vector the last step should reproduce.
        """
        for i, r in enumerate(curve):
            self.assertEqual(r.sum(), stride*(i+1))
            #make sure we didn't add any bad counts
            for pos in zero_positions:
                self.assertEqual(r[pos], 0)
        #when we get to end should recapture orig vals
        self.assertEqual(curve[-1], expected_final)

    def test_subsample(self):
        """subsample should return a random subsample of a vector"""
        a = array([0,5,0])
        self.assertEqual(subsample(a,5), array([0,5,0]))
        self.assertEqual(subsample(a,2), array([0,2,0]))

        # selecting 2 counts from the vector 1000 times yields each of the
        # two possible results at least once each
        b = array([2,0,1])
        actual = set()
        for i in range(1000):
            actual.add(tuple(subsample(b,2)))
        self.assertEqual(actual, set([(1,0,1), (2,0,0)]))

        # a single draw must be one of the two possible results; uses
        # assertTrue rather than a bare assert so the check survives -O
        obs = subsample(b,2)
        self.assertTrue((obs == array([1,0,1])).all() or
                        (obs == array([2,0,0])).all())

    def test_subsample_freq_dist_nonzero(self):
        """subsample_freq_dist_nonzero should return a random subsample of a vector
        """
        a = array([0,5,0])
        self.assertEqual(subsample_freq_dist_nonzero(a,5), array([0,5,0]))
        self.assertEqual(subsample_freq_dist_nonzero(a,2), array([0,2,0]))

        # selecting 35 counts from the vector 100 times yields at least
        # two different results, each summing to 35
        b = array([2,0,1,2,1,8,6,0,3,3,5,0,0,0,5])
        actual = set()
        for i in range(100):
            e = subsample_freq_dist_nonzero(b,35)
            # assertEqual, not assertTrue(x, 35): the latter treats 35 as
            # the failure message and never compares the sum
            self.assertEqual(e.sum(), 35)
            actual.add(tuple(e))
        self.assertTrue(len(actual) > 1)

        # selecting 2 counts from the vector 1000 times yields each of the
        # two possible results at least once each (note that an issue with an
        # initial buggy version of subsample_freq_dist_nonzero was detected
        # with this test, so don't remove - )
        b = array([2,0,1])
        actual = set()
        for i in range(1000):
            e = subsample_freq_dist_nonzero(b,2)
            actual.add(tuple(e))
            self.assertEqual(e.sum(), 2)
        self.assertEqual(actual, set([(1,0,1), (2,0,0)]))

    def test_subsample_random(self):
        """subsample_random should return a random subsample of a vector
        """
        a = array([0,5,0])
        self.assertEqual(subsample_random(a,5), array([0,5,0]))
        self.assertEqual(subsample_random(a,2), array([0,2,0]))

        # selecting 35 counts from the vector 100 times yields at least
        # two different results, each summing to 35
        b = array([2,0,1,2,1,8,6,0,3,3,5,0,0,0,5])
        actual = set()
        for i in range(100):
            e = subsample_random(b,35)
            # assertEqual, not assertTrue(x, 35): the latter treats 35 as
            # the failure message and never compares the sum
            self.assertEqual(e.sum(), 35)
            actual.add(tuple(e))
        self.assertTrue(len(actual) > 1)

        # selecting 2 counts from the vector 1000 times yields each of the
        # two possible results at least once each
        b = array([2,0,1])
        actual = set()
        for i in range(1000):
            e = subsample_random(b,2)
            actual.add(tuple(e))
            self.assertEqual(e.sum(), 2)
        self.assertEqual(actual, set([(1,0,1), (2,0,0)]))

    def test_subsample_multinomial(self):
        """subsample_multinomial should return a random subsample of a vector
        """
        # selecting 35 counts from the vector 100 times yields at least
        # two different results, each summing to 35
        actual = set()
        for i in range(100):
            # the input is rebuilt on every iteration, so each call sees a
            # fresh vector
            b = array([2,0,1,2,1,8,6,0,3,3,5,0,0,0,5])
            e = subsample_multinomial(b,35)
            # assertEqual, not assertTrue(x, 35): the latter treats 35 as
            # the failure message and never compares the sum
            self.assertEqual(e.sum(), 35)
            actual.add(tuple(e))
        self.assertTrue(len(actual) > 1)

    def test_naive_histogram(self):
        """naive_histogram should produce expected result"""
        vals = array([1,0,0,3])
        self.assertEqual(naive_histogram(vals), array([2,1,0,1]))
        self.assertEqual(naive_histogram(vals, 4), array([2,1,0,1,0]))

    def test_wrap_numpy_histogram(self):
        """wrap_numpy_histogram should provide expected result"""
        vals = array([1,0,0,3])
        h_f = wrap_numpy_histogram(3)
        self.assertEqual(h_f(vals), array([2,1,0,1]))
        h_f = wrap_numpy_histogram(4)
        self.assertEqual(h_f(vals, 4), array([2,1,0,1,0]))

    def test_rarefaction(self):
        """rarefaction should produce expected curve"""
        vals = array([5,0,0,3,0,10], dtype=int)
        # positions holding zero in vals; no step may put a count there
        empty = [1,2,4]

        res = [r.copy() for r in rarefaction(vals, stride=1)]
        self.assertEqual(len(res), 18)
        self._check_rarefaction_curve(res, 1, empty, vals)

        res = [r.copy() for r in rarefaction(vals, stride=3)]
        self.assertEqual(len(res), 6)
        self._check_rarefaction_curve(res, 3, empty, vals)

        #repeat everything above using alt. input format
        orig_vals = vals.copy()
        vals = array([0,0,0,0,0,3,3,3,5,5,5,5,5,5,5,5,5,5], dtype=int)

        res = [r.copy() for r in rarefaction(vals, stride=1, is_counts=False)]
        self.assertEqual(len(res), 18)
        self._check_rarefaction_curve(res, 1, empty, orig_vals)

        res = [r.copy() for r in rarefaction(vals, stride=3, is_counts=False)]
        self.assertEqual(len(res), 6)
        self._check_rarefaction_curve(res, 3, empty, orig_vals)


# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    main()