1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
|
#!/usr/bin/env python
# Unit tests for cogent.maths.stats.rarefaction.
from numpy import array
from cogent.util.unit_test import TestCase, main
from cogent.maths.stats.rarefaction import (subsample,
naive_histogram,
wrap_numpy_histogram,
rarefaction,
subsample_freq_dist_nonzero,
subsample_random,
subsample_multinomial)
__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Rob Knight"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Production"
class TopLevelTests(TestCase):
    """Tests of top-level functions in the rarefaction module.

    The subsampling functions are random, so the tests either check
    invariants that hold for every draw (the sum of the returned counts) or
    repeat the draw many times and check the set of distinct outcomes.
    """

    def _check_rarefaction_curve(self, curve, stride, expected_len,
                                 expected_final):
        """Check the invariants shared by all rarefaction test cases.

        curve: list of count vectors yielded by rarefaction (already copied).
        stride: number of counts added between successive vectors.
        expected_len: expected number of vectors in the curve.
        expected_final: vector the last point of the curve must equal.
        """
        self.assertEqual(len(curve), expected_len)
        for i, r in enumerate(curve):
            # the i-th point holds exactly stride*(i+1) counts
            self.assertEqual(r.sum(), stride * (i + 1))
            # make sure we didn't add any bad counts: positions that are
            # zero in the input must stay zero at every step
            for pos in [1, 2, 4]:
                self.assertEqual(r[pos], 0)
        # when we get to end should recapture orig vals
        self.assertEqual(curve[-1], expected_final)

    def test_subsample(self):
        """subsample should return a random subsample of a vector"""
        a = array([0, 5, 0])
        self.assertEqual(subsample(a, 5), array([0, 5, 0]))
        self.assertEqual(subsample(a, 2), array([0, 2, 0]))

        # selecting 2 counts from the vector 1000 times yields each of the
        # two possible results at least once each
        b = array([2, 0, 1])
        actual = {}  # dict used as a set of the distinct outcomes seen
        for _ in range(1000):
            e = subsample(b, 2)
            actual[tuple(e)] = None
        self.assertEqual(actual, {(1, 0, 1): None, (2, 0, 0): None})

        # a single draw must be one of the two possible outcomes; use the
        # TestCase assertion rather than a bare assert, which is stripped
        # when Python runs with -O
        obs = subsample(b, 2)
        self.assertTrue((obs == array([1, 0, 1])).all() or
                        (obs == array([2, 0, 0])).all())

    def test_subsample_freq_dist_nonzero(self):
        """subsample_freq_dist_nonzero should return a random subsample of a vector
        """
        a = array([0, 5, 0])
        self.assertEqual(subsample_freq_dist_nonzero(a, 5), array([0, 5, 0]))
        self.assertEqual(subsample_freq_dist_nonzero(a, 2), array([0, 2, 0]))

        # selecting 35 counts from the vector 100 times yields at least
        # two different results
        b = array([2, 0, 1, 2, 1, 8, 6, 0, 3, 3, 5, 0, 0, 0, 5])
        actual = {}
        for _ in range(100):
            e = subsample_freq_dist_nonzero(b, 35)
            # every draw must contain exactly the requested number of counts
            self.assertEqual(e.sum(), 35)
            actual[tuple(e)] = None
        self.assertTrue(len(actual) > 1)

        # selecting 2 counts from the vector 1000 times yields each of the
        # two possible results at least once each (note that an issue with an
        # initial buggy version of subsample_freq_dist_nonzero was detected
        # with this test, so don't remove - )
        b = array([2, 0, 1])
        actual = {}
        for _ in range(1000):
            e = subsample_freq_dist_nonzero(b, 2)
            actual[tuple(e)] = None
            self.assertEqual(e.sum(), 2)
        self.assertEqual(actual, {(1, 0, 1): None, (2, 0, 0): None})

    def test_subsample_random(self):
        """subsample_random should return a random subsample of a vector
        """
        a = array([0, 5, 0])
        self.assertEqual(subsample_random(a, 5), array([0, 5, 0]))
        self.assertEqual(subsample_random(a, 2), array([0, 2, 0]))

        # selecting 35 counts from the vector 100 times yields at least
        # two different results
        b = array([2, 0, 1, 2, 1, 8, 6, 0, 3, 3, 5, 0, 0, 0, 5])
        actual = {}
        for _ in range(100):
            e = subsample_random(b, 35)
            # every draw must contain exactly the requested number of counts
            self.assertEqual(e.sum(), 35)
            actual[tuple(e)] = None
        self.assertTrue(len(actual) > 1)

        # selecting 2 counts from the vector 1000 times yields each of the
        # two possible results at least once each
        b = array([2, 0, 1])
        actual = {}
        for _ in range(1000):
            e = subsample_random(b, 2)
            actual[tuple(e)] = None
            self.assertEqual(e.sum(), 2)
        self.assertEqual(actual, {(1, 0, 1): None, (2, 0, 0): None})

    def test_subsample_multinomial(self):
        """subsample_multinomial should return a random subsample of a vector
        """
        # selecting 35 counts from the vector 100 times yields at least
        # two different results
        actual = {}
        for _ in range(100):
            # the input is rebuilt on every iteration, as in the original
            # test, so each draw starts from a fresh vector
            b = array([2, 0, 1, 2, 1, 8, 6, 0, 3, 3, 5, 0, 0, 0, 5])
            e = subsample_multinomial(b, 35)
            # every draw must contain exactly the requested number of counts
            self.assertEqual(e.sum(), 35)
            actual[tuple(e)] = None
        self.assertTrue(len(actual) > 1)

    def test_naive_histogram(self):
        """naive_histogram should produce expected result"""
        vals = array([1, 0, 0, 3])
        self.assertEqual(naive_histogram(vals), array([2, 1, 0, 1]))
        self.assertEqual(naive_histogram(vals, 4), array([2, 1, 0, 1, 0]))

    def test_wrap_numpy_histogram(self):
        """wrap_numpy_histogram should provide expected result"""
        vals = array([1, 0, 0, 3])
        h_f = wrap_numpy_histogram(3)
        self.assertEqual(h_f(vals), array([2, 1, 0, 1]))
        h_f = wrap_numpy_histogram(4)
        self.assertEqual(h_f(vals, 4), array([2, 1, 0, 1, 0]))

    def test_rarefaction(self):
        """rarefaction should produce expected curve"""
        # counts format: 18 counts in total, zeros at positions 1, 2 and 4
        vals = array([5, 0, 0, 3, 0, 10], dtype=int)
        res = [r.copy() for r in rarefaction(vals, stride=1)]
        self._check_rarefaction_curve(res, 1, 18, vals)

        res = [r.copy() for r in rarefaction(vals, stride=3)]
        self._check_rarefaction_curve(res, 3, 6, vals)

        # repeat everything above using alt. input format: the same data
        # written out as index 0 five times, 3 three times and 5 ten times
        orig_vals = vals.copy()
        vals = array([0, 0, 0, 0, 0, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
                     dtype=int)
        res = [r.copy() for r in rarefaction(vals, stride=1, is_counts=False)]
        self._check_rarefaction_curve(res, 1, 18, orig_vals)

        res = [r.copy() for r in rarefaction(vals, stride=3, is_counts=False)]
        self._check_rarefaction_curve(res, 3, 6, orig_vals)
# Run the tests through cogent's unit_test main() when executed as a script.
if __name__ == '__main__':
    main()
|