# Unit tests for cogent.maths.stats.period and cogent.maths.period.
import numpy
from cogent.util.unit_test import TestCase, main
from cogent.maths.stats.period import chi_square, factorial, g_statistic, \
circular_indices, _seq_to_symbols, seq_to_symbols, blockwise_bootstrap, \
SeqToSymbols
from cogent.maths.period import ipdft, hybrid, auto_corr, Hybrid, Ipdft, \
AutoCorrelation
# Module metadata: authorship, licensing, version and maintainer contact.
__author__ = "Hua Ying, Julien Epps and Gavin Huttley"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Gavin Huttley"
__email__ = "Gavin.Huttley@anu.edu.au"
__status__ = "Production"
class TestPeriodStat(TestCase):
    """Tests for the period statistics helpers and period calculators.

    Exercises ``chi_square``, ``factorial``, ``g_statistic``,
    ``circular_indices``, the sequence-to-symbol converters and
    ``blockwise_bootstrap`` driven by the ``Hybrid``, ``Ipdft`` and
    ``AutoCorrelation`` calculators.
    """

    # DNA sequence shared by the blockwise bootstrap tests; defined once so
    # the individual tests cannot drift apart.
    _bootstrap_seq = (
        'ATCGTTGGGACCGGTTCAAGTTTTGGAACTCGCAAGGGGTGAATGGTCTTCGTCTAACGCTGG'
        'GGAACCCTGAATCGTTGTAACGCTGGGGTCTTTAACCGTTCTAATTTAACGCTGGGGGGTTCT'
        'AATTTTTAACCGCGGAATTGCGTC')

    def setUp(self):
        """Create the binary signal (int and float64 views) and motif list."""
        x = [1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1,
             0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0,
             1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,
             1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,
             1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1,
             0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
             0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
             0, 0, 0, 0, 1, 1, 1, 0, 0, 0]
        self.x = numpy.array(x)
        self.sig = numpy.array(self.x, numpy.float64)
        self.motifs = ['AA', 'TT', 'TA']

    def _assert_is_probability(self, p):
        """Fail unless every value of p lies in the closed interval [0, 1].

        Accepts either a scalar or an array-like of values.
        """
        values = numpy.asarray(p, dtype=numpy.float64)
        self.assertTrue(((values >= 0.0) & (values <= 1.0)).all())

    def test_chi_square(self):
        """chi_square(self.x, 10) gives the expected statistic and p-value"""
        D, cs_p_val = chi_square(self.x, 10)
        # Compared as strings rounded to four decimal places.
        self.assertEqual('%.4f' % D, '0.4786')
        self.assertEqual('%.4f' % cs_p_val, '0.4891')

    def test_factorial(self):
        """factorial returns correct values, including for 0"""
        self.assertEqual(factorial(1), 1)
        self.assertEqual(factorial(4), 24)
        self.assertEqual(factorial(0), 1)

    # NOTE: the method name keeps its historical spelling ("statitic") so
    # existing test selectors that reference it continue to work.
    def test_g_statitic(self):
        """calc g-stat correctly"""
        X, periods = ipdft(self.sig, llim=2, ulim=39)
        g_obs, p_val = g_statistic(X)
        self.assertFloatEqual(p_val, 0.9997, eps=1e-3)
        self.assertFloatEqual(g_obs, 0.0577, eps=1e-3)

    def test_circular_indices(self):
        """circular_indices wraps around the end of the vector"""
        v = range(10)
        self.assertEqual(circular_indices(v, 8, 10, 4), [8, 9, 0, 1])
        self.assertEqual(circular_indices(v, 9, 10, 4), [9, 0, 1, 2])
        self.assertEqual(circular_indices(v, 4, 10, 4), [4, 5, 6, 7])

    def test_seq_to_symbol(self):
        """both py and pyx seq_to_symbol versions correctly convert a sequence"""
        motifs = ['AA', 'AT', 'TT']
        symbols = _seq_to_symbols('AATGGTTA', motifs, 2)
        self.assertEqual(symbols, numpy.array([1, 1, 0, 0, 0, 1, 0, 0]))
        # This variant is handed a pre-allocated uint8 result buffer.
        symbols = seq_to_symbols('AAGATT', motifs, 2,
                                 numpy.zeros(6, numpy.uint8))
        self.assertEqual(symbols, numpy.array([1, 0, 0, 1, 1, 0]))

    def test_seq_to_symbol_factory(self):
        """checks factory function for conversion works"""
        motifs = ['AA', 'AT', 'TT']
        seq_to_symbols = SeqToSymbols(motifs)
        self.assertEqual(seq_to_symbols('AATGGTTA'),
                         numpy.array([1, 1, 0, 0, 0, 1, 0, 0]))
        self.assertEqual(seq_to_symbols('AAGATT'),
                         numpy.array([1, 0, 0, 1, 1, 0], numpy.uint8))

    def test_permutation(self):
        """blockwise_bootstrap runs with the Hybrid and Ipdft calculators"""
        s = self._bootstrap_seq
        seq_to_symbol = SeqToSymbols(self.motifs, length=len(s))
        hybrid_calc = Hybrid(len(s), llim=2, period=4)
        ipdft_calc = Ipdft(len(s), llim=2, period=4)
        for calc in (hybrid_calc, ipdft_calc):
            stat, p = blockwise_bootstrap(s, calc, block_size=10,
                                          num_reps=1000,
                                          seq_to_symbols=seq_to_symbol)
            # The resampling is random, so only an invariant is checked.
            # NOTE(review): assumes the second return value is a
            # probability, as the p == 1.0 check in
            # test_permutation_skips suggests -- confirm.
            self._assert_is_probability(p)

    def test_permutation_all(self):
        """performs permutation test of Hybrid, but considers all stats"""
        s = self._bootstrap_seq
        seq_to_symbol = SeqToSymbols(self.motifs, length=len(s))
        hybrid_calc = Hybrid(len(s), period=4, return_all=True)
        stat, p = blockwise_bootstrap(s, hybrid_calc, block_size=10,
                                      num_reps=1000,
                                      seq_to_symbols=seq_to_symbol)
        # NOTE(review): with return_all=True p is presumably one
        # probability per statistic; the helper accepts scalars and arrays.
        self._assert_is_probability(p)

    def test_get_num_stats(self):
        """calculators should return correct num stats"""
        hybrid_calc = Hybrid(150, llim=2, period=4)
        ipdft_calc = Ipdft(150, llim=2, period=4)
        autocorr_calc = AutoCorrelation(150, llim=2, period=4)
        self.assertEqual(hybrid_calc.getNumStats(), 1)
        self.assertEqual(ipdft_calc.getNumStats(), 1)
        self.assertEqual(autocorr_calc.getNumStats(), 1)
        hybrid_calc = Hybrid(150, llim=2, period=4, return_all=True)
        self.assertEqual(hybrid_calc.getNumStats(), 3)

    def test_permutation_skips(self):
        """permutation test correctly handles data without symbols"""
        # A sequence of only 'N' contains none of self.motifs.
        s = 'N' * 150
        seq_to_symbol = SeqToSymbols(self.motifs, length=len(s))
        ipdft_calc = Ipdft(len(s), llim=2, period=4)
        stat, p = blockwise_bootstrap(s, ipdft_calc, block_size=10,
                                      num_reps=1000,
                                      seq_to_symbols=seq_to_symbol,
                                      num_stats=1)
        self.assertEqual(stat, 0.0)
        self.assertEqual(p, 1.0)
if __name__ == '__main__':
    # Run the tests through the `main` imported from cogent.util.unit_test
    # when this file is executed as a script.
    main()