File: test_markov.py

package info (click to toggle)
python-cogent 1.4.1-1.2
  • links: PTS, VCS
  • area: non-free
  • in suites: squeeze
  • size: 13,260 kB
  • ctags: 20,087
  • sloc: python: 116,163; ansic: 732; makefile: 74; sh: 9
file content (125 lines) | stat: -rw-r--r-- 5,340 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env/python
"""test_markov.py: tests of the MarkovGenerator class.

"""
from cogent.seqsim.markov import MarkovGenerator
from StringIO import StringIO
from operator import mul
from sys import path
from cogent.util.unit_test import TestCase, main
from numpy import array

__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2009, The Cogent Project"
__credits__ = ["Rob Knight", "Jesse Zaneveld", "Daniel McDonald"]
__license__ = "GPL"
__version__ = "1.4.1"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Development"

class MarkovGeneratorTests(TestCase):
    """Tests of the MarkovGenerator class."""
    def setUp(self):
        """Define a few well-known frequencies."""
        self.single = MarkovGenerator(['UUUUUUUUUU'], order=0)
        self.equal = MarkovGenerator(['UUUUUCCCCC'], order=0)
        self.unequal_0 = MarkovGenerator(['UCCCCCCCCC'], order=0)
        self.unequal_1 = MarkovGenerator(['UCCCCCCCCC'], order=-1)
        self.pairs = MarkovGenerator(['UCAUCAUCAUCAUCA'], order=1)
        self.randquads=MarkovGenerator(['AACCUAUCUACUACUAUCUUCAUAUUCC']\
            ,order=3, calc_entropy=True, delete_bad_suffixes=False)
        self.empty = MarkovGenerator('', order=0)
        self.linebreaks= MarkovGenerator(StringIO('abb\nbcc\nd\n'))
        self.dinucs=MarkovGenerator(['ATACATAC'],order=1)
        self.orderfive=MarkovGenerator(['AAAAAGAAAAATAAAAAGAAAAAT'],order=5)
        
    def test_init(self):
        """MarkovGenerator init should give right frequency distributions."""
        self.assertEqual(self.empty.Frequencies, {})
        self.assertEqual(self.single.Frequencies, {'':{'U':1.0}})
        self.assertEqual(self.equal.Frequencies, {'':{'U':0.5,'C':0.5}})
        self.assertEqual(self.unequal_0.Frequencies, {'':{'U':0.1,'C':0.9}})
        self.assertEqual(self.unequal_1.Frequencies, {'':{'U':0.5, 'C':0.5}})
        self.assertEqual(self.pairs.Frequencies, \
            {'U':{'C':1},'C':{'A':1},'A':{'U':1}})
        #check that recalculating the frequencies doesn't break anything
        self.pairs.calcFrequencies()
        self.assertEqual(self.pairs.Frequencies, \
            {'U':{'C':1},'C':{'A':1},'A':{'U':1}})
        exp={'AAC':{'C':1},'ACC':{'U':1},'CCU':{'A':1},'CUA':{'U':0.5,'C':0.5},\
             'UAU':{'U':1/3.0,'C':2/3.0},'AUC':{'U':1},'UCU':{'U':0.5,'A':0.5},\
             'UAC':{'U':1},'ACU':{'A':1},'CUU':{'C':1},'UUC':{'C':0.5,'A':0.5},\
             'UCA':{'U':1},'CAU':{'A':1},'AUA':{'U':1},'AUU':{'C':1},
             }
        obs = self.randquads.Frequencies
        self.assertFloatEqual(obs, exp)
        #check that resetting linebreaks has the desired effect
        self.assertEqual(self.linebreaks.Frequencies, \
            {'a':{'b':1},'b':{'b':0.5,'c':0.5},'c':{'c':1}})
        self.linebreaks.Linebreaks = True
        self.linebreaks.Text.seek(0)
        self.linebreaks.calcFrequencies()
        #NOTE: current algorithm won't extend over line breaks. If you want
        #to force use of line breaks, read into a single string.
        self.assertEqual(self.linebreaks.Frequencies, \
            {'a':{'b':1},'b':{'b':0.5,'c':0.5},'c':{'c':1}})
    
    def test_next(self):
        """MarkovGenerator.next should generate text with expected properties"""
        #haven't figured how to do this for longer correlation lengths yet
        pass

    def test_entropy(self):
        """MarkovGenerator._entropy() should correctly calculate average H"""
        self.assertFloatEqual(self.randquads.Entropy, \
        3.0/25 * 0.91829583405448956 + 8.0/25)
    
    def test_evaluateProbability(self):
        """Should calculate  proper P value for seq"""
        self.dinucs.Prior=1
        q=self.dinucs.evaluateProbability('AT')
        self.assertFloatEqual(q,.50)
        z=self.dinucs.evaluateProbability('ATAT')
        self.assertFloatEqual(z,.25)
        p=self.dinucs.evaluateProbability('ATATAT')
        self.assertFloatEqual(p,.125)
        j=self.dinucs.evaluateProbability('ATACAT')
        self.assertFloatEqual(j,.125)
        h=self.orderfive.evaluateProbability('AAAAAT')
        self.assertFloatEqual(h,.50)
    
    def test_replaceDegenerateBases(self):
        """strips degenerate bases...."""
        text = 'AATCGCRRCCYAATC'
        m=MarkovGenerator([text],order=2)
        self.assertEqual(m.Text, [text])
        m.replaceDegenerateBases()
        self.assertEqual(m.Text[0][0:6],'aatcgc')
        p=m.Text[0][6]
        q= p in ['a','t','c','g']
        self.assertEqual(q,True)

    def test_wordToUniqueKey(self):
        """wordToUniqueKey should generate proper integers"""
        m=MarkovGenerator(['aataacaataac'],order=2)
        word='gca'
        uniqueKey=m.wordToUniqueKey(word)
        #a=0 c=1 t=2 g=3
        #should be (4^0*3)+(4^1*1)+(4^2*0)=3+4+0=7
        self.assertEqual(uniqueKey,7)
   
    def test_evaluateArrayProbability(self):
        """evaluateArrayProbability should calc prob from array indices"""
        m=MarkovGenerator(['aaaaaaaatt'],order=0)
        #8 a's, 2 t's
        m.calcFrequencies()
        prob=m.evaluateArrayProbability(array([0,2]))
        self.assertFloatEqual(prob,0.16) #0.8*0.2
        prob=m.evaluateArrayProbability(array([0,1]))
        self.assertFloatEqual(prob,0) #0.8*0



if __name__ == '__main__':
    main()