File: test_bootstrap.py

package info (click to toggle)
python-cogent 1.5.3-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 16,424 kB
  • ctags: 24,343
  • sloc: python: 134,200; makefile: 100; ansic: 17; sh: 10
file content (154 lines) | stat: -rw-r--r-- 5,159 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env python

import sys
import unittest

from cogent.evolve import likelihood_function, \
     parameter_controller, substitution_model, bootstrap

from cogent import LoadSeqs, LoadTree

import os

__author__ = "Peter Maxwell and  Gavin Huttley"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Peter Maxwell", "Gavin Huttley", "Matthew Wakefield",
                    "Helen Lindsay", "Andrew Butterfield"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Gavin Huttley"
__email__ = "gavin.huttley@anu.edu.au"
__status__ = "Production"

base_path = os.getcwd()
data_path = os.path.join(base_path, 'data')

seqnames = ['Chimpanzee', 'Rhesus', 'Orangutan',
            'Human']

REPLICATES = 2

def float_ge_zero(num, epsilon=1e-6):
    """compare whether a floating point value is >= zero with epsilon
    tolerance."""
    if num >= 0.0:
        return True
    elif abs(num - 0.0) < epsilon:
        return True
    else:
        return False

class BootstrapTests(unittest.TestCase):
    def gettree(self):
        treeobj = LoadTree(filename=os.path.join(data_path,"murphy.tree"))
        
        return treeobj.getSubTree(seqnames)
    
    def getsubmod(self,choice = 'F81'):
        if choice == 'F81':
            return substitution_model.Nucleotide(model_gaps=True)
        else:
            return substitution_model.Nucleotide(
                    model_gaps=True,
                    predicates = {'kappa':'transition'})
    
    def getalignmentobj(self):
        moltype = self.getsubmod().MolType
        alignmentobj = LoadSeqs(
                filename = os.path.join(data_path, "brca1.fasta"),
                moltype = moltype)
        return alignmentobj.takeSeqs(seqnames)[:1000]
    
    def getcontroller(self,treeobj, submodobj):
        return submodobj.makeParamController(treeobj)
    
    def create_null_controller(self, alignobj):
        """A null model controller creator.
        We constrain the human chimp branches to be equal."""
        treeobj = self.gettree()
        submodobj = self.getsubmod()
        controller = self.getcontroller(treeobj, submodobj)
                
        # we are setting a local molecular clock for human/chimp
        controller.setLocalClock('Human', 'Chimpanzee')
        return controller
    
    def create_alt_controller(self,alignobj):
        """An alternative model controller. Chimp/Human
        branches are free to vary."""
        
        treeobj = self.gettree()
        submodobj = self.getsubmod()
        controller = self.getcontroller(treeobj, submodobj)
        return controller
        
    def calclength(self, likelihood_function):
        """This extracts the length of the human branch and returns it."""
        
        return likelihood_function.getParamValue("length", 'Human')
        
    def test_conf_int(self):
        """testing estimation of confidence intervals."""
        alignobj = self.getalignmentobj()
        
        bstrap = bootstrap.EstimateConfidenceIntervals(
                self.create_null_controller(alignobj), self.calclength, alignobj)
        bstrap.setNumReplicates(REPLICATES)
        bstrap.setSeed(1984)
        bstrap.run(local=True)
        samplelnL = bstrap.getSamplelnL()
        for lnL in samplelnL:
            assert lnL < 0.0, lnL
            
        observed_stat = bstrap.getObservedStats()
        assert float_ge_zero(observed_stat)
        
        samplestats = bstrap.getSampleStats()
        
        for stat in samplestats:
            assert float_ge_zero(stat)
        
        self.assertEqual(len(samplelnL), REPLICATES)
        self.assertEqual(len(samplestats), REPLICATES)
        
    def test_prob(self):
        """testing estimation of probability."""
        import sys
        alignobj = self.getalignmentobj()
        prob_bstrap = bootstrap.EstimateProbability(
                self.create_null_controller(alignobj),
                self.create_alt_controller(alignobj),
                alignobj)
        prob_bstrap.setNumReplicates(REPLICATES)
        prob_bstrap.setSeed(1984)
        prob_bstrap.run(local=True)
        
        self.assertEqual(len(prob_bstrap.getSampleLRList()), REPLICATES)
        
        assert float_ge_zero(prob_bstrap.getObservedLR())
        
        # check the returned sample LR's for being > 0.0
        for sample_LR in prob_bstrap.getSampleLRList():
            #print sample_LR
            assert float_ge_zero(sample_LR), sample_LR
            
        # check the returned observed lnL fulfill this assertion too, really
        # testing their order
        null, alt = prob_bstrap.getObservedlnL()
        assert float_ge_zero(2 * (alt - null))
            
        # now check the structure of the returned sample
        for snull, salt in prob_bstrap.getSamplelnL():
            #print salt, snull, 2*(salt-snull)
            assert float_ge_zero(2 * (salt - snull))
    
        # be sure we get something back from getprob if proc rank is 0
        assert float_ge_zero(prob_bstrap.getEstimatedProb())
        
        
if __name__ == "__main__":
        unittest.main()