File: test_bootstrap.py

package info (click to toggle)
python-cogent 2024.5.7a1%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 74,600 kB
  • sloc: python: 92,479; makefile: 117; sh: 16
file content (140 lines) | stat: -rw-r--r-- 4,568 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python

import os
import unittest

from cogent3 import load_aligned_seqs, load_tree
from cogent3.evolve import bootstrap, substitution_model


base_path = os.getcwd()
data_path = os.path.join(base_path, "data")

seqnames = ["Chimpanzee", "Rhesus", "Orangutan", "Human"]

REPLICATES = 2


def float_ge_zero(num, epsilon=1e-6):
    """compare whether a floating point value is >= zero with epsilon
    tolerance."""
    if num >= 0.0:
        return True
    elif abs(num - 0.0) < epsilon:
        return True
    else:
        return False


class BootstrapTests(unittest.TestCase):
    def gettree(self):
        treeobj = load_tree(filename=os.path.join(data_path, "murphy.tree"))

        return treeobj.get_sub_tree(seqnames)

    def getsubmod(self, choice="F81"):
        if choice == "F81":
            return substitution_model.TimeReversibleNucleotide(model_gaps=True)
        else:
            return substitution_model.TimeReversibleNucleotide(
                model_gaps=True, predicates={"kappa": "transition"}
            )

    def getalignmentobj(self):
        moltype = self.getsubmod().moltype
        alignmentobj = load_aligned_seqs(
            os.path.join(data_path, "brca1.fasta"), moltype=moltype
        )
        return alignmentobj.take_seqs(seqnames)[:1000]

    def getcontroller(self, treeobj, submodobj):
        return submodobj.make_likelihood_function(treeobj)

    def create_null_controller(self, alignobj):
        """A null model controller creator.
        We constrain the human chimp branches to be equal."""
        treeobj = self.gettree()
        submodobj = self.getsubmod()
        controller = self.getcontroller(treeobj, submodobj)

        # we are setting a local molecular clock for human/chimp
        controller.set_local_clock("Human", "Chimpanzee")
        return controller

    def create_alt_controller(self, alignobj):
        """An alternative model controller. Chimp/Human
        branches are free to vary."""

        treeobj = self.gettree()
        submodobj = self.getsubmod()
        controller = self.getcontroller(treeobj, submodobj)
        return controller

    def calclength(self, likelihood_function):
        """This extracts the length of the human branch and returns it."""

        return likelihood_function.get_param_value("length", "Human")

    def test_conf_int(self):
        """testing estimation of confidence intervals."""
        alignobj = self.getalignmentobj()

        bstrap = bootstrap.EstimateConfidenceIntervals(
            self.create_null_controller(alignobj), self.calclength, alignobj
        )
        bstrap.set_num_replicates(REPLICATES)
        bstrap.set_seed(1984)
        bstrap.run(local=True, show_progress=False)
        samplelnL = bstrap.get_sample_lnL()
        for lnL in samplelnL:
            assert lnL < 0.0, lnL

        observed_stat = bstrap.get_observed_stats()
        assert float_ge_zero(observed_stat)

        samplestats = bstrap.getSampleStats()

        for stat in samplestats:
            assert float_ge_zero(stat)

        self.assertEqual(len(samplelnL), REPLICATES)
        self.assertEqual(len(samplestats), REPLICATES)

    def test_prob(self):
        """testing estimation of probability."""

        alignobj = self.getalignmentobj()
        prob_bstrap = bootstrap.EstimateProbability(
            self.create_null_controller(alignobj),
            self.create_alt_controller(alignobj),
            alignobj,
        )
        prob_bstrap.set_num_replicates(REPLICATES)
        prob_bstrap.set_seed(1984)
        prob_bstrap.run(local=True, show_progress=False)

        self.assertEqual(len(prob_bstrap.get_sample_LR_list()), REPLICATES)

        assert float_ge_zero(prob_bstrap.get_observed_LR())

        # check the returned sample LR's for being > 0.0
        for sample_LR in prob_bstrap.get_sample_LR_list():
            # print sample_LR
            assert float_ge_zero(sample_LR), sample_LR

        # check the returned observed lnL fulfill this assertion too, really
        # testing their order
        null, alt = prob_bstrap.get_observed_lnL()
        assert float_ge_zero(2 * (alt - null))

        # now check the structure of the returned sample
        for snull, salt in prob_bstrap.get_sample_lnL():
            # print salt, snull, 2*(salt-snull)
            assert float_ge_zero(2 * (salt - snull))

        # be sure we get something back from getprob if proc rank is 0
        assert float_ge_zero(prob_bstrap.get_estimated_prob())


if __name__ == "__main__":
    unittest.main()