File: test_best_likelihood.py

package info (click to toggle)
python-cogent 2023.2.12a1%2Bdfsg-2%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 12,416 kB
  • sloc: python: 89,165; makefile: 117; sh: 16
file content (150 lines) | stat: -rw-r--r-- 4,809 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python

import math

from unittest import TestCase, main

from cogent3 import DNA, make_aligned_seqs
from cogent3.evolve.best_likelihood import (
    BestLogLikelihood,
    _take,
    _transpose,
    aligned_columns_to_rows,
    count_column_freqs,
    get_G93_lnL_from_array,
    get_ML_probs,
)


__author__ = "Helen Lindsay"
__copyright__ = "Copyright 2007-2022, The Cogent Project"
__credits__ = ["Gavin Huttley", "Helen Lindsay"]
__license__ = "BSD-3"
__version__ = "2023.2.12a1"
__maintainer__ = "Helen Lindsay"
__email__ = "helen.lindsay@anu.edu.au"
__status__ = "Production"

from numpy.testing import assert_allclose


IUPAC_DNA_ambiguities = "NRYWSKMBDHV"


def makeSampleAlignment(gaps=False, ambiguities=False):
    if gaps:
        seqs_list = ["AAA--CTTTGG-T", "CCCCC-TATG-GT", "-AACCCTTTGGGT"]
    elif ambiguities:
        seqs_list = ["AARNCCTTTGGC", "CCNYCCTTTGSG", "CAACCCTGWGGG"]
    else:
        seqs_list = ["AAACCCGGGTTTA", "CCCGGGTTTAAAC", "GGGTTTAAACCCG"]
    seqs = list(zip("abc", seqs_list))
    return make_aligned_seqs(data=seqs)


class TestGoldman93(TestCase):
    def setUp(self):
        self.aln = makeSampleAlignment()
        self.gapped_aln = makeSampleAlignment(gaps=True)
        self.ambig_aln = makeSampleAlignment(ambiguities=True)

    def test_aligned_columns_to_rows(self):
        obs = aligned_columns_to_rows(self.aln[:-1], 3)
        expect = [
            ["AAA", "CCC", "GGG"],
            ["CCC", "GGG", "TTT"],
            ["GGG", "TTT", "AAA"],
            ["TTT", "AAA", "CCC"],
        ]
        assert obs == expect, (obs, expect)

        obs = aligned_columns_to_rows(self.aln, 1)
        expect = [
            ["A", "C", "G"],
            ["A", "C", "G"],
            ["A", "C", "G"],
            ["C", "G", "T"],
            ["C", "G", "T"],
            ["C", "G", "T"],
            ["G", "T", "A"],
            ["G", "T", "A"],
            ["G", "T", "A"],
            ["T", "A", "C"],
            ["T", "A", "C"],
            ["T", "A", "C"],
            ["A", "C", "G"],
        ]
        self.assertEqual(obs, expect)
        obs = aligned_columns_to_rows(self.gapped_aln[:-1], 3, allowed_chars="ACGT")
        expect = [["TTT", "TAT", "TTT"]]
        self.assertEqual(obs, expect)

        obs = aligned_columns_to_rows(
            self.ambig_aln, 2, exclude_chars=IUPAC_DNA_ambiguities
        )
        expect = [["AA", "CC", "CA"], ["CC", "CC", "CC"], ["TT", "TT", "TG"]]
        self.assertEqual(obs, expect)

    def test_count_column_freqs(self):
        columns = aligned_columns_to_rows(self.aln, 1)
        obs = count_column_freqs(columns)
        expect = {"A C G": 4, "C G T": 3, "G T A": 3, "T A C": 3}
        self.assertEqual(obs, expect)

        columns = aligned_columns_to_rows(self.aln[:-1], 2)
        obs = count_column_freqs(columns)
        expect = {
            "AA CC GG": 1,
            "AC CG GT": 1,
            "CC GG TT": 1,
            "GG TT AA": 1,
            "GT TA AC": 1,
            "TT AA CC": 1,
        }
        self.assertEqual(obs, expect)

    def test__transpose(self):
        """test transposing an array"""
        a = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
        e = [[0, 3, 6, 9], [1, 4, 7, 10], [2, 5, 8, 11]]
        self.assertEqual(_transpose(a), e)

    def test__take(self):
        """test taking selected rows from an array"""
        e = [[0, 3, 6, 9], [1, 4, 7, 10], [2, 5, 8, 11]]
        self.assertEqual(_take(e, [0, 1]), [[0, 3, 6, 9], [1, 4, 7, 10]])
        self.assertEqual(_take(e, [1, 2]), [[1, 4, 7, 10], [2, 5, 8, 11]])
        self.assertEqual(_take(e, [0, 2]), [[0, 3, 6, 9], [2, 5, 8, 11]])

    def test_get_ML_probs(self):
        columns = aligned_columns_to_rows(self.aln, 1)
        obs = get_ML_probs(columns, with_patterns=True)
        expect = {
            "A C G": 4 / 13.0,
            "C G T": 3 / 13.0,
            "G T A": 3 / 13.0,
            "T A C": 3 / 13.0,
        }
        sum = 0
        for pattern, lnL, freq in obs:
            assert_allclose(lnL, expect[pattern])
            sum += lnL
            self.assertTrue(lnL >= 0)
        assert_allclose(sum, 1)

    def test_get_G93_lnL_from_array(self):
        columns = aligned_columns_to_rows(self.aln, 1)
        obs = get_G93_lnL_from_array(columns)
        expect = math.log(math.pow(4 / 13.0, 4)) + 3 * math.log(math.pow(3 / 13.0, 3))
        assert_allclose(obs, expect)

    def test_BestLogLikelihood(self):
        obs = BestLogLikelihood(self.aln, DNA.alphabet)
        expect = math.log(math.pow(4 / 13.0, 4)) + 3 * math.log(math.pow(3 / 13.0, 3))
        assert_allclose(obs, expect)
        lnL, l = BestLogLikelihood(self.aln, DNA.alphabet, return_length=True)
        self.assertEqual(l, len(self.aln))


if __name__ == "__main__":
    main()