File: test_popgenstat.py

package info (click to toggle)
python-dendropy 4.2.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 68,392 kB
  • ctags: 3,947
  • sloc: python: 41,840; xml: 1,400; makefile: 15
file content (128 lines) | stat: -rw-r--r-- 5,091 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#! /usr/bin/env python

##############################################################################
##  DendroPy Phylogenetic Computing Library.
##
##  Copyright 2010-2015 Jeet Sukumaran and Mark T. Holder.
##  All rights reserved.
##
##  See "LICENSE.rst" for terms and conditions of usage.
##
##  If you use this work or any portion thereof in published work,
##  please cite it as:
##
##     Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
##     for phylogenetic computing. Bioinformatics 26: 1569-1571.
##
##############################################################################

"""
Tests population genetic statistic calculation.
"""

import unittest
import math
import dendropy
from dendropy.test.support import dendropytest
from dendropy.test.support import pathmap
from dendropy.utility import messaging
from dendropy.calculate import popgenstat
_LOG = messaging.get_logger(__name__)

class TajimasDTests(dendropytest.ExtendedTestCase):

    def setUp(self):
        s = """\
            >s1
            ATAATAAAAA AATAATAAAA AAATAAAAAA AATAAAAAAA A
            >s2
            AAAAAAAATA AATAATAAAA AAATAAAAAA AAAAAAAAAA A
            >s3
            AAAATAAAAA TATAATAAAA AAATATAAAA AAAAAAAAAA A
            >s4
            AAAAAAAAAA AATAATAAAA AAATAAATAA ATAAAAAAAA A
            >s5
            AAAATAAAAA AAATATAAAA AAATAAAAAA AAAAAAAAAA A
            >s6
            AAAATAAAAA AAAAATAAAA AAAAAAAAAA AAAAATAAAA A
            >s7
            AAAAAATAAA AATAATAAAA AAATAAAAAA AAAAAAAAAA A
            >s8
            AAAAAAAAAA AAAAATAAAA AAATAAAAAA AAAAAAAAAT A
            >s9
            AAAAAAAAAA AAAAAAAAAA AAATAAAAAA AAAAAAAAAA A
            >s10
            AAAAAAAAAA AAAAATAAAA AAATAATAAA AAAAAAAAAA A"""
        self.matrix = dendropy.DnaCharacterMatrix.get_from_string(s, 'fasta')

        s_with_missing = """\
            >s1
            ATAATAAAAA AATAATAAAA AAATAAAAAA AATAAAAAAA A
            >s2
            AAAAAAAATA AATAATAAAA AAATAAAAAA AAAAAAAAAA A
            >s3
            AAAATAAAAA TATAATAAAA AAATATAAAA ?AAAAAAAAA ?
            >s4
            AAAAAAAAAA AATAATAAAA AAATAAATAA ATAAAAAAAA A
            >s5
            AAAATAAAAA AAATATAAAA AAATAAAAAA AAAAAAAAAA A
            >s6
            ?AAATAAAAA AAAAATAAAA AAAAAAAAAA AAAAATAAAA A
            >s7
            AAAAAATAAA AATAATAAAA AAATAAAAAA AAAAAAAAAA A
            >s8
            AAAAAAAAAA AAAAATAAAA AAATAAAAAA AAAAAAAAAT A
            >s9
            AAAAAAAAAA AAAAAAAAAA AAATAAAAAA ?AAAAAAAAA A
            >s10
            AAAAAAAAAA AAAAATAAAA AAATAATAAA AAAAAAAAAA A"""
        self.matrix_with_missing = dendropy.DnaCharacterMatrix.get_from_string(s_with_missing, 'fasta')

    def testTajimasD(self):
        self.assertAlmostEqual(popgenstat.tajimas_d(self.matrix), -1.44617198561, 4)

    def testTajimasD_with_missing(self):
        self.assertAlmostEqual(popgenstat.tajimas_d(self.matrix_with_missing), -1.44617198561, 4)

class SinglePopTest(dendropytest.ExtendedTestCase):

    data = dendropy.DnaCharacterMatrix.get_from_path(pathmap.char_source_path('COII_Apes.nex'), schema="nexus")

    def test_num_segregating_sites(self):
        self.assertEqual(popgenstat.num_segregating_sites(self.data, ignore_uncertain=True), 183)

    def test_average_number_of_pairwise_differences(self):
        self.assertAlmostEqual(popgenstat.average_number_of_pairwise_differences(self.data, ignore_uncertain=True),  62.75000, 4)

    def test_nucleotide_diversity(self):
        self.assertAlmostEqual(popgenstat.nucleotide_diversity(self.data, ignore_uncertain=True), 0.09174, 4)

    def test_tajimas_d(self):
        self.assertAlmostEqual(popgenstat.tajimas_d(self.data, ignore_uncertain=True), 1.12467, 4)

    def test_wattersons_theta(self):
        self.assertAlmostEqual(popgenstat.wattersons_theta(self.data, ignore_uncertain=True), 49.00528, 4)

class PopulationPairSummaryStatisticsTests(dendropytest.ExtendedTestCase):

    def testPopulationPairSummaryStatistics(self):
        seqs = dendropy.DnaCharacterMatrix.get_from_path(pathmap.char_source_path('orti.nex'), schema="nexus")
        p1 = []
        p2 = []
        for idx, t in enumerate(seqs.taxon_namespace):
            if t.label.startswith('EPAC'):
                p1.append(seqs[t])
            else:
                p2.append(seqs[t])
        pp = popgenstat.PopulationPairSummaryStatistics(p1, p2)
        self.assertAlmostEqual(pp.average_number_of_pairwise_differences, 11.28063, 4)
        self.assertAlmostEqual(pp.average_number_of_pairwise_differences_between, 16.119047619, 4)
        self.assertAlmostEqual(pp.average_number_of_pairwise_differences_within, 10.2191697192, 4)
        self.assertAlmostEqual(pp.average_number_of_pairwise_differences_net, 5.89987789988, 4)
        self.assertEqual(pp.num_segregating_sites, 29)
        self.assertAlmostEqual(pp.wattersons_theta, 7.85734688643, 4)
        self.assertAlmostEqual(pp.tajimas_d, 1.65318627677, 4)
        self.assertAlmostEqual(pp.wakeleys_psi, 0.8034976, 2)

if __name__ == "__main__":
    unittest.main()