File: test_ProtParam.py

package info (click to toggle)
python-biopython 1.85%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 126,372 kB
  • sloc: xml: 1,047,995; python: 332,722; ansic: 16,944; sql: 1,208; makefile: 140; sh: 81
file content (271 lines) | stat: -rw-r--r-- 13,453 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
# Copyright 2003-2004 by Iddo Friedberg.  All rights reserved.
# Revisions copyright 2008-2010 by Peter Cock. All rights reserved.
# Revisions copyright 2012 by Matt Fenwick. All rights reserved.
# Revisions copyright 2012 by Kai Blin. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for Bio.SeqUtils.ProtParam and related code."""

import unittest

from Bio import BiopythonDeprecationWarning
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqUtils import molecular_weight
from Bio.SeqUtils import ProtParam
from Bio.SeqUtils import ProtParamData


class ProtParamTest(unittest.TestCase):
    """Tests for ProtParam.

    Every check is repeated for a ProteinAnalysis built from a plain string,
    a Seq and a SeqRecord holding the same protein sequence. Each repetition
    runs in its own subTest so that a failure reports which input type broke.
    """

    def setUp(self):
        """Initialise objects.

        Sets ``self.text`` (the raw sequence), ``self.sequences`` (the same
        sequence as str, Seq and SeqRecord) and ``self.analyses`` (one
        ProteinAnalysis per entry of ``self.sequences``, in the same order).
        """
        text = "MAEGEITTFTALTEKFNLPPGNYKKPKLLYCSNGGHFLRILPDGTVDGTRDRSDQHIQLQLSAESVGEVYIKSTETGQYLAMDTSGLLYGSQTPSEECLFLERLEENHYNTYTSKKHAEKNWFVGLKKNGSCKRGPRTHYGQKAILFLPLPV"
        seq = Seq(text)
        record = SeqRecord(seq)
        self.text = text
        self.sequences = (text, seq, record)
        self.analyses = tuple(
            ProtParam.ProteinAnalysis(sequence) for sequence in self.sequences
        )

    def _labelled_analyses(self):
        """Return (input type name, analysis) pairs used to label subTests."""
        return [
            (type(sequence).__name__, analysis)
            for sequence, analysis in zip(self.sequences, self.analyses)
        ]

    def test_count_amino_acids(self):
        """Calculate amino acid counts."""
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                count_dict = analysis.count_amino_acids()
                for aa, count in count_dict.items():
                    self.assertEqual(count, self.text.count(aa))

    def test_get_amino_acids_percent(self):
        """Calculate amino acid percentages (DEPRECATED)."""
        seq_len = len(self.text)
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                # Check the warning per call, so that every input type is
                # verified to emit it (not just one of the three).
                with self.assertWarns(BiopythonDeprecationWarning):
                    percent_dict = analysis.get_amino_acids_percent()
                for aa, fraction in percent_dict.items():
                    self.assertAlmostEqual(fraction, self.text.count(aa) / seq_len)

    def test_amino_acids_percent(self):
        """Calculate amino acid percentages."""
        seq_len = len(self.text)
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                for aa, percent in analysis.amino_acids_percent.items():
                    self.assertAlmostEqual(
                        percent, self.text.count(aa) * 100 / seq_len
                    )

    def test_get_molecular_weight(self):
        """Calculate protein molecular weight."""
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                self.assertAlmostEqual(analysis.molecular_weight(), 17103.16, 2)

    def test_get_monoisotopic_molecular_weight(self):
        """Calculate monoisotopic molecular weight."""
        for sequence in self.sequences:
            with self.subTest(input_type=type(sequence).__name__):
                analysis = ProtParam.ProteinAnalysis(sequence, monoisotopic=True)
                self.assertAlmostEqual(analysis.molecular_weight(), 17092.61, 2)

    def test_get_molecular_weight_identical(self):
        """Confirm protein molecular weight agrees with calculation from Bio.SeqUtils."""
        # This test is largely redundant, since ProteinAnalysis.molecular_weight
        # internally calls SeqUtils.molecular_weight.
        mw_2 = molecular_weight(self.text, seq_type="protein")
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                mw_1 = analysis.molecular_weight()
                self.assertAlmostEqual(mw_1, mw_2)

    def test_get_monoisotopic_molecular_weight_identical(self):
        """Confirm monoisotopic molecular weight agrees with calculation from Bio.SeqUtils."""
        # This test is largely redundant, since ProteinAnalysis.molecular_weight
        # internally calls SeqUtils.molecular_weight.
        mw_2 = molecular_weight(self.text, seq_type="protein", monoisotopic=True)
        for sequence in self.sequences:
            with self.subTest(input_type=type(sequence).__name__):
                analysis = ProtParam.ProteinAnalysis(sequence, monoisotopic=True)
                mw_1 = analysis.molecular_weight()
                self.assertAlmostEqual(mw_1, mw_2)

    def test_aromaticity(self):
        """Calculate protein aromaticity."""
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                # Old test used a number rounded to two digits, so use the same
                self.assertAlmostEqual(analysis.aromaticity(), 0.10, 2)

    def test_instability_index(self):
        """Calculate protein instability index."""
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                # Old test used a number rounded to two digits, so use the same
                self.assertAlmostEqual(analysis.instability_index(), 41.98, 2)

    def test_flexibility(self):
        """Calculate protein flexibility."""
        # Turn black code style off
        # fmt: off
        expected_flexibility = [
            0.9825119047619049, 1.0166904761904763, 0.9947857142857144,
            0.9660238095238095, 0.9890714285714285, 0.9737261904761906,
            0.9789166666666669, 1.004547619047619, 1.0235357142857144,
            1.0163214285714286, 0.981297619047619, 1.0388809523809523,
            0.9956309523809524, 1.0379047619047619, 1.014654761904762,
            1.015154761904762, 1.0317619047619049, 1.0100833333333334,
            1.0738333333333334, 1.0460952380952382, 1.0333571428571429,
            1.0429761904761905, 0.9842738095238095, 0.9984404761904762,
            0.9814404761904763, 0.9715357142857144, 1.0063690476190477,
            0.988952380952381, 0.9930952380952381, 0.9962619047619047,
            0.9774523809523811, 0.9747857142857144, 0.9701547619047618,
            0.9759404761904762, 0.9515119047619047, 0.9745714285714286,
            1.007642857142857, 1.0024523809523809, 1.0019761904761904,
            1.0053571428571428, 1.000595238095238, 1.0385238095238094,
            1.0090357142857143, 1.0095, 1.0207142857142857, 1.0371071428571428,
            1.0223690476190477, 1.0373809523809523, 1.030095238095238,
            1.0166190476190475, 0.9939404761904762, 0.9935833333333335,
            1.009547619047619, 0.9678095238095237, 1.0027380952380953,
            0.9720595238095241, 1.0215952380952382, 0.996904761904762,
            1.0330238095238098, 1.0140714285714285, 0.9977976190476192,
            1.0319285714285715, 1.016714285714286, 0.9713928571428571,
            0.9921666666666669, 0.9904404761904763, 1.0350238095238096,
            1.0021904761904763, 1.0092738095238096, 1.0462380952380952,
            1.012392857142857, 1.0289761904761903, 1.0108095238095236,
            0.9762619047619049, 0.9885357142857144, 0.9901428571428571,
            0.9795119047619048, 1.014059523809524, 0.9857976190476189,
            1.0114404761904763, 0.9970357142857144, 0.9755238095238097,
            0.9871428571428573, 0.9820952380952382, 1.006095238095238,
            0.9997023809523811, 1.0065238095238094, 1.0149047619047618,
            1.0451071428571428, 1.0396785714285717, 1.0452142857142857,
            1.0262619047619048, 0.9735952380952381, 0.998404761904762,
            0.9777976190476191, 0.9773809523809524, 1.008214285714286,
            0.9772619047619048, 0.9955000000000002, 1.0482261904761907,
            1.0262499999999999, 1.0189404761904763, 0.9988452380952382,
            1.0009285714285714, 1.0204404761904762, 0.9839880952380953,
            0.9809166666666669, 1.01475, 1.0234166666666669, 1.0355476190476192,
            1.033107142857143, 1.011154761904762, 1.0480714285714288,
            1.0591190476190477, 1.033809523809524, 1.008107142857143,
            0.9757261904761906, 0.9885714285714287, 0.9831428571428572,
            1.000595238095238, 0.9739761904761908, 1.0377619047619049,
            1.0295357142857142, 1.0269642857142858, 1.0371904761904762,
            1.0385476190476193, 1.0057023809523808, 1.06075, 1.006714285714286,
            1.0269642857142858, 1.0229761904761905, 1.0046309523809522,
            1.0053690476190476, 0.9886666666666667, 0.9931666666666669, 1.01775,
            1.003297619047619, 1.0161666666666667, 0.977440476190476,
            0.9762738095238096, 0.9785833333333332, 0.9609642857142857,
            0.9650833333333334]
        # Turn black code style on
        # fmt: on

        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                flexibility = analysis.flexibility()
                self.assertEqual(
                    len(flexibility),
                    len(expected_flexibility),
                    "Output length differs",
                )
                for f, e in zip(flexibility, expected_flexibility):
                    self.assertAlmostEqual(f, e)

    def test_isoelectric_point(self):
        """Calculate the isoelectric point."""
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                # Old test used a number rounded to two digits, so use the same
                self.assertAlmostEqual(analysis.isoelectric_point(), 7.72, 2)

    def test_charge_at_pH(self):
        """Test charge_at_pH function."""
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                self.assertAlmostEqual(analysis.charge_at_pH(7.72), 0.00, 2)

    def test_secondary_structure_fraction(self):
        """Calculate secondary structure fractions."""
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                helix, turn, sheet = analysis.secondary_structure_fraction()
                # Old test used numbers rounded to two digits, so use the same
                self.assertAlmostEqual(helix, 0.33, 2)
                self.assertAlmostEqual(turn, 0.29, 2)
                self.assertAlmostEqual(sheet, 0.37, 2)

    def test_protein_scale(self):
        """Calculate the Kyte & Doolittle scale."""
        # Turn black code style off
        # fmt: off
        expected = [-0.0783, +0.0358, +0.1258, +0.6950, +0.8775, +0.8350, +0.2925, +0.3383,
                    -0.1733, -0.4142, -0.5292, -0.6108, -0.8308, -0.8100, -0.8208, -1.0283,
                    -1.6300, -1.8233, -2.4267, -2.2292, -1.7817, -1.4742, -0.7467, -0.1608,
                    +0.1108, +0.2142, +0.1792, -0.1217, -0.4808, -0.4333, -0.5167, -0.2833,
                    +0.3758, +0.7225, +0.4958, +0.6033, +0.5625, +0.3108, -0.2408, -0.0575,
                    -0.3717, -0.7800, -1.1242, -1.4083, -1.7550, -2.2642, -2.8575, -2.9175,
                    -2.5358, -2.5325, -1.8142, -1.4667, -0.6058, -0.4483, +0.1300, +0.1225,
                    +0.2825, +0.1650, +0.3317, -0.2000, +0.2683, +0.1233, +0.4092, +0.1392,
                    +0.4192, +0.2758, -0.2350, -0.5750, -0.5983, -1.2067, -1.3867, -1.3583,
                    -0.8708, -0.5383, -0.3675, +0.0667, +0.0825, -0.0150, +0.1817, +0.4692,
                    +0.3017, +0.3800, +0.4825, +0.4675, +0.1575, -0.1783, -0.5175, -1.2017,
                    -1.7033, -1.5500, -1.2375, -0.8500, -0.0583, +0.3125, +0.4242, +0.7133,
                    +0.5633, +0.0483, -0.7167, -1.3158, -1.9217, -2.5033, -2.4117, -2.2483,
                    -2.3758, -2.0633, -1.8900, -1.8667, -1.9292, -1.8625, -2.0050, -2.2708,
                    -2.4050, -2.3508, -2.1758, -1.5533, -1.0350, -0.1983, -0.0233, +0.1800,
                    +0.0317, -0.0917, -0.6375, -0.9650, -1.4500, -1.6008, -1.7558, -1.5450,
                    -1.7900, -1.8133, -2.0125, -2.1383, -2.3142, -2.1525, -2.1425, -1.9733,
                    -1.4742, -0.8083, -0.2100, +0.8067, +1.3092, +1.8367, +2.0283, +2.3558]
        # Turn black code style on
        # fmt: on
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                scale = analysis.protein_scale(ProtParamData.kd, 9, 0.4)
                # zip() stops at the shorter input, so without this check a
                # truncated (or empty) profile would pass unnoticed.
                self.assertEqual(len(scale), len(expected), "Output length differs")
                for i, e in zip(scale, expected):
                    # Expected values have 4 decimal places, so restrict to that exactness
                    self.assertAlmostEqual(i, e, places=4)

    def test_gravy(self):
        """Calculate gravy. Tests all pre-defined scales."""
        # NOTE: the keys are passed verbatim to gravy() as scale names, so
        # their spelling must be kept exactly as-is.
        expected_values = {
            "KyteDoolitle": -0.5974,
            "Aboderin": 4.5671,
            "AbrahamLeo": 0.2378,
            "Argos": 0.8607,
            "BlackMould": 0.5074,
            "BullBreese": -0.0445,
            "Casari": -0.2414,
            "Cid": -0.0678,
            "Cowan3.4": 0.0234,
            "Cowan7.5": -0.0733,
            "Eisenberg": -0.0435,
            "Engelman": 1.600,
            "Fasman": -0.3614,
            "Fauchere": 0.327,
            "GoldSack": 1.1564,
            "Guy": 0.0675,
            "Jones": 1.223,
            "Juretic": -0.6672,
            "Kidera": 0.1383,
            "Miyazawa": 5.3109,
            "Parker": 1.7487,
            "Ponnuswamy": 0.3491,
            "Rose": 0.7147,
            "Roseman": -0.4729,
            "Sweet": -0.0791,
            "Tanford": 0.0625,
            "Wilson": 1.5493,
            "Zimmerman": 1.2841,
        }

        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                for scale, exp_v in expected_values.items():
                    self.assertAlmostEqual(
                        analysis.gravy(scale=scale), exp_v, places=4
                    )

                # An unknown scale name must be rejected with a clear message.
                with self.assertRaises(ValueError) as cm:
                    analysis.gravy("Wrong Scale")
                self.assertEqual("scale: Wrong Scale not known", str(cm.exception))

    def test_molar_extinction_coefficient(self):
        """Molar extinction coefficient."""
        for label, analysis in self._labelled_analyses():
            with self.subTest(input_type=label):
                # Compute once and check both returned coefficients.
                first, second = analysis.molar_extinction_coefficient()
                self.assertAlmostEqual(first, 17420, places=5)
                self.assertAlmostEqual(second, 17545, places=5)


if __name__ == "__main__":
    # Run the suite verbosely so each test name and outcome is printed.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))