1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
|
# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
import unittest
import numpy as np
import numpy.testing as npt
from skbio import Protein
class TestProtein(unittest.TestCase):
    """Unit tests for the ``Protein`` sequence class imported from skbio."""

    # Characters expected from both ``definite_chars`` and the deprecated
    # ``nondegenerate_chars``; also the expected expansion of 'X' in
    # ``degenerate_map``.
    _DEFINITE_CHARS = "ACDEFGHIKLMNPQRSTVWY"

    def test_alphabet(self):
        # ``alphabet`` must be a plain ``set`` on the class and on instances.
        expected = set("ACDEFGHIKLMNPQRSTVWYBZX-.*")

        self.assertIs(type(Protein.alphabet), set)
        self.assertEqual(Protein.alphabet, expected)

        # Mutating the returned set must not leak into later reads.
        Protein.alphabet.add("JO")
        self.assertEqual(Protein.alphabet, expected)
        self.assertEqual(Protein('').alphabet, expected)

        # The attribute is read-only on instances.
        with self.assertRaises(AttributeError):
            Protein('').alphabet = set("ABCD")

    # TODO: duplicate of test_definite_chars, remove when nondegenerate_chars
    # is removed
    def test_nondegenerate_chars(self):
        exp = set(self._DEFINITE_CHARS)
        self.assertEqual(Protein("").nondegenerate_chars, exp)
        self.assertEqual(Protein.nondegenerate_chars, exp)

    def test_definite_chars(self):
        exp = set(self._DEFINITE_CHARS)
        self.assertEqual(Protein("").definite_chars, exp)
        self.assertEqual(Protein.definite_chars, exp)

    def test_degenerate_map(self):
        # Each degenerate character maps to the set of characters it may
        # stand for; 'X' expands to every definite character.
        exp = {
            'B': {'D', 'N'},
            'Z': {'E', 'Q'},
            'X': set(self._DEFINITE_CHARS),
        }
        self.assertEqual(Protein("").degenerate_map, exp)
        self.assertEqual(Protein.degenerate_map, exp)

    def test_stop_chars(self):
        # Same contract as ``alphabet``: a plain ``set``, unaffected by
        # mutation of a previously returned value, read-only on instances.
        expected = set('*')

        self.assertIs(type(Protein.stop_chars), set)
        self.assertEqual(Protein.stop_chars, expected)

        Protein.stop_chars.add("JO")
        self.assertEqual(Protein.stop_chars, expected)
        self.assertEqual(Protein('').stop_chars, expected)

        with self.assertRaises(AttributeError):
            Protein('').stop_chars = set("^&")

    def test_stops(self):
        # (sequence, expected per-position stop mask)
        cases = [
            ('', []),
            ('P', [False]),
            ('PAW', [False, False, False]),
            ('PAW*', [False, False, False, True]),
            ('P*W*', [False, True, False, True]),
            ('****', [True, True, True, True]),
            ('XZB-.', [False, False, False, False, False]),
        ]
        for seq, mask in cases:
            # subTest keeps one failing sequence from hiding the others.
            with self.subTest(seq=seq):
                # Explicit bool dtype so the empty case is not a float array.
                npt.assert_array_equal(Protein(seq).stops(),
                                       np.array(mask, dtype=bool))

    def test_has_stops(self):
        # (sequence, whether any stop character is expected)
        cases = [
            ('', False),
            ('P', False),
            ('PAW', False),
            ('PAW*', True),
            ('P*W*', True),
            ('****', True),
            ('XZB-.', False),
        ]
        for seq, expected in cases:
            with self.subTest(seq=seq):
                self.assertIs(bool(Protein(seq).has_stops()), expected)

    def test_motif_n_glycosylation(self):
        # No motif present.
        seq = Protein("ACDFFACGNPSL")
        self.assertEqual(list(seq.find_motifs("N-glycosylation")), [])

        # Single ungapped match.
        seq = Protein("ACDFNFTACGNPSL")
        self.assertEqual(list(seq.find_motifs("N-glycosylation")),
                         [slice(4, 8)])

        # With gap positions passed via ``ignore``, the expected slice spans
        # the gap that sits inside the match.
        seq = Protein("AC-DFN-FTACGNPSL")
        found = seq.find_motifs("N-glycosylation", ignore=seq.gaps())
        self.assertEqual(list(found), [slice(5, 10)])

    def test_repr(self):
        # basic sanity checks for custom repr stats. more extensive testing is
        # performed on Sequence.__repr__
        #
        # (sequence, expected "has gaps" value, expected "has stops" value)
        cases = [
            ('', False, False),
            ('PAW', False, False),
            ('PA*W-', True, True),
            ('*****', False, True),
        ]
        for seq, has_gaps, has_stops in cases:
            with self.subTest(seq=seq):
                obs = repr(Protein(seq))
                # obtained from super()
                self.assertIn('has gaps: {}'.format(has_gaps), obs)
                # custom to Protein
                self.assertIn('has stops: {}'.format(has_stops), obs)

    def test_cannot_subclass(self):
        # Merely defining a subclass of Protein is expected to raise.
        with self.assertRaisesRegex(TypeError, "Subclassing disabled"):
            class CustomSequence(Protein):
                pass
# Run the test suite only when this file is executed as a script; importing
# the module (e.g. through a test collector) must not start the runner.
if __name__ == "__main__":
    unittest.main()
|