File: test_SeqIO_PdbIO.py

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 46,860 kB
  • ctags: 13,237
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (117 lines) | stat: -rw-r--r-- 4,749 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Copyright 2012 by Eric Talevich.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

import unittest
import warnings

try:
    import numpy
    from numpy import dot  # Missing on PyPy's micronumpy
    del dot
    # Not used directly in this file, but Bio.PDB imports it, so check it here.
    from numpy.linalg import svd, det  # Missing in PyPy 2.0 numpypy
except ImportError:
    from Bio import MissingPythonDependencyError
    raise MissingPythonDependencyError(
        "Install NumPy if you want to use PDB formats with SeqIO.")

from Bio import SeqIO
from Bio.PDB.PDBExceptions import PDBConstructionWarning


class TestPdbSeqres(unittest.TestCase):
    """Tests for reading PDB files through the 'pdb-seqres' SeqIO format."""

    def test_seqres_parse(self):
        """Parse each chain of a multi-chain PDB from its SEQRES entries.

        Reference:
        http://www.rcsb.org/pdb/files/fasta.txt?structureIdList=2BEG
        """
        expected_seq = 'DAEFRHDSGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVVIA'
        records = list(SeqIO.parse('PDB/2BEG.pdb', 'pdb-seqres'))
        self.assertEqual(len(records), 5)
        # Every chain (A through E) is expected to carry the same sequence.
        for letter, record in zip('ABCDE', records):
            self.assertEqual(record.id, '2BEG:' + letter)
            self.assertEqual(record.annotations['chain'], letter)
            self.assertEqual(str(record.seq), expected_seq)

    def test_seqres_read(self):
        """Read the only chain of a single-chain PDB from its SEQRES entries.

        Reference:
        http://www.rcsb.org/pdb/files/fasta.txt?structureIdList=1A8O
        """
        expected_seq = ('MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTIL'
                        'KALGPGATLEEMMTACQG')
        record = SeqIO.read('PDB/1A8O.pdb', 'pdb-seqres')
        self.assertEqual(record.id, '1A8O:A')
        self.assertEqual(record.annotations['chain'], 'A')
        self.assertEqual(str(record.seq), expected_seq)

    def test_seqres_missing(self):
        """A PDB file without SEQRES entries yields no records."""
        records = list(SeqIO.parse('PDB/a_structure.pdb', 'pdb-seqres'))
        self.assertEqual(len(records), 0)


class TestPdbAtom(unittest.TestCase):
    """Tests for reading PDB files through the 'pdb-atom' SeqIO format."""

    def test_atom_parse(self):
        """Parse each chain of a multi-chain PDB from its ATOM entries.

        Reference:
        http://www.rcsb.org/pdb/files/fasta.txt?structureIdList=2BEG
        """
        expected_seq = 'LVFFAEDVGSNKGAIIGLMVGGVVIA'
        records = list(SeqIO.parse('PDB/2BEG.pdb', 'pdb-atom'))
        self.assertEqual(len(records), 5)
        # Every chain (A through E) is expected to carry the same sequence.
        for letter, record in zip('ABCDE', records):
            self.assertEqual(record.id, '2BEG:' + letter)
            self.assertEqual(record.annotations['chain'], letter)
            self.assertEqual(str(record.seq), expected_seq)

        # Second file: construction warnings are silenced while parsing, and
        # only the second chain's sequence is checked.
        expected_seq = (
            'DRLSRLRQMAAENQXXXXXXXXXXXXXXXXXXXXXXXPEPFMADFFNRVK'
            'RIRDNIEDIEQAIEQVAQLHTESLVAVSKEDRDRLNEKLQDTMARISALG'
            'NKIRADLKQIEKENKRAQQEGTFEDGTVSTDLRIRQSQHSSLSRKFVKVM'
            'TRYNDVQAENKRRYGENVARQCRVVEPSLSDDAIQKVIEHGXXXXXXXXX'
            'XXXXXXXXNEIRDRHKDIQQLERSLLELHEMFTDMSTLVASQGEMIDRIE'
            'FSVEQSHNYV'
        )
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=PDBConstructionWarning)
            records = list(SeqIO.parse('PDB/2XHE.pdb', 'pdb-atom'))
        second_chain = records[1]
        self.assertEqual(str(second_chain.seq), expected_seq)

    def test_atom_read(self):
        """Read the only chain of a single-chain PDB from its ATOM entries.

        Reference:
        http://www.rcsb.org/pdb/files/fasta.txt?structureIdList=1A8O
        """
        expected_seq = ('MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTIL'
                        'KALGPGATLEEMMTACQG')
        record = SeqIO.read('PDB/1A8O.pdb', 'pdb-atom')
        self.assertEqual(record.id, '1A8O:A')
        self.assertEqual(record.annotations['chain'], 'A')
        self.assertEqual(str(record.seq), expected_seq)

        # Second file: construction warnings are silenced while reading; the
        # record id is expected to use '????' in place of a structure ID.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=PDBConstructionWarning)
            record = SeqIO.read('PDB/a_structure.pdb', 'pdb-atom')
        self.assertEqual(record.id, '????:A')
        self.assertEqual(record.annotations['chain'], 'A')
        self.assertEqual(str(record.seq), 'E')

    def test_atom_noheader(self):
        """Parse the ATOM entries of a PDB file that has no HEADER line."""
        expected_seq = 'MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR'

        with warnings.catch_warnings():
            # Both warning categories are silenced for the duration of parsing.
            warnings.simplefilter('ignore', category=PDBConstructionWarning)
            warnings.simplefilter('ignore', category=UserWarning)
            records = list(SeqIO.parse('PDB/1LCD.pdb', 'pdb-atom'))

        self.assertEqual(len(records), 1)
        only_chain = records[0]
        self.assertEqual(str(only_chain.seq), expected_seq)


if __name__ == "__main__":
    # Run this module's tests with a verbose (level 2) text runner.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))