File: test_SeqIO_Insdc.py

package info (click to toggle)
python-biopython 1.85+dfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 126,372 kB
  • sloc: xml: 1,047,995; python: 332,722; ansic: 16,944; sql: 1,208; makefile: 140; sh: 81
file content (155 lines) | stat: -rw-r--r-- 5,767 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Copyright 2013 by Peter Cock.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for SeqIO Insdc module."""

import unittest
import warnings
from io import StringIO

from seq_tests_common import SeqRecordTestBaseClass
from test_SeqIO import SeqIOConverterTestBaseClass

from Bio import BiopythonParserWarning
from Bio import BiopythonWarning
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature
from Bio.SeqFeature import SimpleLocation
from Bio.SeqRecord import SeqRecord


class TestEmbl(unittest.TestCase):
    """Tests for parsing EMBL annotation and writing features as GenBank."""

    @staticmethod
    def _dummy_record(feature_type, qualifiers):
        """Return a 100-letter DNA record holding one feature.

        The feature sits at SimpleLocation(5, 20, strand=+1) and is given
        the requested type and qualifiers.
        """
        feature = SeqFeature(
            SimpleLocation(5, 20, strand=+1),
            type=feature_type,
            qualifiers=qualifiers,
        )
        dummy = SeqRecord(Seq("A" * 100), "dummy", features=[feature])
        dummy.annotations["molecule_type"] = "DNA"
        return dummy

    def test_annotation1(self):
        """Check parsing of annotation from EMBL files (1)."""
        rec = SeqIO.read("EMBL/TRBG361.embl", "embl")
        self.assertEqual(len(rec), 1859)
        annotations = rec.annotations
        # This record carries exactly one keyword.
        self.assertEqual(annotations["keywords"], ["beta-glucosidase"])
        self.assertEqual(annotations["topology"], "linear")

    def test_annotation2(self):
        """Check parsing of annotation from EMBL files (2)."""
        # This record carries several keywords.
        expected_keywords = [
            "JP 2005522996-A/12",
            "test-data",
            "lot and lots of keywords for this example",
            "multi-line keywords",
        ]
        rec = SeqIO.read("EMBL/DD231055_edited.embl", "embl")
        self.assertEqual(len(rec), 315)
        annotations = rec.annotations
        self.assertEqual(annotations["keywords"], expected_keywords)
        self.assertEqual(annotations["topology"], "linear")

    def test_annotation3(self):
        """Check parsing of annotation from EMBL files (3)."""
        rec = SeqIO.read("EMBL/AE017046.embl", "embl")
        self.assertEqual(len(rec), 9609)
        annotations = rec.annotations
        # TODO: Should this be an empty list, or simply absent?
        self.assertEqual(annotations["keywords"], [""])
        self.assertEqual(annotations["topology"], "circular")

    def test_annotation4(self):
        """Check parsing of annotation from EMBL files (4)."""
        # Only the parsing itself is expected to raise the warning.
        with self.assertWarns(BiopythonParserWarning):
            rec = SeqIO.read("EMBL/location_wrap.embl", "embl")
        self.assertEqual(len(rec), 120)
        # No keywords are expected, and the ID line has the topology as
        # unspecified, so neither annotation should be present.
        for absent_key in ("keywords", "topology"):
            self.assertNotIn(absent_key, rec.annotations)

    def test_writing_empty_qualifiers(self):
        """Check GenBank output of qualifiers with None, int and text values."""
        rec = self._dummy_record(
            "region",
            {"empty": None, "zero": 0, "one": 1, "text": "blah"},
        )
        genbank_text = rec.format("gb")
        # None gives a bare key, integers are unquoted, text is quoted.
        expected_fragments = (
            " /empty\n",
            " /zero=0\n",
            " /one=1\n",
            ' /text="blah"\n',
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, genbank_text)

    def test_warn_on_writing_nonstandard_feature_key(self):
        """Check a BiopythonWarning is raised for a 16-letter feature type."""
        rec = self._dummy_record(
            "a" * 16,
            {"empty": None, "zero": 0, "one": 1, "text": "blah"},
        )
        with self.assertWarns(BiopythonWarning):
            rec.format("gb")

    def test_warn_on_writing_nonstandard_qualifier_key(self):
        """Check a BiopythonWarning is raised for a 21-letter qualifier key."""
        rec = self._dummy_record("region", {"a" * 21: "test"})
        with self.assertWarns(BiopythonWarning):
            rec.format("gb")


class TestEmblRewrite(SeqRecordTestBaseClass):
    """Tests writing EMBL records and parsing the output back again."""

    def check_rewrite(self, filename):
        """Write the EMBL record in filename to memory, re-read and compare.

        Before writing, the record's dbxrefs are cleared, only its first
        accession is kept and its references are removed (see the TODO
        below); the re-parsed record is compared against that trimmed
        record using compare_record.
        """
        original = SeqIO.read(filename, "embl")
        annotations = original.annotations

        # TODO - Check these properties:
        # NOTE(review): presumably these do not yet survive the round trip
        # unchanged - confirm before removing the trimming.
        original.dbxrefs = []
        annotations["accessions"] = annotations["accessions"][:1]
        del annotations["references"]

        handle = StringIO()
        written = SeqIO.write(original, handle, "embl")
        self.assertEqual(1, written)
        # Rewind so the parser starts from the beginning of the output.
        handle.seek(0)
        reparsed = SeqIO.read(handle, "embl")

        self.compare_record(original, reparsed)

    def _check_rewrite_strict(self, filename):
        """Run check_rewrite with every warning escalated to an error."""
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            self.check_rewrite(filename)

    def test_annotation1(self):
        """Check writing-and-parsing EMBL file (1)."""
        self._check_rewrite_strict("EMBL/TRBG361.embl")

    def test_annotation2(self):
        """Check writing-and-parsing EMBL file (2)."""
        self._check_rewrite_strict("EMBL/DD231055_edited.embl")

    def test_annotation3(self):
        """Check writing-and-parsing EMBL file (3)."""
        self._check_rewrite_strict("EMBL/AE017046.embl")


class ConvertTestsInsdc(SeqIOConverterTestBaseClass):
    """Conversion tests using EMBL and GenBank input files."""

    def test_conversion(self):
        """Test format conversion by SeqIO.write/SeqIO.parse and SeqIO.convert."""
        # Each entry pairs an input file with the format name used to read it.
        cases = (
            ("EMBL/U87107.embl", "embl"),
            ("EMBL/TRBG361.embl", "embl"),
            ("GenBank/NC_005816.gb", "gb"),
            ("GenBank/cor6_6.gb", "genbank"),
        )
        for path, source_format in cases:
            for in_format, out_format in self.formats:
                # Only conversions that start from this file's format apply.
                if in_format == source_format:
                    self.check_conversion(path, in_format, out_format)


if __name__ == "__main__":
    # When run as a script, execute the tests with verbosity level 2.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))