1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
|
# Copyright 2013 by Peter Cock. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
import unittest
from Bio._py3k import StringIO
from Bio import SeqIO
from Bio.Alphabet import generic_dna
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.SeqRecord import SeqRecord
from seq_tests_common import compare_record
class TestEmbl(unittest.TestCase):
def test_annotation1(self):
"""Check parsing of annotation from EMBL files (1)."""
record = SeqIO.read("EMBL/TRBG361.embl", "embl")
self.assertEqual(len(record), 1859)
# Single keyword:
self.assertEqual(record.annotations["keywords"], ["beta-glucosidase"])
self.assertEqual(record.annotations["topology"], "linear")
def test_annotation2(self):
"""Check parsing of annotation from EMBL files (2)."""
record = SeqIO.read("EMBL/DD231055_edited.embl", "embl")
self.assertEqual(len(record), 315)
# Multiple keywords:
self.assertEqual(record.annotations["keywords"],
['JP 2005522996-A/12', 'test-data',
'lot and lots of keywords for this example',
'multi-line keywords'])
self.assertEqual(record.annotations["topology"], "linear")
def test_annotation3(self):
"""Check parsing of annotation from EMBL files (3)."""
record = SeqIO.read("EMBL/AE017046.embl", "embl")
self.assertEqual(len(record), 9609)
# TODO: Should this be an empty list, or simply absent?
self.assertEqual(record.annotations["keywords"], [""])
self.assertEqual(record.annotations["topology"], "circular")
def test_annotation4(self):
"""Check parsing of annotation from EMBL files (4)."""
record = SeqIO.read("EMBL/location_wrap.embl", "embl")
self.assertEqual(len(record), 120)
self.assertTrue("keywords" not in record.annotations)
# The ID line has the topology as unspecified:
self.assertTrue("topology" not in record.annotations)
def test_writing_empty_qualifiers(self):
f = SeqFeature(FeatureLocation(5, 20, strand=+1),
type="region",
qualifiers={"empty": None,
"zero": 0,
"one": 1,
"text": "blah"})
record = SeqRecord(Seq("A" * 100, generic_dna), "dummy",
features=[f])
gbk = record.format("gb")
self.assertTrue(' /empty\n' in gbk, gbk)
self.assertTrue(' /zero=0\n' in gbk, gbk)
self.assertTrue(' /one=1\n' in gbk, gbk)
self.assertTrue(' /text="blah"\n' in gbk, gbk)
class TestEmblRewrite(unittest.TestCase):
def check_rewrite(self, filename):
old = SeqIO.read(filename, "embl")
# TODO - Check these properties:
old.dbxrefs = []
old.annotations['accessions'] = old.annotations['accessions'][:1]
del old.annotations['references']
buffer = StringIO()
self.assertEqual(1, SeqIO.write(old, buffer, "embl"))
buffer.seek(0)
new = SeqIO.read(buffer, "embl")
self.assertTrue(compare_record(old, new))
def test_annotation1(self):
"""Check writing-and-parsing EMBL file (1)."""
self.check_rewrite("EMBL/TRBG361.embl")
def test_annotation2(self):
"""Check writing-and-parsing EMBL file (2)."""
self.check_rewrite("EMBL/DD231055_edited.embl")
def test_annotation3(self):
"""Check writing-and-parsing EMBL file (3)."""
self.check_rewrite("EMBL/AE017046.embl")
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=2)
unittest.main(testRunner=runner)
|