File: test_SeqIO_Gfa.py

package info (click to toggle)
python-biopython 1.85+dfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 126,372 kB
  • sloc: xml: 1,047,995; python: 332,722; ansic: 16,944; sql: 1,208; makefile: 140; sh: 81
file content (78 lines) | stat: -rw-r--r-- 3,339 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""Tests for SeqIO GFA module."""

import unittest

from Bio import BiopythonWarning
from Bio import SeqIO


class TestRead(unittest.TestCase):
    """Parse well-formed GFA fixture files and check the resulting records."""

    def test_read_GFA1(self):
        """Test parsing valid GFA 1.x files.

        Every fixture file is exercised inside its own ``subTest`` so that a
        failure on one file is reported on its own and does not prevent the
        remaining files from being checked.
        """
        path = "GFA/seq.gfa"
        with self.subTest(path=path):
            records = list(SeqIO.parse(path, "gfa1"))
            self.assertEqual(len(records), 8)
            self.assertEqual(records[6].id, "MTh13014")
            self.assertEqual(
                records[6].seq,
                "TTAGGTCTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTATAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCACTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCAGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCTCCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAACCACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTCCATCATCCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTCACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGTTTCTACTCCAAAGACC",
            )
            # Optional tags are expected as (type, value) pairs under the
            # tag name in the record annotations.
            self.assertEqual(records[0].annotations["SN"], ("Z", "MT_human"))
            self.assertEqual(records[0].annotations["SO"], ("i", "0"))

        path = "GFA/seq_with_len.gfa"
        with self.subTest(path=path):
            records = list(SeqIO.parse(path, "gfa1"))
            self.assertEqual(len(records), 9)
            self.assertEqual(
                records[8].seq,
                "GAAAAATTGCCCTTGGTTTTCGCTTCGCTCAAACTCTATTGAACTTCGCTTTCGCTCAGTTCGTCGGGGCAATTTTTTGGTTAATACTT",
            )

        path = "GFA/fake_with_checksum.gfa"
        with self.subTest(path=path):
            records = list(SeqIO.parse(path, "gfa1"))
            self.assertEqual(len(records), 1)
            self.assertEqual(records[0].seq, "AAA")

        path = "GFA/no_seq.gfa"
        with self.subTest(path=path):
            records = list(SeqIO.parse(path, "gfa1"))
            self.assertEqual(len(records), 9)
            # Only the record length is checked for this fixture, not the
            # sequence content.
            self.assertEqual(len(records[0]), 528)

    def test_read_GFA2(self):
        """Test parsing valid GFA 2.0 files."""
        records = list(SeqIO.parse("GFA/fake_gfa2.gfa", "gfa2"))
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].seq, "AAA")


class TestCorrupt(unittest.TestCase):
    """Check how the GFA parsers react to mismatched or malformed input."""

    @staticmethod
    def _read_all(path, fmt):
        """Parse ``path`` as ``fmt`` and consume every record it yields."""
        return list(SeqIO.parse(path, fmt))

    def test_corrupt_gfa2(self):
        """Check a GFA 1.x file does not parse in GFA 2.

        Reading the GFA 1.x fixture with the "gfa2" format must raise
        ValueError.
        """
        self.assertRaises(ValueError, self._read_all, "GFA/seq.gfa", "gfa2")

    def test_corrupt_segment_fields(self):
        """Check a GFA file with invalid fields on a segment line.

        Reading the fixture must raise ValueError.
        """
        self.assertRaises(
            ValueError, self._read_all, "GFA/corrupt_segment_fields.gfa", "gfa1"
        )

    def test_corrupt_len(self):
        """Check a GFA file with an incorrect length.

        Reading the fixture must complete and emit a BiopythonWarning.
        """
        self.assertWarns(
            BiopythonWarning, self._read_all, "GFA/corrupt_len.gfa", "gfa1"
        )

    def test_corrupt_checksum(self):
        """Check a GFA file with an incorrect checksum.

        Reading the fixture must complete and emit a BiopythonWarning.
        """
        self.assertWarns(
            BiopythonWarning, self._read_all, "GFA/corrupt_checksum.gfa", "gfa1"
        )

    def test_corrupt_tag_name(self):
        """Check a GFA file with an invalid tag name.

        Reading the fixture must complete and emit a BiopythonWarning.
        """
        self.assertWarns(
            BiopythonWarning, self._read_all, "GFA/corrupt_tag_name.gfa", "gfa1"
        )

    def test_corrupt_tag_type(self):
        """Check a GFA file with an incorrect tag type.

        Reading the fixture must complete and emit a BiopythonWarning.
        """
        self.assertWarns(
            BiopythonWarning, self._read_all, "GFA/corrupt_tag_type.gfa", "gfa1"
        )


if __name__ == "__main__":
    # Run this module's tests directly, using a text runner at verbosity 2.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))