File: test_pbcore_io_FastaIO.py

package info (click to toggle)
python-pbcore 2.1.2%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 6,476 kB
  • sloc: python: 13,393; xml: 2,504; makefile: 232; sh: 66
file content (127 lines) | stat: -rw-r--r-- 4,679 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from pbcore import data
from pbcore.io import FastaReader, FastaWriter, FastaRecord
from io import StringIO


class TestFastaRecord:

    HEADER = "chr1|blah|blah\tblah blah"
    RC_HEADER = "chr1|blah|blah\tblah blah [revcomp]"
    ID = "chr1|blah|blah"
    COMMENT = "blah blah"
    SEQUENCE = "GATTACA" * 20
    RC_SEQUENCE = "TGTAATC" * 20
    LENGTH = 140
    EXPECTED__STR__ = (
        ">chr1|blah|blah\tblah blah\n"
        "GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATT\n"
        "ACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAG\n"
        "ATTACAGATTACAGATTACA")
    RC1_EXPECTED__STR__ = (
        ">chr1|blah|blah\tblah blah [revcomp]\n"
        "TGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTA\n"
        "ATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCT\n"
        "GTAATCTGTAATCTGTAATC")
    RC2_EXPECTED__STR__ = (
        ">chr1|blah|blah\tblah blah\n"
        "TGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTA\n"
        "ATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCT\n"
        "GTAATCTGTAATCTGTAATC")

    @classmethod
    def setup_class(cls):
        cls.record = FastaRecord(cls.HEADER, cls.SEQUENCE)
        cls.rc1_record = cls.record.reverseComplement()
        cls.rc2_record = cls.record.reverseComplement(True)

    def test__init__(self):
        assert self.HEADER == self.record.header
        assert self.SEQUENCE == self.record.sequence
        assert self.ID == self.record.id
        assert self.COMMENT == self.record.comment

    def test__str__(self):
        assert self.EXPECTED__STR__ == str(self.record)

    def test_fromString(self):
        recordFromString = FastaRecord.fromString(self.EXPECTED__STR__)
        assert self.HEADER == recordFromString.header
        assert self.SEQUENCE == recordFromString.sequence

    def test_reverse_complement1(self):
        assert self.rc1_record.header == self.RC_HEADER
        assert self.rc1_record.sequence == self.RC_SEQUENCE
        assert self.RC1_EXPECTED__STR__ == str(self.rc1_record)

    def test_reverse_complement2(self):
        assert self.rc2_record.header == self.HEADER
        assert self.rc2_record.sequence == self.RC_SEQUENCE
        assert self.RC2_EXPECTED__STR__ == str(self.rc2_record)

    def test_len(self):
        assert self.LENGTH == len(self.record)
        assert self.LENGTH == len(self.rc1_record)
        assert self.LENGTH == len(self.rc2_record)

    def test_eq(self):
        header = 'r1'
        seq = 'ACGT'
        r1 = FastaRecord(header, seq)
        r2 = FastaRecord(header, seq)
        assert r1 == r2

    def test_not_equal(self):
        r1 = FastaRecord('r1', 'ACGT')
        r2 = FastaRecord('r2', 'ACGT')
        r3 = FastaRecord('r1', 'ACGT')
        assert r1 != r2
        assert not r1 != r3


class TestFastaReader:

    def test_readFasta(self):
        f = FastaReader(data.getFasta())
        entries = list(f)
        assert 48 == len(entries)
        assert "ref000001|EGFR_Exon_2" == entries[0].header
        assert ("TTTCTTCCAGTTTGCCAAGGCACGAGTAACAAGCTCACGCAGTTGGGCACTTT"
                "TGAAGATCATTTTCTCAGCCTCCAGAGGATGTTCAATAACTGTGAGGTGGTCC"
                "TTGGGAATTTGGAAATTACCTATGTGCAGAGGAATTATGATCTTTCCTTCTTA"
                "AAGGTTGGTGACTTTGATTTTCCT") == entries[0].sequence

    def test_dosLineEndingsFasta(self):
        f = FastaReader(data.getDosFormattedFasta())
        entries = list(f)
        for e in entries:
            assert "\r" not in e.header
            assert 16 == len(e.sequence)


class TestFastaWriter:

    def setup_method(self):
        self.fasta1 = StringIO(
            ">chr1|blah|blah\n"
            "GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATT\n"
            "ACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAG\n"
            "ATTACAGATTACAGATTACA\n")
        self.fasta2 = StringIO(self.fasta1.getvalue() +
                               ">chr2|blah|blah\n"
                               "GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATT\n"
                               "ACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAG\n"
                               "ATTACAGATTACAGATTACA\n")

    def test_writeFasta1(self):
        f = StringIO()
        w = FastaWriter(f)
        for record in FastaReader(self.fasta1):
            w.writeRecord(record)
        assert self.fasta1.getvalue() == f.getvalue()

    def test_writeFasta2(self):
        f = StringIO()
        w = FastaWriter(f)
        for record in FastaReader(self.fasta2):
            w.writeRecord(record.header, record.sequence)
        assert self.fasta2.getvalue() == f.getvalue()