File: test_pbcore_io_FastaTable.py

package info (click to toggle)
python-pbcore 1.7.1%2Bgit20200430.a127b1e%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,404 kB
  • sloc: python: 23,243; xml: 2,504; makefile: 232; sh: 66
file content (80 lines) | stat: -rw-r--r-- 3,128 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from pbcore import data
from pbcore.io import FastaReader, FastaWriter, IndexedFastaReader


class TestIndexedFastaReader:
    """Exercise IndexedFastaReader against the FASTA fixtures in pbcore.data.

    Several tests compare IndexedFastaReader with the streaming FastaReader
    on the same file; the remaining tests check lookup by header, id and
    position, and slicing of a record's sequence.
    """

    FASTAPATH = data.getFasta()

    # Expected fields of the "ref000021" record stored in FASTAPATH.
    _EXON22_HEADER = "ref000021|EGFR_Exon_22\tMetadataTest"
    _EXON22_ID = "ref000021|EGFR_Exon_22"
    _EXON22_COMMENT = "MetadataTest"
    _EXON22_SEQUENCE = (
        "CACTGCCTCATCTCTCACCATCCCAAGGTGCCTATCAAGTGGATGGCATTGGAATCAATT"
        "TTACACAGAATCTATACCCACCAGAGTGATGTCTGGAGCTACGGTGAGTCATAATCCTGA"
        "TGCTAATGAGTTTGTACTGAGGCCAAGCTGG")

    def _assertIsExon22(self, record):
        """Assert that *record* carries the expected exon-22 fields."""
        assert self._EXON22_HEADER == record.header
        assert self._EXON22_ID == record.id
        assert self._EXON22_COMMENT == record.comment
        assert self._EXON22_SEQUENCE == record.sequence[:]

    def testIteration(self):
        """Both readers yield the same 48 records, in the same order."""
        indexedReader = IndexedFastaReader(self.FASTAPATH)
        streamingReader = FastaReader(self.FASTAPATH)
        indexedRecords = list(indexedReader)
        streamedRecords = list(streamingReader)

        assert len(streamedRecords) == len(indexedRecords)
        assert 48 == len(indexedRecords)
        for indexedRec, streamedRec in zip(indexedRecords, streamedRecords):
            assert streamedRec.header == indexedRec.header
            # Slice with [:] to compare against the streamed sequence.
            assert streamedRec.sequence == indexedRec.sequence[:]

        # Unlike FastaReader, IndexedFastaReader iteration is repeatable.
        assert 48 == len(list(indexedReader))

    def testAccessByName(self):
        """A record can be fetched using its full header as the key."""
        reader = IndexedFastaReader(self.FASTAPATH)
        self._assertIsExon22(reader["ref000021|EGFR_Exon_22\tMetadataTest"])

    def testAccessById(self):
        """A record can be fetched using only its id as the key."""
        reader = IndexedFastaReader(self.FASTAPATH)
        self._assertIsExon22(reader["ref000021|EGFR_Exon_22"])

    def testAccessByPosition(self):
        """Integer and slice indexing on the reader return records."""
        reader = IndexedFastaReader(self.FASTAPATH)
        firstRecord = reader[0]
        expectedRepr = "<IndexedFastaRecord: ref000001|EGFR_Exon_2>"
        assert expectedRepr == repr(firstRecord)

        # Slices must agree with element-wise access, including negatives.
        assert [reader[0], reader[1]] == reader[:2]
        assert [reader[-2], reader[-1]] == reader[-2:]

    def testSlice(self):
        """A record's sequence supports slices and single-index access."""
        reader = IndexedFastaReader(self.FASTAPATH)
        seq = reader["ref000021|EGFR_Exon_22"].sequence

        # (subscript, expected bases); slice(a, b) is what seq[a:b] passes.
        expectations = [
            (slice(0, 10), "CACTGCCTCA"),
            (slice(-10, None), "GCCAAGCTGG"),
            (-1, "G"),
            (-3, "T"),
            (0, "C"),
            (1, "A"),
        ]
        for subscript, expectedBases in expectations:
            assert expectedBases == seq[subscript]

    def test_dosLineEndingsFasta(self):
        """Both readers agree on the data.getDosFormattedFasta() fixture."""
        streamedEntries = list(FastaReader(data.getDosFormattedFasta()))
        indexedEntries = list(IndexedFastaReader(data.getDosFormattedFasta()))

        assert len(streamedEntries) == len(indexedEntries)
        for streamedEntry, indexedEntry in zip(streamedEntries,
                                               indexedEntries):
            assert streamedEntry.header == indexedEntry.header
            assert streamedEntry.sequence == indexedEntry.sequence[:]

    def test_readWeirdFastaIndex(self):
        """The data.getWeird() fixture parses to one "chr1" record."""
        records = list(IndexedFastaReader(data.getWeird()))
        assert 1 == len(records)

        onlyRecord = records[0]
        assert "chr1" == onlyRecord.header
        assert "acgtacgtacgtact" == onlyRecord.sequence[:]