File: test_pbcore_io_FastaTable.py

package info (click to toggle)
python-pbcore 1.2.11%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 18,612 kB
  • ctags: 5,336
  • sloc: python: 22,160; xml: 2,667; makefile: 239
file content (80 lines) | stat: -rw-r--r-- 3,197 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from nose.tools import assert_equal, assert_true, assert_false
from pbcore import data
from pbcore.io import FastaReader, FastaWriter, IndexedFastaReader


class TestIndexedFastaReader:
    """Tests for IndexedFastaReader, cross-checked against FastaReader.

    The inputs come from the ``pbcore.data`` module (``getFasta`` and
    ``getDosFormattedFasta``).
    """

    def setup(self):
        """Store the value of ``data.getFasta()`` for the tests to open."""
        self.fastaPath = data.getFasta()

    def _checkEgfrExon22(self, record):
        """Assert header, id, comment and full sequence of the exon-22 record.

        Shared by the by-name and by-id lookups, which are expected to
        resolve to the same record.
        """
        assert_equal("ref000021|EGFR_Exon_22\tMetadataTest", record.header)
        assert_equal("ref000021|EGFR_Exon_22", record.id)
        assert_equal("MetadataTest", record.comment)
        assert_equal("CACTGCCTCATCTCTCACCATCCCAAGGTGCCTATCAAGTGGATGGCATTGGAATCAATT"
                     "TTACACAGAATCTATACCCACCAGAGTGATGTCTGGAGCTACGGTGAGTCATAATCCTGA"
                     "TGCTAATGAGTTTGTACTGAGGCCAAGCTGG",
                     record.sequence[:])

    def testIteration(self):
        """Both readers yield the same 48 records; the indexed one twice."""
        indexed = IndexedFastaReader(self.fastaPath)
        streaming = FastaReader(self.fastaPath)
        indexedRecords = list(indexed)
        streamingRecords = list(streaming)

        assert_equal(len(streamingRecords), len(indexedRecords))
        assert_equal(48, len(indexedRecords))
        for indexedRec, streamingRec in zip(indexedRecords, streamingRecords):
            assert_equal(streamingRec.header, indexedRec.header)
            # The indexed record's sequence is sliced in full before being
            # compared with the streaming reader's sequence.
            assert_equal(streamingRec.sequence, indexedRec.sequence[:])

        # Unlike FastaReader, IndexedFastaReader can be iterated a second time.
        assert_equal(48, len(list(indexed)))

    def testAccessByName(self):
        """Look a record up by its complete header line."""
        reader = IndexedFastaReader(self.fastaPath)
        self._checkEgfrExon22(reader["ref000021|EGFR_Exon_22\tMetadataTest"])

    def testAccessById(self):
        """Look a record up by its id only (header without the comment)."""
        reader = IndexedFastaReader(self.fastaPath)
        self._checkEgfrExon22(reader["ref000021|EGFR_Exon_22"])

    def testAccessByPosition(self):
        """Integer and slice indexing on the reader agree with each other."""
        reader = IndexedFastaReader(self.fastaPath)
        first = reader[0]
        assert_equal("<IndexedFastaRecord: ref000001|EGFR_Exon_2>", repr(first))

        head = reader[:2]
        assert_equal([reader[0], reader[1]], head)

        tail = reader[-2:]
        assert_equal([reader[-2], reader[-1]], tail)

    def testSlice(self):
        """Slice and index into a record's sequence from both ends."""
        reader = IndexedFastaReader(self.fastaPath)
        seq = reader["ref000021|EGFR_Exon_22"].sequence

        # Range slices at the start and at the end of the sequence.
        assert_equal("CACTGCCTCA", seq[0:10])
        assert_equal("GCCAAGCTGG", seq[-10:])

        # Single-base lookups, negative indices first.
        for expectedBase, position in (("G", -1), ("T", -3), ("C", 0), ("A", 1)):
            assert_equal(expectedBase, seq[position])

    def test_dosLineEndingsFasta(self):
        """Both readers agree on the DOS-formatted FASTA file."""
        streamingEntries = list(FastaReader(data.getDosFormattedFasta()))
        indexedEntries = list(IndexedFastaReader(data.getDosFormattedFasta()))

        assert_equal(len(streamingEntries), len(indexedEntries))
        for streamingEntry, indexedEntry in zip(streamingEntries, indexedEntries):
            assert_equal(streamingEntry.header, indexedEntry.header)
            assert_equal(streamingEntry.sequence, indexedEntry.sequence[:])