File: test_FastaRecord.py

package info (click to toggle)
python-pyfaidx 0.4.8.1-1~bpo8+1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-backports
  • size: 444 kB
  • sloc: python: 2,167; makefile: 11
file content (127 lines) | stat: -rw-r--r-- 4,925 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import sys
from pyfaidx import Fasta
from tempfile import NamedTemporaryFile
from unittest import TestCase
from nose.tools import raises
from difflib import Differ

# Run from the directory that contains this module so the relative
# 'data/...' fixture paths used by the tests below resolve regardless of
# where the test runner was started.  abspath() is applied first because
# __file__ may be a bare filename; dirname() would then return '' and
# os.chdir('') raises.
path = os.path.dirname(os.path.abspath(__file__))
os.chdir(path)

class TestFastaRecord(TestCase):
    """Read-only checks of records obtained from ``Fasta`` objects.

    Fixture paths are relative ('data/...'), so the tests rely on the
    current working directory being the directory that holds ``data``.
    """

    def setUp(self):
        pass

    def tearDown(self):
        # Drop the index written next to data/genes.fasta so every test
        # starts without a stale .fai file.
        try:
            os.remove('data/genes.fasta.fai')
        except EnvironmentError:
            pass  # some tests may delete this file

    def test_sequence_uppercase(self):
        """Test that the sequence is always returned in
        uppercase, even if it is in lowercase in the
        reference genome.
        """
        filename = "data/genes.fasta.lower"
        record_name = 'gi|557361099|gb|KF435150.1|'
        reference_upper = Fasta(filename, sequence_always_upper=True)
        reference_normal = Fasta(filename)
        # The index file is removed once both readers have been built;
        # the sequence lookups below happen after the removal.
        os.remove('data/genes.fasta.lower.fai')
        upper_seq = reference_upper[record_name][1:100].seq
        normal_seq = reference_normal[record_name][1:100].seq
        assert upper_seq == normal_seq.upper()

    def test_long_names(self):
        """ Test that deflines extracted using FastaRecord.long_name are
        identical to deflines in the actual file.
        """
        with open('data/genes.fasta') as fasta_file:
            # Strip the leading '>' and the trailing newline of each defline.
            deflines = [line[1:-1] for line in fasta_file if line[0] == '>']
        fasta = Fasta('data/genes.fasta')
        long_names = [record.long_name for record in fasta]
        assert deflines == long_names

    def test_issue_62(self):
        """ Check for pathogenic FastaRecord.long_name behavior in mdshw5/pyfaidx#62 """
        uniform_path = 'data/issue_62.fa'
        deflines = []
        line_len = None
        try:
            # Write a copy of genes.fasta in which sequence lines shorter
            # than the first sequence line are padded with 'N' so that all
            # of them end up with the same length.
            with open('data/genes.fasta', 'rb') as fasta_file:
                with open(uniform_path, 'wb') as fasta_uniform_len:
                    for line in fasta_file:
                        if line.startswith(b'>'):
                            deflines.append(line[1:-1].decode('ascii'))
                        elif line_len is None:
                            # The first sequence line sets the target length
                            # (newline included).
                            line_len = len(line)
                        elif line_len > len(line):
                            padding = b'N' * (line_len - len(line))
                            line = line.rstrip() + padding + b'\n'
                        fasta_uniform_len.write(line)
            fasta = Fasta(uniform_path, as_raw=True)
            long_names = [record.long_name for record in fasta]
        finally:
            # Always clean up the generated files, and remove each one
            # independently so a missing file does not leave the other behind.
            for leftover in (uniform_path, uniform_path + '.fai'):
                try:
                    os.remove(leftover)
                except EnvironmentError:
                    pass
        # Print a diff of expected vs. actual names to help diagnose failures.
        sys.stdout.writelines(tuple(Differ().compare(deflines, long_names)))
        assert deflines == long_names

class TestMutableFastaRecord(TestCase):
    """Checks of slice/position assignment on ``Fasta(..., mutable=True)``.

    Every test works on 'data/genes_mutable.fasta', a scratch copy of
    'data/genes.fasta' created in ``setUp`` and deleted in ``tearDown``.
    """

    def setUp(self):
        # Copy the reference file so the tests never write to genes.fasta.
        # Both handles are managed by ``with`` so neither is left open.
        with open('data/genes.fasta', 'rb') as source:
            with open('data/genes_mutable.fasta', 'wb') as mutable:
                mutable.write(source.read())
        self.mutable_fasta = Fasta('data/genes_mutable.fasta', mutable=True)

    def tearDown(self):
        # Remove the scratch copy and any index files; each removal is
        # attempted independently of the others.
        leftovers = (
            'data/genes.fasta.fai',
            'data/genes_mutable.fasta',
            'data/genes_mutable.fasta.fai',
        )
        for leftover in leftovers:
            try:
                os.remove(leftover)
            except EnvironmentError:
                pass  # some tests may delete this file

    def test_mutate_fasta_to_same(self):
        # Writing a region's own sequence back must leave the record
        # identical to the untouched reference.
        record_name = 'gi|557361099|gb|KF435150.1|'
        mutable = Fasta('data/genes_mutable.fasta', mutable=True)
        fasta = Fasta('data/genes.fasta', mutable=False)
        chunk = fasta[record_name][0:100]
        mutable[record_name][0:100] = chunk.seq
        assert str(fasta[record_name]) == str(mutable[record_name])

    def test_mutate_fasta_to_N(self):
        # Overwrite the first 100 positions with 'N' and read them back.
        record_name = 'gi|557361099|gb|KF435150.1|'
        mutable = Fasta('data/genes_mutable.fasta', mutable=True)
        chunk = 100 * 'N'
        mutable[record_name][0:100] = chunk
        assert mutable[record_name][0:100].seq == chunk

    def test_mutate_single_position(self):
        # Same as above, but assigning through an integer index.
        record_name = 'gi|557361099|gb|KF435150.1|'
        mutable = Fasta('data/genes_mutable.fasta', mutable=True)
        chunk = 'N'
        mutable[record_name][0] = chunk
        assert mutable[record_name][0].seq == chunk

    @raises(TypeError)
    def test_mutate_immutable_fasta(self):
        # Assignment on a Fasta opened with mutable=False is expected to
        # raise TypeError.
        mutable = Fasta('data/genes_mutable.fasta', mutable=False)
        chunk = 100 * 'N'
        mutable['gi|557361099|gb|KF435150.1|'][0:100] = chunk

    @raises(IOError)
    def test_mutate_too_long(self):
        # Assigning 101 characters to a 100-position slice is expected to
        # raise IOError.
        mutable = Fasta('data/genes_mutable.fasta', mutable=True)
        chunk = 101 * 'N'
        mutable['gi|557361099|gb|KF435150.1|'][0:100] = chunk