File: test_parsers.py

package info (click to toggle)
htseq 2.0.9+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 103,476 kB
  • sloc: python: 6,280; sh: 211; cpp: 147; makefile: 80
file content (185 lines) | stat: -rw-r--r-- 5,382 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import pytest
import sys
import os
import glob
import sysconfig
from pathlib import Path

# Put the local build directory (build/lib.<platform>-<major.minor> under the
# current working directory) first on sys.path, so it is searched before any
# other location when HTSeq is imported below.
# The version tag comes from sysconfig.get_python_version() rather than
# sys.version[0:3]: the slice truncates two-digit minor versions
# (e.g. "3.10" became "3.1"), which pointed at a non-existent directory.
build_dir = "build/lib.%s-%s" % (
    sysconfig.get_platform(),
    sysconfig.get_python_version(),
)

sys.path.insert(0, os.path.join(os.getcwd(), build_dir))
import HTSeq


def test_fasta_parser(data_folder):
    """Smoke-test ``HTSeq.FastaReader`` by exhausting it in four ways.

    The same file, ``fastaExLong.fa``, is read through a path, through a path
    with ``raw_iterator=True``, through the reader used as a context manager,
    and through a reader wrapped around an already open text handle. Nothing
    is asserted about the records; the test passes if no iteration raises.

    Args:
        data_folder: String prefix that is concatenated directly with the
            file name (presumably a pytest fixture ending in a path
            separator -- confirm in conftest).
    """
    fasta_path = data_folder+'fastaExLong.fa'

    print("Test Fasta parser")
    for _record in HTSeq.FastaReader(fasta_path):
        pass
    print("Test passed")

    print("Test Fasta parser (raw iterator)")
    for _record in HTSeq.FastaReader(fasta_path, raw_iterator=True):
        pass
    print("Test passed")

    print('Test Fasta parser (with statement)')
    with HTSeq.FastaReader(fasta_path) as managed_reader:
        for _record in managed_reader:
            pass
    print('Test passed')

    print('Test Fasta parser (with statement and file handle)')
    with open(fasta_path) as handle:
        handle_reader = HTSeq.FastaReader(handle)
        for _record in handle_reader:
            pass
    print('Test passed')


def test_fastq_parser(data_folder):
    """Smoke-test ``HTSeq.FastqReader`` on plain and gzip-compressed input.

    Reads ``fastqEx.fastq`` once, then ``fastqExgzip.fastq.gz`` through a
    path, through a path with ``raw_iterator=True``, through the reader used
    as a context manager, and through a reader wrapped around a handle
    opened with ``gzip.open(..., 'rt')``. Nothing is asserted about the
    records; the test passes if no iteration raises.

    Args:
        data_folder: String prefix that is concatenated directly with the
            file names (presumably a pytest fixture ending in a path
            separator -- confirm in conftest).
    """
    import gzip

    plain_path = data_folder+'fastqEx.fastq'
    gzipped_path = data_folder+'fastqExgzip.fastq.gz'

    print("Test Fastq parser")
    for _record in HTSeq.FastqReader(plain_path):
        pass
    print("Test passed")

    print("Test Fastq parser on gzip input")
    for _record in HTSeq.FastqReader(gzipped_path):
        pass
    print("Test passed")

    print("Test Fastq parser on gzip input (raw iterator)")
    for _record in HTSeq.FastqReader(gzipped_path, raw_iterator=True):
        pass
    print("Test passed")

    print('Test Fastq parser (with statement)')
    with HTSeq.FastqReader(gzipped_path) as managed_reader:
        for _record in managed_reader:
            pass
    print('Test passed')

    print('Test Fastq parser (with statement and file handle)')
    # 'rt' makes gzip hand the reader decoded text rather than bytes.
    with gzip.open(gzipped_path, 'rt') as handle:
        handle_reader = HTSeq.FastqReader(handle)
        for _record in handle_reader:
            pass
    print('Test passed')


def test_path_like_reading(data_folder):
    """Check that ``HTSeq.BAM_Reader`` accepts a ``pathlib.Path`` argument.

    ``yeast_RNASeq_excerpt.sam`` is read to exhaustion twice: once through a
    reader bound to a local name and once through the reader used as a
    context manager. The test passes if neither iteration raises.

    Args:
        data_folder: Location of the test data; it is wrapped in
            ``pathlib.Path`` before the file name is appended.
    """
    sam_path = Path(data_folder) / 'yeast_RNASeq_excerpt.sam'

    print('Test BAM reader with pathlib.Path')
    path_reader = HTSeq.BAM_Reader(sam_path)
    for _alignment in path_reader:
        pass
    print('Test passed')

    print('Test BAM reader (with statement) with pathlib.Path')
    with HTSeq.BAM_Reader(sam_path) as managed_reader:
        for _alignment in managed_reader:
            pass
    print('Test passed')


def test_open_handle_reading(data_folder):
    """Check that ``HTSeq.BAM_Reader`` can read from an already open handle.

    ``yeast_RNASeq_excerpt.sam`` is opened with the built-in ``open`` in its
    default mode, and the resulting handle is passed to the reader, which is
    then iterated to exhaustion. The test passes if nothing raises.

    Args:
        data_folder: Location of the test data; it is wrapped in
            ``pathlib.Path`` before the file name is appended.
    """
    sam_path = Path(data_folder) / 'yeast_RNASeq_excerpt.sam'

    print('Test BAM reader with open handle')
    with open(sam_path) as handle:
        handle_reader = HTSeq.BAM_Reader(handle)
        for _alignment in handle_reader:
            pass
    print('Test passed')


def test_unreadable_object_reading():
    """Check that ``HTSeq.BAM_Reader`` rejects a bare ``object()``.

    A ``TypeError`` is expected either when the reader is built or while it
    is iterated, both for plain use and for use as a context manager.
    """
    not_a_file = object()

    print('Test BAM reader with unreadable object')
    # Construction and iteration both sit inside the raises block, so the
    # error may come from either step.
    with pytest.raises(TypeError):
        plain_reader = HTSeq.BAM_Reader(not_a_file)
        for _alignment in plain_reader:
            pass
    print('Test passed')

    print('Test BAM reader with unreadable object, context manager')
    with pytest.raises(TypeError):
        with HTSeq.BAM_Reader(not_a_file) as managed_reader:
            for _alignment in managed_reader:
                pass
    print('Test passed')


def test_bam_reader(data_folder):
    """Smoke-test ``HTSeq.BAM_Reader`` with a string path.

    ``yeast_RNASeq_excerpt.sam`` is read to exhaustion twice: once through a
    reader bound to a local name and once through the reader used as a
    context manager. The test passes if neither iteration raises.

    Args:
        data_folder: String prefix that is concatenated directly with the
            file name (presumably a pytest fixture ending in a path
            separator -- confirm in conftest).
    """
    sam_path = data_folder+"yeast_RNASeq_excerpt.sam"

    print('Test BAM reader')
    plain_reader = HTSeq.BAM_Reader(sam_path)
    for _alignment in plain_reader:
        pass
    print('Test passed')

    print('Test BAM reader (with statement)')
    with HTSeq.BAM_Reader(sam_path) as managed_reader:
        for _alignment in managed_reader:
            pass
    print('Test passed')


def test_bam_inconsistent_mate(data_folder):
    """Iterate ``inconsistent_mate.bam`` to exhaustion without error.

    Nothing is asserted about the alignments; the test passes if reading
    the whole file through ``HTSeq.BAM_Reader`` does not raise.

    Args:
        data_folder: String prefix that is concatenated directly with the
            file name (presumably a pytest fixture ending in a path
            separator -- confirm in conftest).
    """
    print('Test inconsistent BAM file')
    bam_path = data_folder+"inconsistent_mate.bam"
    mate_reader = HTSeq.BAM_Reader(bam_path)
    for _alignment in mate_reader:
        pass
    print("Test passed")


def test_bam_optional_field(data_folder):
    """Call ``has_optional_field('NO')`` on the first BAM alignment.

    Only the first alignment yielded from ``inconsistent_mate.bam`` is
    examined, and the return value of the call is discarded; the test passes
    if the call does not raise (or if the file yields no alignments).

    Args:
        data_folder: String prefix that is concatenated directly with the
            file name (presumably a pytest fixture ending in a path
            separator -- confirm in conftest).
    """
    print('Test optional fields in BAM alignment')
    bam_path = data_folder+"inconsistent_mate.bam"
    field_reader = HTSeq.BAM_Reader(bam_path)
    for alignment in field_reader:
        alignment.has_optional_field('NO')
        # One alignment is enough to exercise the method.
        break
    print("Test passed")


def test_GFF3(data_folder):
    """Iterate a gzip-named GFF3 subsample through ``HTSeq.GFF_Reader``.

    The reader is used as a context manager and exhausted; nothing is
    asserted about the features, so the test passes if reading does not
    raise.

    Args:
        data_folder: String prefix that is concatenated directly with the
            file name (presumably a pytest fixture ending in a path
            separator -- confirm in conftest).
    """
    print('Test GFF3 reader')
    gff_name = 'GCF_000001405.39_GRCh38.p13_genomic_subsample.gff.gz'
    with HTSeq.GFF_Reader(data_folder+gff_name) as gff_reader:
        for _feature in gff_reader:
            pass
    print("Test passed")


def test_pickle():
    """Round-trip HTSeq objects through ``pickle`` and compare attributes.

    Each case is a dict with a display ``name``, the ``object`` to
    serialize, and optionally ``assert_properties``: attribute names that
    must compare equal between the original and the unpickled copy.
    """
    import pickle

    print('Test pickling and inpickling')
    cases = [
        {
            'name': 'HTSeq.Sequence',
            'object': HTSeq.Sequence(b'ACTG', 'sequence'),
            'assert_properties': ('seq', 'name', 'descr'),
        },
    ]

    for case in cases:
        label = case['name']
        original = case['object']

        print('Pickling '+label)
        payload = pickle.dumps(original)
        print('Done')

        print('Unpickling '+label)
        restored = pickle.loads(payload)
        print('Done')

        # Cases without a property list only check that the round trip runs.
        if 'assert_properties' not in case:
            continue

        print('Checking serialized/deserialized')
        for attribute in case['assert_properties']:
            expected = getattr(original, attribute)
            actual = getattr(restored, attribute)
            assert expected == actual
        print('Done')
    print("Test passed")


def test_bamfile_nosq(data_folder):
    """Iterate ``short_test_ccs.bam`` with ``check_sq=False``.

    Per the printed message, the file is a BAM without an SQ field (e.g.
    PacBio). Nothing is asserted about the alignments; the test passes if
    reading the whole file through ``HTSeq.BAM_Reader`` does not raise.

    Args:
        data_folder: String prefix that is concatenated directly with the
            file name (presumably a pytest fixture ending in a path
            separator -- confirm in conftest).
    """
    print('Test parsing BAM file with no SQ field (e.g. PacBio)')
    ccs_path = data_folder+"short_test_ccs.bam"
    nosq_reader = HTSeq.BAM_Reader(ccs_path, check_sq=False)
    for _alignment in nosq_reader:
        pass
    print("Test passed")