File: test_files.py

package info (click to toggle)
python-cutadapt 4.7-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,992 kB
  • sloc: python: 9,695; ansic: 177; makefile: 159
file content (131 lines) | stat: -rw-r--r-- 3,826 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
import pickle

from cutadapt.files import ProxyTextFile, ProxyRecordWriter, OutputFiles
from dnaio import SequenceRecord


def test_proxy_text_file():
    """Check that text printed to a ProxyTextFile comes back from drain().

    The expected bytes use the platform line separator, and a drain() with
    no write since the previous one is expected to yield one empty chunk.
    """
    eol = os.linesep.encode()
    proxy = ProxyTextFile()

    print("hello", file=proxy)
    first_chunks = proxy.drain()
    assert first_chunks == [b"hello" + eol]
    # Nothing has been written since the previous drain().
    assert proxy.drain() == [b""]

    # Two writes before the next drain() are expected in a single chunk.
    for word in ("world", "foo"):
        print(word, file=proxy, end="\n")
    assert proxy.drain() == [b"".join((b"world", eol, b"foo", eol))]


def test_proxy_test_file_pickleable():
    """Check that a ProxyTextFile survives a pickle round trip."""
    original = ProxyTextFile()
    restored = pickle.loads(pickle.dumps(original))
    assert isinstance(restored, ProxyTextFile)


def test_proxy_record_writer():
    """Check the bytes drain() returns for a single-file ProxyRecordWriter."""
    writer = ProxyRecordWriter(n_files=1, qualities=True)

    writer.write(SequenceRecord("name", "ACGT", qualities="####"))
    assert writer.drain() == [b"@name\nACGT\n+\n####\n"]

    # Records written between two drain() calls are expected in one chunk.
    for record in (
        SequenceRecord("foo", "AA", "HH"),
        SequenceRecord("bar", "CC", ",,"),
    ):
        writer.write(record)
    assert writer.drain() == [b"@foo\nAA\n+\nHH\n@bar\nCC\n+\n,,\n"]


def test_proxy_record_writer_paired():
    """Check that a two-file ProxyRecordWriter drains one chunk per file."""
    writer = ProxyRecordWriter(n_files=2, qualities=True)

    first = SequenceRecord("name", "ACGT", qualities="####")
    second = SequenceRecord("name", "GGGG", qualities="!!!!")
    writer.write(first, second)
    assert writer.drain() == [
        b"@name\nACGT\n+\n####\n",
        b"@name\nGGGG\n+\n!!!!\n",
    ]

    # Several pairs written before one drain(): the first records of all
    # pairs are expected in chunk 0, the second records in chunk 1.
    pairs = [
        (SequenceRecord("foo", "AA", "HH"), SequenceRecord("foo", "TT", "33")),
        (SequenceRecord("bar", "CC", ",,"), SequenceRecord("bar", "GGG", "444")),
    ]
    for pair in pairs:
        writer.write(*pair)
    assert writer.drain() == [
        b"@foo\nAA\n+\nHH\n@bar\nCC\n+\n,,\n",
        b"@foo\nTT\n+\n33\n@bar\nGGG\n+\n444\n",
    ]


def test_proxy_record_writer_picklable():
    """Check that pickling a ProxyRecordWriter keeps its type and file count."""
    original = ProxyRecordWriter(n_files=2, qualities=True)
    restored = pickle.loads(pickle.dumps(original))

    assert isinstance(restored, ProxyRecordWriter)
    assert restored._n_files == 2


class TestOutputFiles:
    """Tests that write through a non-proxied OutputFiles into real files."""

    def test_open_text(self, tmp_path):
        """Text printed to a file from open_text() is on disk after close()."""
        target = tmp_path / "out.txt"
        outfiles = OutputFiles(proxied=False, qualities=False, interleaved=False)

        handle = outfiles.open_text(target)
        print("Hello", file=handle)
        outfiles.close()

        content = target.read_text()
        assert content == "Hello\n"

    def test_open_record_writer(self, tmp_path):
        """A single record written with qualities ends up in the output file."""
        target = tmp_path / "out.fastq"
        outfiles = OutputFiles(proxied=False, qualities=True, interleaved=False)

        writer = outfiles.open_record_writer(target)
        writer.write(SequenceRecord("r", "ACGT", "####"))
        outfiles.close()

        content = target.read_text()
        assert content == "@r\nACGT\n+\n####\n"

    def test_paired_record_writer(self, tmp_path):
        """With two paths, each record of a pair goes to its own file."""
        targets = [tmp_path / name for name in ("out.1.fastq", "out.2.fastq")]
        outfiles = OutputFiles(proxied=False, qualities=True, interleaved=False)

        writer = outfiles.open_record_writer(*targets)
        r1 = SequenceRecord("r", "AACC", "####")
        r2 = SequenceRecord("r", "GGTT", "####")
        writer.write(r1, r2)
        outfiles.close()

        first_target, second_target = targets
        assert first_target.read_text() == "@r\nAACC\n+\n####\n"
        assert second_target.read_text() == "@r\nGGTT\n+\n####\n"

    def test_interleaved_record_writer(self, tmp_path):
        """In interleaved mode, both records of a pair share one file."""
        target = tmp_path / "out.1.fastq"
        outfiles = OutputFiles(proxied=False, qualities=True, interleaved=True)

        writer = outfiles.open_record_writer(target, interleaved=True)
        r1 = SequenceRecord("r", "AACC", "####")
        r2 = SequenceRecord("r", "GGTT", "####")
        writer.write(r1, r2)
        outfiles.close()

        content = target.read_text()
        assert content == "@r\nAACC\n+\n####\n@r\nGGTT\n+\n####\n"

    # Remaining test ideas:
    # - test force fasta
    # - test qualities
    # - test proxied
    # - test complaint about duplicate file names