File: test_qualtrim.py

package info (click to toggle)
python-cutadapt 4.7-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,992 kB
  • sloc: python: 9,695; ansic: 177; makefile: 159
file content (83 lines) | stat: -rw-r--r-- 3,053 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import pytest

from dnaio import SequenceRecord
from cutadapt.qualtrim import nextseq_trim_index, expected_errors, poly_a_trim_index


def test_nextseq_trim():
    """Check ``nextseq_trim_index`` on an empty record and on a real read."""
    cutoff = 22

    # A record without any bases or qualities: the expected index is 0.
    empty_read = SequenceRecord("n", "", "")
    assert nextseq_trim_index(empty_read, cutoff=cutoff) == 0

    # A read whose 3' end consists of G and N bases; the expected trim
    # position for this read at the given cutoff is 33.
    bases = (
        "TCTCGTATGCCGTCTTATGCTTGAAAAAAAAAAGGGGGGGGGGGGGGGGGNNNNNNNNNNNGGNGG"
    )
    qualities = (
        "AA//EAEE//A6///E//A//EA/EEEEEEAEA//EEEEEEEEEEEEEEE###########EE#EA"
    )
    read = SequenceRecord("n", bases, qualities)
    assert nextseq_trim_index(read, cutoff=cutoff) == 33


@pytest.mark.parametrize(
    "sequence,tail",
    [
        # No tail expected: empty read, or a read not ending in a poly-A run.
        ("", ""),
        ("GGGGGGGGAAAGAAGAAGAAGAAGAAGAAG", ""),
        # The trailing run of A is shorter than three nucleotides, so no
        # tail is expected.
        ("TTTAGA", ""),
        ("TTTAGAA", ""),
        # A trailing run of exactly three A is expected to be a tail.
        ("TTTAG", "AAA"),
        # Long, uninterrupted tail.
        ("TCAAGAAGTCCTTTACCAGCTTTC", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
        # Expected tails that also contain nucleotides other than A.
        ("TCAAGAAGTCCTTTACCAGCTTTC", "AAATAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
        ("GCAGATCACCTT", "AAAAAAAAAAAAAAAAAAAAAAAAAAAATAAA"),
        ("GCAGATCACCTT", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAT"),
        ("GCAGATCACCTT", "AAAAAAAAAAAAAAAAAAAAAAAAAAAATCG"),
        ("GCAGATCACCTAT", "AAAACAAAAAAACAAAAAAAACAAAAAA"),
        ("TTTT", "AAATAAAA"),
        # A-rich sequence followed by a short tail: only the tail counts.
        ("GGGGGGGGAAAGAAGAAGAAGAAGAAGAAG", "AAA"),
    ],
)
def test_poly_a_trim_index(sequence, tail):
    """The trim index of ``sequence + tail`` must be where ``tail`` starts."""
    read = sequence + tail
    expected_index = len(sequence)
    assert poly_a_trim_index(read) == expected_index


@pytest.mark.parametrize(
    "head,sequence",
    [
        # No head expected: empty read, or a read not starting with a
        # run of at least three T.
        ("", ""),
        ("", "GGGGGGGGAAAGAAGAAGAAGAAGAAGAAG"),
        ("", "TGTCCC"),
        ("", "TTGTCCC"),
        # A leading run of exactly three T is expected to be a head.
        ("TTT", "GTCCC"),
        # Long, uninterrupted head.
        ("TTTTTTTTTTTTTTTTTTTTT", "CAAGAAGTCCCCAGCTTTC"),
        # Expected heads that also contain nucleotides other than T.
        ("TTTATTTTTTTTTTTTTTTTTTTTTTTTTTTTT", "CAAGAAGTCCTTTACCAGCTTTC"),
        ("TTTTTATTTTTTTTTTTTTTTTTTTTTTTTTT", "GCAGATCACCTT"),
        ("ATTTTTTTTTTTTTTTTTTTTTTTTTTTT", "GCAGATCACCTT"),
        ("AGCTTTTTTTTTTTTTTTTTTTTTTTTTTTT", "GCAGATCACCTT"),
        ("TTTTGTTTTTTTGTTTTTTTTGTTTTTT", "GCAGATCACCTAT"),
        ("TTTATTTT", "AAAA"),
        # Short head in front of a sequence that contains no T at all.
        ("TTT", "GGGGGGGGAAAGAAGAAGAAGAAGAAGAAG"),
    ],
)
def test_poly_t_trim_index(head, sequence):
    """With ``revcomp=True``, the index must be where the poly-T head ends."""
    read = head + sequence
    expected_index = len(head)
    assert poly_a_trim_index(read, revcomp=True) == expected_index


def test_expected_errors():
    """Compare ``expected_errors`` against hand-computed values.

    Each case pairs a list of quality scores with the expected result; the
    expected values equal the sum of ``10 ** (-q / 10)`` over the scores.
    """

    def to_quality_string(scores):
        # Encode each score as the character with code point ``score + 33``.
        return "".join([chr(33 + score) for score in scores])

    cases = [
        # (expected number of errors, quality scores)
        (0.0, []),  # encodes to the empty string
        (0.1, [10]),
        (0.01, [20]),
        (0.001, [30]),
        (0.2, [10, 10]),
        (0.11, [10, 20]),
        (0.11, [20, 10]),
        (0.3, [10, 10, 10]),
        (0.111, [10, 20, 30]),
        (0.2111, [10, 10, 20, 30, 40]),
    ]

    for expected, scores in cases:
        qualities = to_quality_string(scores)
        assert expected_errors(qualities) == pytest.approx(expected)