File: test_kmer_heuristic.py

package info (click to toggle)
python-cutadapt 4.7-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,992 kB
  • sloc: python: 9,695; ansic: 177; makefile: 159
file content (119 lines) | stat: -rw-r--r-- 3,798 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import pytest

from cutadapt.kmer_heuristic import (
    kmer_chunks,
    minimize_kmer_search_list,
    create_back_overlap_searchsets,
    create_positions_and_kmers,
)


@pytest.mark.parametrize(
    ("sequence", "chunks", "expected"),
    [
        # Three characters, three chunks: one character per chunk.
        ("ABC", 3, {"A", "B", "C"}),
        # Four characters, three chunks: one chunk holds two characters.
        ("ABCD", 3, {"AB", "C", "D"}),
    ],
)
def test_kmer_chunks(sequence, chunks, expected):
    """kmer_chunks(sequence, chunks) must compare equal to the expected set."""
    produced = kmer_chunks(sequence, chunks)
    assert produced == expected


@pytest.mark.parametrize(
    ("kmer_search_list", "expected"),
    [
        (
            [("ABC", -33, None), ("ABC", -19, None)],
            [("ABC", -33, None)],
        ),
        (
            [("ABC", -33, None), ("ABC", -19, None), ("ABC", 0, None)],
            [("ABC", 0, None)],
        ),
        (
            [("ABC", 0, 10), ("ABC", 0, 20)],
            [("ABC", 0, 20)],
        ),
        (
            [("ABC", 0, 10), ("ABC", 0, 20), ("ABC", 0, None)],
            [("ABC", 0, None)],
        ),
        (
            [("ABC", 0, 10), ("ABC", -19, None), ("ABC", 0, None)],
            [("ABC", 0, None)],
        ),
        (
            # Neither entry is dropped here: both are expected back.
            [("ABC", 0, 10), ("ABC", -19, None)],
            [("ABC", 0, 10), ("ABC", -19, None)],
        ),
    ],
)
def test_minimize_kmer_search_list(kmer_search_list, expected):
    """Check which entries survive minimize_kmer_search_list.

    Both sides are converted to sets, so the order of the returned entries
    is not part of what this test verifies.
    """
    minimized = set(minimize_kmer_search_list(kmer_search_list))
    assert minimized == set(expected)


def test_create_back_overlap_searchsets():
    """Check the searchsets produced for a 20-character adapter.

    Calls create_back_overlap_searchsets(adapter, 3, 0.1) and verifies that
    exactly five searchsets come back and that each expected
    (start, stop, kmer set) triple is among them.
    """
    adapter = "ABCDEFGHIJ0123456789"
    searchsets = create_back_overlap_searchsets(adapter, 3, 0.1)

    wanted = [
        (-3, None, {"ABC"}),
        (-4, None, {"ABCD"}),
        (-9, None, {"ABCDE"}),
        (-19, None, kmer_chunks(adapter[:10], 2)),
        (-20, None, kmer_chunks(adapter, 3)),
    ]

    assert len(searchsets) == 5
    for searchset in wanted:
        assert searchset in searchsets


@pytest.mark.parametrize(
    ("kwargs", "expected"),
    [
        (
            {
                "back_adapter": True,
                "front_adapter": False,
                "internal": True,
                "min_overlap": 3,
            },
            [
                (-3, None, ["ABC"]),
                (-4, None, ["ABCD"]),
                (-19, None, ["ABCDE", "FGHIJ"]),
                (0, None, ["ABCDEFG", "HIJ0123", "456789"]),
            ],
        ),
        (
            {
                "back_adapter": True,
                "front_adapter": False,
                "internal": False,
                "min_overlap": 3,
            },
            [
                (-3, None, ["ABC"]),
                (-4, None, ["ABCD"]),
                (-19, None, ["ABCDE", "FGHIJ"]),
                (-20, None, ["ABCDEFG", "HIJ0123", "456789"]),
            ],
        ),
        (
            {
                "back_adapter": False,
                "front_adapter": True,
                "internal": False,
                "min_overlap": 3,
            },
            [
                (0, 3, ["789"]),
                (0, 4, ["6789"]),
                (0, 19, ["01234", "56789"]),
                (0, 20, ["ABCDEF", "GHIJ012", "3456789"]),
            ],
        ),
        (
            {
                "back_adapter": True,
                "front_adapter": False,
                "internal": True,
                "min_overlap": 20,
            },
            [
                (0, None, ["ABCDEFG", "HIJ0123", "456789"]),
            ],
        ),
        (
            {
                "back_adapter": False,
                "front_adapter": False,
                "internal": True,
                "min_overlap": 3,
            },
            [
                (0, None, ["ABCDEFG", "HIJ0123", "456789"]),
            ],
        ),
    ],
)
def test_create_kmers_and_positions(kwargs, expected):
    """Compare create_positions_and_kmers output against expected triples.

    The adapter and error_rate=0.1 are fixed; the remaining keyword arguments
    come from the parametrized ``kwargs``. Both the result and the expectation
    are normalised to a mapping keyed by (start, stop), so neither the order
    of the triples nor the order of kmers within a triple matters.
    """

    def by_position(entries):
        # Key on the (start, stop) pair; frozenset ignores kmer ordering.
        return {(start, stop): frozenset(kmers) for start, stop, kmers in entries}

    adapter = "ABCDEFGHIJ0123456789"
    result = create_positions_and_kmers(adapter, error_rate=0.1, **kwargs)
    assert by_position(result) == by_position(expected)


@pytest.mark.timeout(0.5)
def test_create_positions_and_kmers_slow():
    """Run create_positions_and_kmers on a very long adapter.

    There are no assertions on the result; the test relies on the timeout
    marker above to flag an implementation that takes too long.
    """
    # Deliberately absurd adapter length: a quadratic or exponential
    # algorithm anywhere in the code would show up as a very slow call.
    oversized_adapter = "A" * 1000
    create_positions_and_kmers(
        oversized_adapter,
        min_overlap=3,
        error_rate=0.1,
        back_adapter=True,
        front_adapter=False,
        internal=True,
    )