# Tests for the k-mer heuristic helpers in cutadapt.kmer_heuristic.
import pytest
from cutadapt.kmer_heuristic import (
kmer_chunks,
minimize_kmer_search_list,
create_back_overlap_searchsets,
create_positions_and_kmers,
)
@pytest.mark.parametrize(
    "sequence,chunks,expected",
    (
        # Length equals the chunk count: every piece is a single character.
        ("ABC", 3, {"A", "B", "C"}),
        # One character more than the chunk count: one piece is two long.
        ("ABCD", 3, {"AB", "C", "D"}),
    ),
)
def test_kmer_chunks(sequence, chunks, expected):
    """kmer_chunks(sequence, chunks) must equal the expected set of pieces."""
    observed = kmer_chunks(sequence, chunks)
    assert observed == expected
@pytest.mark.parametrize(
    "kmer_search_list,expected",
    (
        # Same kmer, two open-ended searches with negative starts: only the
        # most negative start is kept.
        (
            [("ABC", -33, None), ("ABC", -19, None)],
            [("ABC", -33, None)],
        ),
        # A (0, None) entry replaces the negative-start entries.
        (
            [("ABC", -33, None), ("ABC", -19, None), ("ABC", 0, None)],
            [("ABC", 0, None)],
        ),
        # Same kmer, two searches starting at 0: only the larger stop is kept.
        (
            [("ABC", 0, 10), ("ABC", 0, 20)],
            [("ABC", 0, 20)],
        ),
        # A (0, None) entry replaces the bounded searches starting at 0.
        (
            [("ABC", 0, 10), ("ABC", 0, 20), ("ABC", 0, None)],
            [("ABC", 0, None)],
        ),
        # A (0, None) entry replaces a mix of both kinds of search.
        (
            [("ABC", 0, 10), ("ABC", -19, None), ("ABC", 0, None)],
            [("ABC", 0, None)],
        ),
        # Without a (0, None) entry, one search of each kind is kept as is.
        (
            [("ABC", 0, 10), ("ABC", -19, None)],
            [("ABC", 0, 10), ("ABC", -19, None)],
        ),
    ),
)
def test_minimize_kmer_search_list(kmer_search_list, expected):
    """Compare the minimized search list with the expected one, ignoring order.

    Both sides are converted to sets, so neither the ordering nor repeated
    entries in the returned list affect the outcome.
    """
    minimized = minimize_kmer_search_list(kmer_search_list)
    assert {*minimized} == {*expected}
def test_create_back_overlap_searchsets():
    """Check the search sets built for a 20-character adapter.

    create_back_overlap_searchsets is called with the adapter, 3 and 0.1, and
    must return exactly five (start, stop, kmers) entries, all with stop None.
    """
    adapter = "ABCDEFGHIJ0123456789"
    searchsets = create_back_overlap_searchsets(adapter, 3, 0.1)

    assert len(searchsets) == 5

    # Entries whose kmer set holds a single adapter prefix.
    single_kmer_cases = ((-3, "ABC"), (-4, "ABCD"), (-9, "ABCDE"))
    for start, kmer in single_kmer_cases:
        assert (start, None, {kmer}) in searchsets

    # Entries whose kmer set is the first half of the adapter in two chunks,
    # and the whole adapter in three chunks.
    first_half = adapter[:10]
    assert (-19, None, kmer_chunks(first_half, 2)) in searchsets
    assert (-20, None, kmer_chunks(adapter, 3)) in searchsets
@pytest.mark.parametrize(
    "kwargs,expected",
    (
        # Back adapter with internal matches allowed.
        (
            {
                "back_adapter": True,
                "front_adapter": False,
                "internal": True,
                "min_overlap": 3,
            },
            [
                (-3, None, ["ABC"]),
                (-4, None, ["ABCD"]),
                (-19, None, ["ABCDE", "FGHIJ"]),
                (0, None, ["ABCDEFG", "HIJ0123", "456789"]),
            ],
        ),
        # Back adapter without internal matches.
        (
            {
                "back_adapter": True,
                "front_adapter": False,
                "internal": False,
                "min_overlap": 3,
            },
            [
                (-3, None, ["ABC"]),
                (-4, None, ["ABCD"]),
                (-19, None, ["ABCDE", "FGHIJ"]),
                (-20, None, ["ABCDEFG", "HIJ0123", "456789"]),
            ],
        ),
        # Front adapter without internal matches.
        (
            {
                "back_adapter": False,
                "front_adapter": True,
                "internal": False,
                "min_overlap": 3,
            },
            [
                (0, 3, ["789"]),
                (0, 4, ["6789"]),
                (0, 19, ["01234", "56789"]),
                (0, 20, ["ABCDEF", "GHIJ012", "3456789"]),
            ],
        ),
        # min_overlap equal to the adapter length: a single entry is expected.
        (
            {
                "back_adapter": True,
                "front_adapter": False,
                "internal": True,
                "min_overlap": 20,
            },
            [
                (0, None, ["ABCDEFG", "HIJ0123", "456789"]),
            ],
        ),
        # Internal matches only: a single entry is expected.
        (
            {
                "back_adapter": False,
                "front_adapter": False,
                "internal": True,
                "min_overlap": 3,
            },
            [
                (0, None, ["ABCDEFG", "HIJ0123", "456789"]),
            ],
        ),
    ),
)
def test_create_kmers_and_positions(kwargs, expected):
    """Compare create_positions_and_kmers output with the expected searches.

    Both the result and the expectation are turned into a mapping from
    (start, stop) to a frozenset of kmers, so the order of the entries and of
    the kmers inside an entry is irrelevant.
    """

    def by_position(searches):
        # Key each search by its (start, stop) pair.
        return {(start, stop): frozenset(kmers) for start, stop, kmers in searches}

    adapter = "ABCDEFGHIJ0123456789"
    result = create_positions_and_kmers(adapter, error_rate=0.1, **kwargs)
    assert by_position(result) == by_position(expected)
@pytest.mark.timeout(0.5)
def test_create_positions_and_kmers_slow():
    """Run create_positions_and_kmers on a 1000-character adapter.

    Nothing is asserted about the result; the test relies on the timeout
    marker to fail when the call takes too long.
    NOTE(review): the marker is presumably provided by the pytest-timeout
    plugin -- confirm it is installed in the test environment.
    """
    # Ridiculous size to check if there aren't any quadratic or exponential
    # algorithms in the code.
    oversized_adapter = "A" * 1000
    options = {
        "min_overlap": 3,
        "error_rate": 0.1,
        "back_adapter": True,
        "front_adapter": False,
        "internal": True,
    }
    create_positions_and_kmers(oversized_adapter, **options)
|