# Tests for KmerFinder (imported from cutadapt.adapters).
import string
import pytest
from cutadapt._match_tables import matches_lookup
from cutadapt.adapters import KmerFinder
from cutadapt._kmer_finder import MAXIMUM_WORD_SIZE
# Parametrization table for test_kmer_finder. Every row is a 7-tuple:
#   (kmer, start, stop, ref_wildcards, query_wildcards, sequence, expected)
# where ``expected`` is the value kmers_present(sequence) should return.
KMER_FINDER_TESTS = [
    # --- No wildcards: letter case of kmer and sequence is expected not to matter.
    ("ACGT", 0, None, False, False, "ACGTACG", True),
    ("ACGT", 0, None, False, False, "ACgtACG", True),
    ("acgt", 0, None, False, False, "ACgtACG", True),
    ("ACGT", 0, None, False, False, "acgtacg", True),
    # The sequence does not contain the kmer at all.
    ("ACGT", 0, None, False, False, "gacgact", False),
    # --- "N" in the sequence: expected to match only when query_wildcards is set.
    ("ACGT", 0, None, False, True, "ACGNACG", True),
    ("ACGT", 0, None, False, False, "ACGNACG", False),
    # --- Wildcard characters in the kmer with ref_wildcards set.
    ("ACGN", 0, None, True, False, "ACGTACG", True),
    ("ACGN", 0, None, True, False, "ACGxACG", True),
    ("ACKN", 0, None, True, False, "ACGTACG", True),
    # --- Wildcards enabled on both sides.
    ("ACKN", 0, None, True, True, "ACWRACG", True),
    ("ACKN", 0, None, True, True, "ACWxACG", False),
]
@pytest.mark.parametrize(
    "kmer, start, stop, ref_wildcards, query_wildcards, sequence, expected",
    KMER_FINDER_TESTS,
)
def test_kmer_finder(
    kmer: str,
    start: int,
    stop: int,
    ref_wildcards: bool,
    query_wildcards: bool,
    sequence: str,
    expected: bool,
):
    """Check a single row of ``KMER_FINDER_TESTS``.

    A ``KmerFinder`` is built for one kmer searched between ``start`` and
    ``stop`` with the given wildcard settings; ``kmers_present(sequence)``
    must then return exactly ``expected``.
    """
    search_spec = [(start, stop, [kmer])]
    finder = KmerFinder(search_spec, ref_wildcards, query_wildcards)
    found = finder.kmers_present(sequence)
    # Identity comparison: the result has to be the bool itself, not merely
    # a truthy or falsy value.
    assert found is expected
@pytest.mark.parametrize(
    "ref_wildcards, query_wildcards",
    [
        (False, False),
        (True, False),
        (False, True),
        (True, True),
    ],
)
def test_kmer_finder_per_char_matching(ref_wildcards, query_wildcards):
    """Compare single-character matching against the ``matches_lookup`` table.

    For every pair of ASCII letters, a one-character ``KmerFinder`` must
    report a hit exactly when the second letter occurs in the table entry
    of the first letter (indexed by ``ord``).

    Raises:
        ValueError: on the first pair for which finder and table disagree.
    """
    lookup = matches_lookup(ref_wildcards, query_wildcards)
    for char in string.ascii_letters:
        allowed = lookup[ord(char)]
        finder = KmerFinder(
            [(0, None, [char])],
            ref_wildcards=ref_wildcards,
            query_wildcards=query_wildcards,
        )
        for comp_char in string.ascii_letters:
            should_match = comp_char.encode("ascii") in allowed
            if finder.kmers_present(comp_char) is should_match:
                continue
            raise ValueError(
                f"{char} should{' ' if should_match else ' not '}match {comp_char}"
            )
def test_kmer_finder_initialize_bigword():
    """A kmer one character longer than ``MAXIMUM_WORD_SIZE`` must be rejected.

    Construction has to raise ``ValueError``, and the error text must match
    both the offending kmer and the size limit.
    """
    oversized_kmer = "A" * (MAXIMUM_WORD_SIZE + 1)
    with pytest.raises(ValueError) as error:
        KmerFinder([(0, None, [oversized_kmer])])
    # Checked after the ``with`` block: inside it, anything following the
    # raising call would never run.
    for pattern in (oversized_kmer, str(MAXIMUM_WORD_SIZE)):
        error.match(pattern)
def test_kmer_finder_initialize_total_greater_than_max():
    """Several kmers in one search region must each remain findable.

    NOTE(review): the lengths (32 + 32 + 32 + 43 = 139) are presumably chosen
    so that the combined length exceeds ``MAXIMUM_WORD_SIZE`` while every
    single kmer still fits -- confirm against the constant's value.
    """
    kmers = ("A" * 32, "B" * 32, "C" * 32, "D" * 43)
    finder = KmerFinder([(0, None, list(kmers))])
    padding = "X" * 100
    # Each kmer is placed behind 100 characters that belong to no kmer.
    for kmer in kmers:
        assert finder.kmers_present(padding + kmer)
    # The plain alphabet contains no run of a repeated letter.
    assert not finder.kmers_present(string.ascii_letters)
def test_kmer_finder_finds_all():
    """Every kmer of a multi-kmer finder must be detectable on its own.

    Each positive sentence contains exactly one of the four words; the
    negative sentence only contains the singular "turtle".
    """
    finder = KmerFinder([(0, None, ["teenage", "mutant", "ninja", "turtles"])])
    sentences_with_kmer = (
        "Smells like teenage spirit",
        "Everyone with a SNP is technically a mutant.",
        "He made a ninja PR that was merged before review",
        # Only the capitalised "Turtles" can produce the hit here.
        (
            "Turtles are treated as outgroup, for 'more advanced' reptiles but "
            "molecular evidence suggests they are more close to the dinosaurs than "
            "previously thought."
        ),
    )
    for sentence in sentences_with_kmer:
        assert finder.kmers_present(sentence)
    assert not finder.kmers_present(
        "A turtle may be slow, but it also lives for a long time."
    )
def test_kmer_finder_finds_in_region():
    """A kmer must only be reported when it lies inside the search range.

    NOTE(review): a start of ``-20`` with no stop presumably limits the
    search to the last 20 characters of the sequence -- confirm against
    ``KmerFinder``.
    """
    finder = KmerFinder([(-20, None, ["peace"])])
    # Finding peace, quotes from Mahatma Gandhi
    peace_in_range = "Each one has to find his peace from within"
    peace_out_of_range = (
        "And peace to be real must be unaffected by outside circumstances."
    )
    assert finder.kmers_present(peace_in_range)
    # Peace not found here because outside of the search range.
    assert not finder.kmers_present(peace_out_of_range)
|