File: test_core.py

package info (click to toggle)
python-tinyalign 0.2.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 112 kB
  • sloc: python: 79; sh: 8; makefile: 6
file content (98 lines) | stat: -rw-r--r-- 2,842 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from tinyalign import edit_distance, hamming_distance

import random
import pytest


STRING_PAIRS = [
    ('', ''),
    ('', 'A'),
    ('A', 'A'),
    ('AB', ''),
    ('AB', 'ABC'),
    ('TGAATCCC', 'CCTGAATC'),
    ('ANANAS', 'BANANA'),
    ('SISSI', 'MISSISSIPPI'),
    ('GGAATCCC', 'TGAGGGATAAATATTTAGAATTTAGTAGTAGTGTT'),
    ('TCTGTTCCCTCCCTGTCTCA', 'TTTTAGGAAATACGCC'),
    ('TGAGACACGCAACATGGGAAAGGCAAGGCACACAGGGGATAGG', 'AATTTATTTTATTGTGATTTTTTGGAGGTTTGGAAGCCACTAAGCTATACTGAGACACGCAACAGGGGAAAGGCAAGGCACA'),
    ('TCCATCTCATCCCTGCGTGTCCCATCTGTTCCCTCCCTGTCTCA', 'TTTTAGGAAATACGCCTGGTGGGGTTTGGAGTATAGTGAAAGATAGGTGAGTTGGTCGGGTG'),
    ('A', 'TCTGCTCCTGGCCCATGATCGTATAACTTTCAAATTT'),
    ('GCGCGGACT', 'TAAATCCTGG'),
    ]


def py_edit_distance(s, t):
    """
    Pure-Python edit distance
    """
    m = len(s)
    n = len(t)

    costs = list(range(m + 1))
    for j in range(1, n + 1):
        prev = costs[0]
        costs[0] += 1
        for i in range(1, m + 1):
            c = min(
                prev + int(s[i-1] != t[j-1]),
                costs[i] + 1,
                costs[i-1] + 1,
            )
            prev = costs[i]
            costs[i] = c

    return costs[-1]


def random_string():
    return ''.join(random.choice('AC') for _ in range(random.randint(0, 20)))


RANDOM_STRING_PAIRS = [(random_string(), random_string()) for _ in range(10000)]


def test_edit_distance():
    assert edit_distance('', '') == 0
    assert edit_distance('', 'A') == 1
    assert edit_distance('A', 'B') == 1
    assert edit_distance('A', 'A') == 0
    assert edit_distance('A', 'AB') == 1
    assert edit_distance('BA', 'AB') == 2
    for s, t in STRING_PAIRS + RANDOM_STRING_PAIRS:
        assert edit_distance(s, '') == len(s)
        assert edit_distance('', s) == len(s)
        assert edit_distance(s, t) == edit_distance(t, s)
        assert edit_distance(s, t) == py_edit_distance(s, t)


def assert_banded(s, t, maxdiff):
    banded_dist = edit_distance(s, t, maxdiff=maxdiff)
    true_dist = edit_distance(s, t)
    if true_dist > maxdiff:
        assert banded_dist > maxdiff
    else:
        assert banded_dist == true_dist


def test_edit_distance_banded():
    for maxdiff in range(5):
        assert_banded('ABC', '', maxdiff)
        for s, t in STRING_PAIRS:
            assert_banded(s, '', maxdiff)
            assert_banded('', s, maxdiff)
            assert_banded(s, t, maxdiff)
            assert_banded(t, s, maxdiff)


def test_hamming_distance():
    assert hamming_distance('', '') == 0
    assert hamming_distance('A', 'A') == 0
    assert hamming_distance('HELLO', 'HELLO') == 0
    assert hamming_distance('ABC', 'DEF') == 3
    assert hamming_distance('ABCXDEF', 'ABCYDEF') == 1


def test_hamming_distance_incorrect_length():
    with pytest.raises(IndexError):
        hamming_distance('A', 'BC')