# File: test_common.py
#
# Package info:
#   textdistance 4.6.3-7
#   - links: PTS, VCS
#   - area: main
#   - in suites: forky, sid, trixie
#   - size: 448 kB
#   - sloc: python: 2,728; sh: 4; makefile: 3
# File content (108 lines) | stat: -rw-r--r-- 2,679 bytes | duplicates (2)

# built-in
from math import isclose

# external
import hypothesis
import pytest

# project
import textdistance


# Algorithm objects that every parametrized test in this module runs against.
# Each name is resolved as an attribute of the ``textdistance`` package, in
# the order listed; entries left as comments are not exercised.
ALGS = tuple(
    getattr(textdistance, alg_name)
    for alg_name in (
        'bag',

        'hamming',
        'levenshtein',
        'damerau_levenshtein',
        'jaro',
        'jaro_winkler',
        'mlipns',

        'lcsseq',
        'lcsstr',
        'ratcliff_obershelp',

        'jaccard',
        'sorensen',
        'tversky',
        'overlap',
        'cosine',
        'strcmp95',
        'monge_elkan',

        'mra',

        'prefix',
        'postfix',
        'identity',
        # 'length',

        # numpy-based:
        # 'gotoh',
        'needleman_wunsch',
        'smith_waterman',
        'editex',
    )
)


@pytest.mark.parametrize('alg', ALGS)
@hypothesis.given(
    left=hypothesis.strategies.text(),
    right=hypothesis.strategies.text(),
)
def test_normalization_range(left, right, alg):
    """Check that both normalized measures stay within the interval [0, 1].

    ``left`` and ``right`` are arbitrary Hypothesis-generated strings; ``alg``
    is one entry of ``ALGS``.
    """
    norm_distance = alg.normalized_distance(left, right)
    assert 0 <= norm_distance <= 1

    norm_similarity = alg.normalized_similarity(left, right)
    assert 0 <= norm_similarity <= 1


@pytest.mark.parametrize('alg', ALGS)
@hypothesis.given(
    left=hypothesis.strategies.text(),
    right=hypothesis.strategies.text(),
)
def test_normalization_by_one(left, right, alg):
    """Check that normalized distance and normalized similarity sum to 1.

    The sum is compared with ``math.isclose`` (default tolerances) rather
    than ``==`` so floating-point rounding does not fail the test.
    """
    norm_distance = alg.normalized_distance(left, right)
    norm_similarity = alg.normalized_similarity(left, right)
    total = norm_similarity + norm_distance
    assert isclose(total, 1)


@pytest.mark.parametrize('alg', ALGS)
@hypothesis.given(text=hypothesis.strategies.text())
def test_normalization_same(text, alg):
    """Check the measures reported when a string is compared with itself.

    Expected: normalized distance 0, normalized similarity 1, and a raw
    distance of 0 for every algorithm except ``needleman_wunsch``.
    """
    norm_distance = alg.normalized_distance(text, text)
    assert norm_distance == 0

    # needleman_wunsch is exempt from the raw-distance check.
    # NOTE(review): presumably its raw distance is not zero-based - confirm.
    raw_distance_checked = alg is not textdistance.needleman_wunsch
    if raw_distance_checked:
        raw_distance = alg.distance(text, text)
        assert raw_distance == 0

    norm_similarity = alg.normalized_similarity(text, text)
    assert norm_similarity == 1


@pytest.mark.parametrize('alg', ALGS)
@hypothesis.settings(deadline=None)
@hypothesis.given(
    left=hypothesis.strategies.text(min_size=1),
    right=hypothesis.strategies.text(min_size=1),
)
def test_normalization_monotonic(left, right, alg):
    """Check that normalization keeps the distance/similarity ordering.

    "Distance is smaller than similarity" must hold for the normalized
    values exactly when it holds for the raw values. Both inputs are
    non-empty strings, and the Hypothesis deadline is disabled for this test.
    """
    norm_distance = alg.normalized_distance(left, right)
    norm_similarity = alg.normalized_similarity(left, right)
    raw_distance = alg.distance(left, right)
    raw_similarity = alg.similarity(left, right)

    normalized_order = norm_distance < norm_similarity
    raw_order = raw_distance < raw_similarity
    assert normalized_order == raw_order


@pytest.mark.parametrize('alg', ALGS)
def test_no_common_chars(alg):
    """Check that two strings sharing no characters have zero similarity.

    ``editex`` is excluded from this check. It is reported as skipped
    instead of returning early, so the exclusion is visible in the test
    report rather than being counted as a pass.
    """
    if alg is textdistance.editex:
        pytest.skip('editex is excluded from the no-common-chars check')
    assert alg.similarity('spam', 'qwer') == 0


@pytest.mark.parametrize('alg', ALGS)
def test_empty(alg):
    """Check that the distance between two empty strings is 0."""
    empty_distance = alg.distance('', '')
    assert empty_distance == 0


@pytest.mark.parametrize('alg', ALGS)
def test_unequal_distance(alg):
    """Check that an empty and a non-empty string are a positive distance apart.

    The assertion only applies when ``alg.maximum`` returns a truthy value
    for the pair; otherwise the test passes without checking anything.
    """
    if not alg.maximum('', 'qwertyui'):
        return
    measured = alg.distance('', 'qwertyui')
    assert measured > 0