File: tests-DamerauLevenshtein.cpp

package info (click to toggle)
rapidfuzz-cpp 3.3.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,480 kB
  • sloc: cpp: 30,893; python: 63; makefile: 26; sh: 8
file content (133 lines) | stat: -rw-r--r-- 6,227 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#if CATCH2_VERSION == 2
#    include <catch2/catch.hpp>
#else
#    include <catch2/catch_test_macros.hpp>
#    include <catch2/matchers/catch_matchers_floating_point.hpp>
#endif

#include <rapidfuzz/details/Range.hpp>
#include <rapidfuzz/details/types.hpp>
#include <string>

#include <rapidfuzz/distance/DamerauLevenshtein.hpp>

#include "../common.hpp"

using Catch::Matchers::WithinAbs;

template <typename Sentence1, typename Sentence2>
size_t damerau_levenshtein_distance(const Sentence1& s1, const Sentence2& s2,
                                    size_t max = std::numeric_limits<size_t>::max())
{
    size_t res1 = rapidfuzz::experimental::damerau_levenshtein_distance(s1, s2, max);
    size_t res2 = rapidfuzz::experimental::damerau_levenshtein_distance(s1.begin(), s1.end(), s2.begin(),
                                                                        s2.end(), max);
    size_t res3 = rapidfuzz::experimental::damerau_levenshtein_distance(
        make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), max);
    rapidfuzz::experimental::CachedDamerauLevenshtein<rapidfuzz::char_type<Sentence1>> scorer(s1);
    size_t res4 = scorer.distance(s2, max);
    size_t res5 = scorer.distance(s2.begin(), s2.end(), max);
    REQUIRE(res1 == res2);
    REQUIRE(res1 == res3);
    REQUIRE(res1 == res4);
    REQUIRE(res1 == res5);
    return res1;
}

template <typename Sentence1, typename Sentence2>
size_t damerau_levenshtein_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = 0)
{
    size_t res1 = rapidfuzz::experimental::damerau_levenshtein_similarity(s1, s2, max);
    size_t res2 = rapidfuzz::experimental::damerau_levenshtein_similarity(s1.begin(), s1.end(), s2.begin(),
                                                                          s2.end(), max);
    size_t res3 = rapidfuzz::experimental::damerau_levenshtein_similarity(
        make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), max);
    rapidfuzz::experimental::CachedDamerauLevenshtein<rapidfuzz::char_type<Sentence1>> scorer(s1);
    size_t res4 = scorer.similarity(s2, max);
    size_t res5 = scorer.similarity(s2.begin(), s2.end(), max);
    REQUIRE(res1 == res2);
    REQUIRE(res1 == res3);
    REQUIRE(res1 == res4);
    REQUIRE(res1 == res5);
    return res1;
}

template <typename Sentence1, typename Sentence2>
double damerau_levenshtein_normalized_distance(const Sentence1& s1, const Sentence2& s2,
                                               double score_cutoff = 1.0)
{
    double res1 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance(s1, s2, score_cutoff);
    double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance(
        s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff);
    double res3 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance(
        make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()),
        score_cutoff);
    rapidfuzz::experimental::CachedDamerauLevenshtein<rapidfuzz::char_type<Sentence1>> scorer(s1);
    double res4 = scorer.normalized_distance(s2, score_cutoff);
    double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff);
    REQUIRE_THAT(res1, WithinAbs(res2, 0.0001));
    REQUIRE_THAT(res1, WithinAbs(res3, 0.0001));
    REQUIRE_THAT(res1, WithinAbs(res4, 0.0001));
    REQUIRE_THAT(res1, WithinAbs(res5, 0.0001));
    return res1;
}

template <typename Sentence1, typename Sentence2>
double damerau_levenshtein_normalized_similarity(const Sentence1& s1, const Sentence2& s2,
                                                 double score_cutoff = 0.0)
{
    double res1 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity(s1, s2, score_cutoff);
    double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity(
        s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff);
    double res3 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity(
        make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()),
        score_cutoff);
    rapidfuzz::experimental::CachedDamerauLevenshtein<rapidfuzz::char_type<Sentence1>> scorer(s1);
    double res4 = scorer.normalized_similarity(s2, score_cutoff);
    double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff);
    REQUIRE_THAT(res1, WithinAbs(res2, 0.0001));
    REQUIRE_THAT(res1, WithinAbs(res3, 0.0001));
    REQUIRE_THAT(res1, WithinAbs(res4, 0.0001));
    REQUIRE_THAT(res1, WithinAbs(res5, 0.0001));
    return res1;
}

// Checks Damerau-Levenshtein distances and normalized similarities on a small
// set of hand-picked string pairs.
//
// NOTE(review): the test case is named "Levenshtein" and the second section
// says "weighted levenshtein", although everything here exercises the
// Damerau-Levenshtein helpers. The names look carried over from the plain
// Levenshtein tests; they are left unchanged because test runners and filters
// may refer to them.
TEST_CASE("Levenshtein")
{
    // Reference string and variants of it.
    const std::string reference = "aaaa";
    const std::wstring one_shorter = L"aaa"; // wide string: mixes character types in the comparison
    const std::string last_replaced = "aaab";
    const std::string b_second = "abaa";
    const std::string b_first = "baaa";
    const std::string all_replaced = "bbbb";

    // Extra pair used at the end of both sections.
    const std::string ca = "CA";
    const std::string abc = "ABC";

    SECTION("damerau levenshtein calculates correct distances")
    {
        REQUIRE(damerau_levenshtein_distance(reference, reference) == 0);
        REQUIRE(damerau_levenshtein_distance(reference, one_shorter) == 1);
        // "abaa" vs "baaa": expected to cost a single edit
        REQUIRE(damerau_levenshtein_distance(b_second, b_first) == 1);
        REQUIRE(damerau_levenshtein_distance(reference, last_replaced) == 1);
        REQUIRE(damerau_levenshtein_distance(reference, all_replaced) == 4);

        REQUIRE(damerau_levenshtein_distance(ca, abc) == 2);
    }

    SECTION("weighted levenshtein calculates correct ratios")
    {
        REQUIRE(damerau_levenshtein_normalized_similarity(reference, reference) == 1.0);
        REQUIRE_THAT(damerau_levenshtein_normalized_similarity(reference, one_shorter),
                     WithinAbs(0.75, 0.0001));
        REQUIRE_THAT(damerau_levenshtein_normalized_similarity(b_second, b_first), WithinAbs(0.75, 0.0001));
        REQUIRE_THAT(damerau_levenshtein_normalized_similarity(reference, last_replaced),
                     WithinAbs(0.75, 0.0001));
        REQUIRE(damerau_levenshtein_normalized_similarity(reference, all_replaced) == 0.0);

        REQUIRE_THAT(damerau_levenshtein_normalized_similarity(ca, abc), WithinAbs(0.33333, 0.0001));
    }
}