// Unit tests for the experimental Damerau-Levenshtein API of rapidfuzz.
#if CATCH2_VERSION == 2
# include <catch2/catch.hpp>
#else
# include <catch2/catch_test_macros.hpp>
# include <catch2/matchers/catch_matchers_floating_point.hpp>
#endif
#include <rapidfuzz/details/Range.hpp>
#include <rapidfuzz/details/types.hpp>
#include <string>
#include <rapidfuzz/distance/DamerauLevenshtein.hpp>
#include "../common.hpp"
using Catch::Matchers::WithinAbs;
template <typename Sentence1, typename Sentence2>
size_t damerau_levenshtein_distance(const Sentence1& s1, const Sentence2& s2,
size_t max = std::numeric_limits<size_t>::max())
{
size_t res1 = rapidfuzz::experimental::damerau_levenshtein_distance(s1, s2, max);
size_t res2 = rapidfuzz::experimental::damerau_levenshtein_distance(s1.begin(), s1.end(), s2.begin(),
s2.end(), max);
size_t res3 = rapidfuzz::experimental::damerau_levenshtein_distance(
make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), max);
rapidfuzz::experimental::CachedDamerauLevenshtein<rapidfuzz::char_type<Sentence1>> scorer(s1);
size_t res4 = scorer.distance(s2, max);
size_t res5 = scorer.distance(s2.begin(), s2.end(), max);
REQUIRE(res1 == res2);
REQUIRE(res1 == res3);
REQUIRE(res1 == res4);
REQUIRE(res1 == res5);
return res1;
}
template <typename Sentence1, typename Sentence2>
size_t damerau_levenshtein_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = 0)
{
size_t res1 = rapidfuzz::experimental::damerau_levenshtein_similarity(s1, s2, max);
size_t res2 = rapidfuzz::experimental::damerau_levenshtein_similarity(s1.begin(), s1.end(), s2.begin(),
s2.end(), max);
size_t res3 = rapidfuzz::experimental::damerau_levenshtein_similarity(
make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), max);
rapidfuzz::experimental::CachedDamerauLevenshtein<rapidfuzz::char_type<Sentence1>> scorer(s1);
size_t res4 = scorer.similarity(s2, max);
size_t res5 = scorer.similarity(s2.begin(), s2.end(), max);
REQUIRE(res1 == res2);
REQUIRE(res1 == res3);
REQUIRE(res1 == res4);
REQUIRE(res1 == res5);
return res1;
}
template <typename Sentence1, typename Sentence2>
double damerau_levenshtein_normalized_distance(const Sentence1& s1, const Sentence2& s2,
double score_cutoff = 1.0)
{
double res1 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance(s1, s2, score_cutoff);
double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance(
s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff);
double res3 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance(
make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()),
score_cutoff);
rapidfuzz::experimental::CachedDamerauLevenshtein<rapidfuzz::char_type<Sentence1>> scorer(s1);
double res4 = scorer.normalized_distance(s2, score_cutoff);
double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff);
REQUIRE_THAT(res1, WithinAbs(res2, 0.0001));
REQUIRE_THAT(res1, WithinAbs(res3, 0.0001));
REQUIRE_THAT(res1, WithinAbs(res4, 0.0001));
REQUIRE_THAT(res1, WithinAbs(res5, 0.0001));
return res1;
}
template <typename Sentence1, typename Sentence2>
double damerau_levenshtein_normalized_similarity(const Sentence1& s1, const Sentence2& s2,
double score_cutoff = 0.0)
{
double res1 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity(s1, s2, score_cutoff);
double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity(
s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff);
double res3 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity(
make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()),
score_cutoff);
rapidfuzz::experimental::CachedDamerauLevenshtein<rapidfuzz::char_type<Sentence1>> scorer(s1);
double res4 = scorer.normalized_similarity(s2, score_cutoff);
double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff);
REQUIRE_THAT(res1, WithinAbs(res2, 0.0001));
REQUIRE_THAT(res1, WithinAbs(res3, 0.0001));
REQUIRE_THAT(res1, WithinAbs(res4, 0.0001));
REQUIRE_THAT(res1, WithinAbs(res5, 0.0001));
return res1;
}
// Checks distances and normalized similarities for a handful of short inputs.
// Every expectation goes through the wrapper templates defined in this file,
// so each one also cross-checks all entry points against each other.
TEST_CASE("Levenshtein")
{
    // Shared fixtures, all variations of "aaaa".
    const std::string base = "aaaa";
    // Wide string on purpose or not, this makes the comparison mix char and wchar_t inputs.
    const std::wstring one_shorter = L"aaa";
    const std::string last_replaced = "aaab";
    const std::string b_at_second = "abaa";
    const std::string b_at_first = "baaa";
    const std::string all_replaced = "bbbb";

    SECTION("damerau levenshtein calculates correct distances")
    {
        // Identical inputs.
        REQUIRE(damerau_levenshtein_distance(base, base) == 0);
        // One character missing at the end.
        REQUIRE(damerau_levenshtein_distance(base, one_shorter) == 1);
        // "abaa" vs "baaa": expected to cost a single edit.
        REQUIRE(damerau_levenshtein_distance(b_at_second, b_at_first) == 1);
        // Last character differs.
        REQUIRE(damerau_levenshtein_distance(base, last_replaced) == 1);
        // Every character differs.
        REQUIRE(damerau_levenshtein_distance(base, all_replaced) == 4);

        // "CA" vs "ABC" is expected to cost two edits.
        const std::string lhs = "CA";
        const std::string rhs = "ABC";
        REQUIRE(damerau_levenshtein_distance(lhs, rhs) == 2);
    }

    SECTION("weighted levenshtein calculates correct ratios")
    {
        // The two extreme cases are compared exactly, the rest with a tolerance.
        REQUIRE(damerau_levenshtein_normalized_similarity(base, base) == 1.0);
        REQUIRE_THAT(damerau_levenshtein_normalized_similarity(base, one_shorter),
                     WithinAbs(0.75, 0.0001));
        REQUIRE_THAT(damerau_levenshtein_normalized_similarity(b_at_second, b_at_first),
                     WithinAbs(0.75, 0.0001));
        REQUIRE_THAT(damerau_levenshtein_normalized_similarity(base, last_replaced),
                     WithinAbs(0.75, 0.0001));
        REQUIRE(damerau_levenshtein_normalized_similarity(base, all_replaced) == 0.0);

        const std::string lhs = "CA";
        const std::string rhs = "ABC";
        REQUIRE_THAT(damerau_levenshtein_normalized_similarity(lhs, rhs), WithinAbs(0.33333, 0.0001));
    }
}
|