File: tests-Hamming.cpp

package info (click to toggle)
rapidfuzz-cpp 3.3.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,480 kB
  • sloc: cpp: 30,893; python: 63; makefile: 26; sh: 8
file content (129 lines) | stat: -rw-r--r-- 5,319 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#if CATCH2_VERSION == 2
#    include <catch2/catch.hpp>
#else
#    include <catch2/catch_test_macros.hpp>
#    include <catch2/matchers/catch_matchers_floating_point.hpp>
#endif

#include <rapidfuzz/distance.hpp>
#include <rapidfuzz/distance/Hamming.hpp>
#include <string>

#include "../common.hpp"
#include "rapidfuzz/details/type_traits.hpp"

using Catch::Matchers::WithinAbs;

/**
 * @brief Computes the Hamming distance through every entry point and checks that they agree.
 *
 * The distance is computed with the container overload, the iterator overload, the
 * iterator overload fed with make_bidir-wrapped iterators, and both
 * CachedHamming::distance overloads. All five results are REQUIREd to be equal.
 *
 * @param s1  first sequence; also used to build the cached scorer
 * @param s2  second sequence
 * @param max score cutoff forwarded to every call
 * @return the result of the container overload
 */
template <typename Sentence1, typename Sentence2>
size_t hamming_distance(const Sentence1& s1, const Sentence2& s2,
                        size_t max = std::numeric_limits<size_t>::max())
{
    // rapidfuzz's free hamming_* functions take `bool pad` ahead of the score cutoff.
    // Passing `max` as the only trailing argument bound it to `pad`, so the cutoff never
    // reached these calls. Spell out pad explicitly; `true` is what the default `max`
    // used to convert to, and the scorer below is likewise built with its default padding.
    constexpr bool pad = true;

    size_t from_ranges = rapidfuzz::hamming_distance(s1, s2, pad, max);
    size_t from_iters = rapidfuzz::hamming_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), pad, max);
    size_t from_bidir = rapidfuzz::hamming_distance(make_bidir(s1.begin()), make_bidir(s1.end()),
                                                    make_bidir(s2.begin()), make_bidir(s2.end()), pad, max);

    rapidfuzz::CachedHamming<rapidfuzz::char_type<Sentence1>> scorer(s1);
    size_t cached_ranges = scorer.distance(s2, max);
    size_t cached_iters = scorer.distance(s2.begin(), s2.end(), max);

    REQUIRE(from_ranges == from_iters);
    REQUIRE(from_ranges == from_bidir);
    REQUIRE(from_ranges == cached_ranges);
    REQUIRE(from_ranges == cached_iters);
    return from_ranges;
}

/**
 * @brief Computes the Hamming similarity through every entry point and checks that they agree.
 *
 * The similarity is computed with the container overload, the iterator overload, the
 * iterator overload fed with make_bidir-wrapped iterators, and both
 * CachedHamming::similarity overloads. All five results are REQUIREd to be equal.
 *
 * @param s1  first sequence; also used to build the cached scorer
 * @param s2  second sequence
 * @param max score cutoff forwarded to every call
 * @return the result of the container overload
 */
template <typename Sentence1, typename Sentence2>
size_t hamming_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = 0)
{
    // rapidfuzz's free hamming_* functions take `bool pad` ahead of the score cutoff.
    // Passing `max` as the only trailing argument bound it to `pad`: the default of 0
    // silently turned padding off for the free functions while the scorer below keeps its
    // default padding, and the cutoff itself was never forwarded. Pass both explicitly.
    constexpr bool pad = true;

    size_t from_ranges = rapidfuzz::hamming_similarity(s1, s2, pad, max);
    size_t from_iters = rapidfuzz::hamming_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), pad, max);
    size_t from_bidir = rapidfuzz::hamming_similarity(make_bidir(s1.begin()), make_bidir(s1.end()),
                                                      make_bidir(s2.begin()), make_bidir(s2.end()), pad, max);

    rapidfuzz::CachedHamming<rapidfuzz::char_type<Sentence1>> scorer(s1);
    size_t cached_ranges = scorer.similarity(s2, max);
    size_t cached_iters = scorer.similarity(s2.begin(), s2.end(), max);

    REQUIRE(from_ranges == from_iters);
    REQUIRE(from_ranges == from_bidir);
    REQUIRE(from_ranges == cached_ranges);
    REQUIRE(from_ranges == cached_iters);
    return from_ranges;
}

/**
 * @brief Computes the normalized Hamming distance through every entry point and checks
 *        that they agree.
 *
 * The value is computed with the container overload, the iterator overload, the iterator
 * overload fed with make_bidir-wrapped iterators, and both
 * CachedHamming::normalized_distance overloads. Each result is REQUIREd to lie within
 * 0.0001 of the container overload's result.
 *
 * @param s1           first sequence; also used to build the cached scorer
 * @param s2           second sequence
 * @param score_cutoff score cutoff forwarded to every call
 * @return the result of the container overload
 */
template <typename Sentence1, typename Sentence2>
double hamming_normalized_distance(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 1.0)
{
    // rapidfuzz's free hamming_* functions take `bool pad` ahead of the score cutoff.
    // Passing `score_cutoff` as the only trailing argument converted it to `pad`, so the
    // cutoff never reached these calls. Spell out pad explicitly; `true` is what the
    // default cutoff used to convert to, and the scorer below uses its default padding.
    constexpr bool pad = true;
    constexpr double tolerance = 0.0001;

    double from_ranges = rapidfuzz::hamming_normalized_distance(s1, s2, pad, score_cutoff);
    double from_iters = rapidfuzz::hamming_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(),
                                                               pad, score_cutoff);
    double from_bidir = rapidfuzz::hamming_normalized_distance(
        make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), pad,
        score_cutoff);

    rapidfuzz::CachedHamming<rapidfuzz::char_type<Sentence1>> scorer(s1);
    double cached_ranges = scorer.normalized_distance(s2, score_cutoff);
    double cached_iters = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff);

    REQUIRE_THAT(from_ranges, WithinAbs(from_iters, tolerance));
    REQUIRE_THAT(from_ranges, WithinAbs(from_bidir, tolerance));
    REQUIRE_THAT(from_ranges, WithinAbs(cached_ranges, tolerance));
    REQUIRE_THAT(from_ranges, WithinAbs(cached_iters, tolerance));
    return from_ranges;
}

/**
 * @brief Computes the normalized Hamming similarity through every entry point and checks
 *        that they agree.
 *
 * The value is computed with the container overload, the iterator overload, the iterator
 * overload fed with make_bidir-wrapped iterators, and both
 * CachedHamming::normalized_similarity overloads. Each result is REQUIREd to lie within
 * 0.0001 of the container overload's result.
 *
 * @param s1           first sequence; also used to build the cached scorer
 * @param s2           second sequence
 * @param score_cutoff score cutoff forwarded to every call
 * @return the result of the container overload
 */
template <typename Sentence1, typename Sentence2>
double hamming_normalized_similarity(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0.0)
{
    // rapidfuzz's free hamming_* functions take `bool pad` ahead of the score cutoff.
    // Passing `score_cutoff` as the only trailing argument converted it to `pad`: the
    // default of 0.0 silently turned padding off for the free functions while the scorer
    // below keeps its default padding, and the cutoff itself was never forwarded.
    constexpr bool pad = true;
    constexpr double tolerance = 0.0001;

    double from_ranges = rapidfuzz::hamming_normalized_similarity(s1, s2, pad, score_cutoff);
    double from_iters = rapidfuzz::hamming_normalized_similarity(s1.begin(), s1.end(), s2.begin(),
                                                                 s2.end(), pad, score_cutoff);
    double from_bidir = rapidfuzz::hamming_normalized_similarity(
        make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), pad,
        score_cutoff);

    rapidfuzz::CachedHamming<rapidfuzz::char_type<Sentence1>> scorer(s1);
    double cached_ranges = scorer.normalized_similarity(s2, score_cutoff);
    double cached_iters = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff);

    REQUIRE_THAT(from_ranges, WithinAbs(from_iters, tolerance));
    REQUIRE_THAT(from_ranges, WithinAbs(from_bidir, tolerance));
    REQUIRE_THAT(from_ranges, WithinAbs(cached_ranges, tolerance));
    REQUIRE_THAT(from_ranges, WithinAbs(cached_iters, tolerance));
    return from_ranges;
}

// Distance checks on a four-character baseline, two variants that each differ from it in
// one position, and one string that is a single character longer.
TEST_CASE("Hamming")
{
    const std::string baseline = "aaaa";
    const std::string second_changed = "abaa";
    const std::string third_changed = "aaba";
    const std::string one_longer = "aaaaa";

    SECTION("hamming calculates correct distances")
    {
        // Identical input, one differing position, then two differing positions.
        REQUIRE(hamming_distance(baseline, baseline) == 0);
        REQUIRE(hamming_distance(baseline, second_changed) == 1);
        REQUIRE(hamming_distance(baseline, third_changed) == 1);
        REQUIRE(hamming_distance(second_changed, third_changed) == 2);
    }

    SECTION("hamming handles different string lengths as insertions / deletions")
    {
        // The extra trailing character counts as one edit in either argument order.
        REQUIRE(hamming_distance(baseline, one_longer) == 1);
        REQUIRE(hamming_distance(one_longer, baseline) == 1);
    }
}

// Round-trip check: applying the edit operations produced by hamming_editops to the
// source must reproduce the destination, in both directions, and the recorded
// source/destination lengths must match the inputs.
TEST_CASE("Hamming_editops")
{
    const std::string shorter = "Lorem ipsum.";
    const std::string longer = "XYZLorem ABC iPsum";

    // shorter -> longer
    {
        rapidfuzz::Editops grow = rapidfuzz::hamming_editops(shorter, longer);
        REQUIRE(longer == rapidfuzz::editops_apply_str<char>(grow, shorter, longer));
        REQUIRE(grow.get_src_len() == shorter.size());
        REQUIRE(grow.get_dest_len() == longer.size());
    }

    // longer -> shorter
    {
        rapidfuzz::Editops shrink = rapidfuzz::hamming_editops(longer, shorter);
        REQUIRE(shorter == rapidfuzz::editops_apply_str<char>(shrink, longer, shorter));
        REQUIRE(shrink.get_src_len() == longer.size());
        REQUIRE(shrink.get_dest_len() == shorter.size());
    }
}