File: Hamming_impl.hpp

package info (click to toggle)
rapidfuzz-cpp 3.3.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,480 kB
  • sloc: cpp: 30,893; python: 63; makefile: 26; sh: 8
file content (62 lines) | stat: -rw-r--r-- 2,098 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/* SPDX-License-Identifier: MIT */
/* Copyright © 2021 Max Bachmann */

#pragma once
#include <rapidfuzz/details/Range.hpp>
#include <rapidfuzz/details/distance.hpp>
#include <stdexcept>

namespace rapidfuzz {
namespace detail {

class Hamming : public DistanceBase<Hamming, size_t, 0, std::numeric_limits<int64_t>::max(), bool> {
    friend DistanceBase<Hamming, size_t, 0, std::numeric_limits<int64_t>::max(), bool>;
    friend NormalizedMetricBase<Hamming, bool>;

    template <typename InputIt1, typename InputIt2>
    static size_t maximum(const Range<InputIt1>& s1, const Range<InputIt2>& s2, bool)
    {
        return std::max(s1.size(), s2.size());
    }

    template <typename InputIt1, typename InputIt2>
    static size_t _distance(const Range<InputIt1>& s1, const Range<InputIt2>& s2, bool pad,
                            size_t score_cutoff, size_t)
    {
        if (!pad && s1.size() != s2.size()) throw std::invalid_argument("Sequences are not the same length.");

        size_t min_len = std::min(s1.size(), s2.size());
        size_t dist = std::max(s1.size(), s2.size());
        auto iter_s1 = s1.begin();
        auto iter_s2 = s2.begin();
        for (size_t i = 0; i < min_len; ++i)
            dist -= bool(*(iter_s1++) == *(iter_s2++));

        return (dist <= score_cutoff) ? dist : score_cutoff + 1;
    }
};

template <typename InputIt1, typename InputIt2>
Editops hamming_editops(const Range<InputIt1>& s1, const Range<InputIt2>& s2, bool pad, size_t)
{
    if (!pad && s1.size() != s2.size()) throw std::invalid_argument("Sequences are not the same length.");

    Editops ops;
    size_t min_len = std::min(s1.size(), s2.size());
    size_t i = 0;
    for (; i < min_len; ++i)
        if (s1[i] != s2[i]) ops.emplace_back(EditType::Replace, i, i);

    for (; i < s1.size(); ++i)
        ops.emplace_back(EditType::Delete, i, s2.size());

    for (; i < s2.size(); ++i)
        ops.emplace_back(EditType::Insert, s1.size(), i);

    ops.set_src_len(s1.size());
    ops.set_dest_len(s2.size());
    return ops;
}

} // namespace detail
} // namespace rapidfuzz