File: JaroWinkler.hpp

package info (click to toggle)
rapidfuzz-cpp 3.1.1-1~bpo12%2B1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-backports
  • size: 2,444 kB
  • sloc: cpp: 30,295; python: 63; makefile: 26; sh: 8
file content (35 lines) | stat: -rw-r--r-- 1,413 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/* SPDX-License-Identifier: MIT */
/* Copyright © 2022-present Max Bachmann */

#pragma once
#include "Jaro.hpp"

namespace rapidfuzz_reference {

/**
 * @brief Jaro-Winkler similarity of the sequences [P_first, P_last) and [T_first, T_last).
 *
 * Starts from the score returned by jaro_similarity() and, when that score is above 0.7,
 * raises it by `prefix * prefix_weight * (1 - score)`, where `prefix` is the length of the
 * common prefix of both sequences, counted over at most the first 4 elements.
 *
 * The unnamed template parameter removes this overload from consideration when InputIt2
 * is `double`, so a call of the form `(s1, s2, prefix_weight, score_cutoff)` cannot bind
 * to the iterator signature.
 *
 * @param P_first       begin of the first sequence (indexed with `operator[]`)
 * @param P_last        end of the first sequence
 * @param T_first       begin of the second sequence (indexed with `operator[]`)
 * @param T_last        end of the second sequence
 * @param prefix_weight weight given to each matching prefix element
 * @param score_cutoff  scores below this value are reported as 0
 * @return the boosted score, limited to at most 1.0, or 0 when it is below score_cutoff
 */
template <typename InputIt1, typename InputIt2,
          typename = std::enable_if_t<!std::is_same_v<InputIt2, double>>>
double jaro_winkler_similarity(InputIt1 P_first, InputIt1 P_last, InputIt2 T_first, InputIt2 T_last,
                               double prefix_weight = 0.1, double score_cutoff = 0.0)
{
    /* the common prefix can be neither longer than the shorter sequence nor longer than 4 */
    int64_t min_len = std::min(std::distance(P_first, P_last), std::distance(T_first, T_last));
    size_t max_prefix = std::min(static_cast<size_t>(min_len), size_t(4));

    size_t prefix = 0;
    for (; prefix < max_prefix; ++prefix)
        if (T_first[static_cast<ptrdiff_t>(prefix)] != P_first[static_cast<ptrdiff_t>(prefix)]) break;

    double Sim = jaro_similarity(P_first, P_last, T_first, T_last);
    if (Sim > 0.7) {
        Sim += static_cast<double>(prefix) * prefix_weight * (1.0 - Sim);
        /* prefix * prefix_weight is larger than 1 for a 4 element prefix and
         * prefix_weight > 0.25, which would push the similarity above 1.0 */
        Sim = std::min(Sim, 1.0);
    }

    return (Sim >= score_cutoff) ? Sim : 0;
}

/**
 * @brief Jaro-Winkler similarity of two ranges.
 *
 * Convenience overload: obtains the bounds of both arguments through std::begin / std::end
 * and forwards them, together with the two tuning parameters, to the iterator based overload.
 *
 * @param s1            first range
 * @param s2            second range
 * @param prefix_weight forwarded unchanged
 * @param score_cutoff  forwarded unchanged
 * @return the value returned by the iterator based overload
 */
template <typename Sentence1, typename Sentence2>
double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double prefix_weight = 0.1,
                               double score_cutoff = 0.0)
{
    auto s1_first = std::begin(s1);
    auto s1_last = std::end(s1);
    auto s2_first = std::begin(s2);
    auto s2_last = std::end(s2);

    return jaro_winkler_similarity(s1_first, s1_last, s2_first, s2_last, prefix_weight, score_cutoff);
}

} /* namespace rapidfuzz_reference */