1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
|
/* SPDX-License-Identifier: MIT */
/* Copyright © 2020 Max Bachmann */
#include <algorithm>
#include <array>
#include <iterator>
namespace rapidfuzz {
namespace detail {
template <typename InputIt1, typename InputIt2>
DecomposedSet<InputIt1, InputIt2, InputIt1> set_decomposition(SplittedSentenceView<InputIt1> a,
SplittedSentenceView<InputIt2> b)
{
a.dedupe();
b.dedupe();
RangeVec<InputIt1> intersection;
RangeVec<InputIt1> difference_ab;
RangeVec<InputIt2> difference_ba = b.words();
for (const auto& current_a : a.words()) {
auto element_b = std::find(difference_ba.begin(), difference_ba.end(), current_a);
if (element_b != difference_ba.end()) {
difference_ba.erase(element_b);
intersection.push_back(current_a);
}
else {
difference_ab.push_back(current_a);
}
}
return {difference_ab, difference_ba, intersection};
}
template <class InputIt1, class InputIt2>
std::pair<InputIt1, InputIt2> rf_mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2)
{
while (first1 != last1 && first2 != last2 && *first1 == *first2)
++first1, ++first2;
return std::make_pair(first1, first2);
}
/**
* Removes common prefix of two string views
*/
template <typename InputIt1, typename InputIt2>
size_t remove_common_prefix(Range<InputIt1>& s1, Range<InputIt2>& s2)
{
auto first1 = std::begin(s1);
size_t prefix = static_cast<size_t>(
std::distance(first1, rf_mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first));
s1.remove_prefix(prefix);
s2.remove_prefix(prefix);
return prefix;
}
/**
* Removes common suffix of two string views
*/
template <typename InputIt1, typename InputIt2>
size_t remove_common_suffix(Range<InputIt1>& s1, Range<InputIt2>& s2)
{
auto rfirst1 = s1.rbegin();
size_t suffix = static_cast<size_t>(
std::distance(rfirst1, rf_mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first));
s1.remove_suffix(suffix);
s2.remove_suffix(suffix);
return suffix;
}
/**
* Removes common affix of two string views
*/
template <typename InputIt1, typename InputIt2>
StringAffix remove_common_affix(Range<InputIt1>& s1, Range<InputIt2>& s2)
{
return StringAffix{remove_common_prefix(s1, s2), remove_common_suffix(s1, s2)};
}
template <typename, typename = void>
struct is_space_dispatch_tag : std::integral_constant<int, 0> {};
template <typename CharT>
struct is_space_dispatch_tag<CharT, typename std::enable_if<sizeof(CharT) == 1>::type>
: std::integral_constant<int, 1> {};
/*
* Implementation of is_space for char types that are at least 2 Byte in size
*/
template <typename CharT>
bool is_space_impl(const CharT ch, std::integral_constant<int, 0>)
{
switch (ch) {
case 0x0009:
case 0x000A:
case 0x000B:
case 0x000C:
case 0x000D:
case 0x001C:
case 0x001D:
case 0x001E:
case 0x001F:
case 0x0020:
case 0x0085:
case 0x00A0:
case 0x1680:
case 0x2000:
case 0x2001:
case 0x2002:
case 0x2003:
case 0x2004:
case 0x2005:
case 0x2006:
case 0x2007:
case 0x2008:
case 0x2009:
case 0x200A:
case 0x2028:
case 0x2029:
case 0x202F:
case 0x205F:
case 0x3000: return true;
}
return false;
}
/*
* Implementation of is_space for char types that are 1 Byte in size
*/
template <typename CharT>
bool is_space_impl(const CharT ch, std::integral_constant<int, 1>)
{
switch (ch) {
case 0x0009:
case 0x000A:
case 0x000B:
case 0x000C:
case 0x000D:
case 0x001C:
case 0x001D:
case 0x001E:
case 0x001F:
case 0x0020: return true;
}
return false;
}
/*
* checks whether unicode characters have the bidirectional
* type 'WS', 'B' or 'S' or the category 'Zs'
*/
template <typename CharT>
bool is_space(const CharT ch)
{
return is_space_impl(ch, is_space_dispatch_tag<CharT>{});
}
template <typename InputIt, typename CharT>
SplittedSentenceView<InputIt> sorted_split(InputIt first, InputIt last)
{
RangeVec<InputIt> splitted;
auto second = first;
for (; first != last; first = second + 1) {
second = std::find_if(first, last, is_space<CharT>);
if (first != second) {
splitted.emplace_back(first, second);
}
if (second == last) break;
}
std::sort(splitted.begin(), splitted.end());
return SplittedSentenceView<InputIt>(splitted);
}
} // namespace detail
} // namespace rapidfuzz
|