File: edit_distance.cpp

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (55 lines) | stat: -rw-r--r-- 1,611 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#include <torch/csrc/jit/frontend/edit_distance.h>
#include <algorithm>
#include <cstring>
#include <memory>

namespace torch::jit {

// computes levenshtein edit distance between two words
// returns maxEditDistance + 1 if the edit distance exceeds MaxEditDistance
// reference: http://llvm.org/doxygen/edit__distance_8h_source.html
size_t ComputeEditDistance(
    const char* word1,
    const char* word2,
    size_t maxEditDistance) {
  size_t m = strlen(word1);
  size_t n = strlen(word2);

  const unsigned small_buffer_size = 64;
  // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
  unsigned small_buffer[small_buffer_size];
  // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
  std::unique_ptr<unsigned[]> allocated;
  unsigned* row = small_buffer;
  if (n + 1 > small_buffer_size) {
    row = new unsigned[n + 1];
    allocated.reset(row);
  }

  for (unsigned i = 1; i <= n; ++i)
    row[i] = i;

  for (size_t y = 1; y <= m; ++y) {
    row[0] = y;
    unsigned best_this_row = row[0];

    unsigned previous = y - 1;
    for (size_t x = 1; x <= n; ++x) {
      const auto old_row = row[x];
      row[x] = std::min(
          previous + (word1[y - 1] == word2[x - 1] ? 0u : 1u),
          std::min(row[x - 1], row[x]) + 1);
      previous = old_row;
      best_this_row = std::min(best_this_row, row[x]);
    }

    if (maxEditDistance && best_this_row > maxEditDistance)
      return maxEditDistance + 1;
  }

  // NOLINTNEXTLINE(clang-analyzer-core.uninitialized.Assign)
  unsigned result = row[n];
  return result;
}

} // namespace torch::jit