File: edit_distance.cpp

package info (click to toggle)
pytorch 1.7.1-7
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 80,340 kB
  • sloc: cpp: 670,830; python: 343,991; ansic: 67,845; asm: 5,503; sh: 2,924; java: 2,888; xml: 266; makefile: 244; ruby: 148; yacc: 144; objc: 51; lex: 44
file content (54 lines) | stat: -rw-r--r-- 1,409 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#include <torch/csrc/jit/frontend/edit_distance.h>
#include <algorithm>
#include <cstring>
#include <memory>

namespace torch {
namespace jit {

// Computes the Levenshtein edit distance between two NUL-terminated words,
// where each insertion, deletion and substitution costs 1.
//
// word1, word2: the C strings to compare.
// maxEditDistance: when non-zero, the computation stops as soon as every
//   entry of the current row of the dynamic-programming table is greater
//   than this bound, and maxEditDistance + 1 is returned. Passing 0
//   disables the early exit.
//
// Returns the edit distance, or maxEditDistance + 1 when the early exit
// fires. If the early exit does not fire, the exact distance is returned
// even when it is larger than maxEditDistance, so callers should compare
// the result against their bound rather than test for equality with
// maxEditDistance + 1.
//
// reference: http://llvm.org/doxygen/edit__distance_8h_source.html
size_t ComputeEditDistance(
    const char* word1,
    const char* word2,
    size_t maxEditDistance) {
  const size_t m = std::strlen(word1);
  const size_t n = std::strlen(word2);

  // Only a single row of the (m + 1) x (n + 1) table is kept at a time.
  // Short words use the stack buffer; longer ones use a heap allocation
  // owned by `allocated`.
  constexpr size_t small_buffer_size = 64;
  unsigned small_buffer[small_buffer_size];
  std::unique_ptr<unsigned[]> allocated;
  unsigned* row = small_buffer;
  if (n + 1 > small_buffer_size) {
    allocated.reset(new unsigned[n + 1]);
    row = allocated.get();
  }

  // Row 0: turning the empty prefix of word1 into the first i characters of
  // word2 takes i insertions. Starting at i == 0 matters: when word1 is
  // empty the loop below never runs and row[n] is returned as-is, so row[0]
  // must already hold a defined value.
  for (size_t i = 0; i <= n; ++i) {
    row[i] = static_cast<unsigned>(i);
  }

  for (size_t y = 1; y <= m; ++y) {
    // Column 0: turning the first y characters of word1 into the empty
    // string takes y deletions.
    row[0] = static_cast<unsigned>(y);
    unsigned best_this_row = row[0];

    // `previous` holds the diagonal (row y - 1, column x - 1) entry.
    unsigned previous = static_cast<unsigned>(y - 1);
    for (size_t x = 1; x <= n; ++x) {
      // Save the entry from the previous row before overwriting it; it is
      // the diagonal for the next column.
      const unsigned old_row = row[x];
      row[x] = std::min(
          previous + (word1[y - 1] == word2[x - 1] ? 0u : 1u),
          std::min(row[x - 1], row[x]) + 1);
      previous = old_row;
      best_this_row = std::min(best_this_row, row[x]);
    }

    // If every entry of this row exceeds the bound, stop and report
    // "more than maxEditDistance".
    if (maxEditDistance && best_this_row > maxEditDistance) {
      return maxEditDistance + 1;
    }
  }

  return row[n];
}

} // namespace jit
} // namespace torch