File: string_utils.cc

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (121 lines) | stat: -rw-r--r-- 2,996 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#include "caffe2/utils/string_utils.h"

#include <algorithm>
#include <sstream>
#include <vector>

namespace caffe2 {

std::vector<std::string>
split(char separator, const std::string& string, bool ignore_empty) {
  std::vector<std::string> pieces;
  std::stringstream ss(string);
  std::string item;
  while (getline(ss, item, separator)) {
    if (!ignore_empty || !item.empty()) {
      pieces.push_back(std::move(item));
    }
  }
  return pieces;
}

std::string trim(const std::string& str) {
  size_t left = str.find_first_not_of(' ');
  if (left == std::string::npos) {
    return str;
  }
  size_t right = str.find_last_not_of(' ');
  return str.substr(left, (right - left + 1));
}

size_t editDistance(
  const std::string& s1, const std::string& s2, size_t max_distance)
  {
    std::vector<size_t> current(s1.length() + 1);
    std::vector<size_t> previous(s1.length() + 1);
    std::vector<size_t> previous1(s1.length() + 1);

    return editDistanceHelper(
        s1.c_str(),
        s1.length(),
        s2.c_str(),
        s2.length(),
        current,
        previous,
        previous1,
        max_distance
    );
  }
  #define NEXT_UNSAFE(s, i, c) { \
      (c)=(uint8_t)(s)[(i)++]; \
  }

int32_t editDistanceHelper(const char* s1,
  size_t s1_len,
  const char* s2,
  size_t s2_len,
  std::vector<size_t> &current,
  std::vector<size_t> &previous,
  std::vector<size_t> &previous1,
  size_t max_distance) {
    if (max_distance) {
      if (std::max(s1_len, s2_len) - std::min(s1_len, s2_len) > max_distance) {
        return max_distance+1;
      }
    }

    for (size_t j = 0; j <= s1_len; ++j) {
      current[j] = j;
    }

    int32_t str2_offset = 0;
    char prev2 = 0;
    for (size_t i = 1; i <= s2_len; ++i) {
      swap(previous1, previous);
      swap(current, previous);
      current[0] = i;

      // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
      char c2 = s2[str2_offset];
      char prev1 = 0;
      int32_t str1_offset = 0;

      NEXT_UNSAFE(s2, str2_offset, c2);

      size_t current_min = s1_len;
      for (size_t j = 1; j <= s1_len; ++j) {
        size_t insertion = previous[j] + 1;
        size_t deletion = current[j - 1] + 1;
        size_t substitution = previous[j - 1];
        size_t transposition = insertion;
        // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
        char c1 = s1[str1_offset];

        NEXT_UNSAFE(s1, str1_offset, c1);

        if (c1 != c2) {
          substitution += 1;
        }


        if (prev1 == c2 && prev2 == c1 && j > 1 && i > 1) {
          transposition = previous1[j - 2] + 1;
        }
        prev1 = c1;

        current[j] = std::min(std::min(insertion, deletion),
                         std::min(substitution, transposition));
        current_min = std::min(current_min, current[j]);
      }


      if (max_distance != 0 && current_min > max_distance) {
        return max_distance+1;
      }

      prev2 = c2;
    }

    return current[s1_len];
  }
} // namespace caffe2