File: levenshtein.rb

package info (click to toggle)
ruby2.7 2.7.4-1%2Bdeb11u1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 112,576 kB
  • sloc: ruby: 849,454; ansic: 697,834; yacc: 45,100; xml: 25,367; pascal: 10,051; javascript: 6,575; sh: 3,848; makefile: 759; cpp: 713; asm: 333; python: 295; lisp: 97; sed: 94; perl: 62; awk: 36
file content (57 lines) | stat: -rw-r--r-- 1,375 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
module DidYouMean
  module Levenshtein # :nodoc:
    # This code is based directly on the Text gem implementation
    # Copyright (c) 2006-2013 Paul Battley, Michael Neumann, Tim Fletcher.
    #
    # Returns a value representing the "cost" of transforming str1 into str2
    def distance(str1, str2)
      n = str1.length
      m = str2.length
      return m if n.zero?
      return n if m.zero?

      d = (0..m).to_a
      x = nil

      # to avoid duplicating an enumerable object, create it outside of the loop
      str2_codepoints = str2.codepoints

      str1.each_codepoint.with_index(1) do |char1, i|
        j = 0
        while j < m
          cost = (char1 == str2_codepoints[j]) ? 0 : 1
          x = min3(
            d[j+1] + 1, # insertion
            i + 1,      # deletion
            d[j] + cost # substitution
          )
          d[j] = i
          i = x

          j += 1
        end
        d[m] = x
      end

      x
    end
    module_function :distance

    private

    # detects the minimum value out of three arguments. This method is
    # faster than `[a, b, c].min` and puts less GC pressure.
    # See https://github.com/ruby/did_you_mean/pull/1 for a performance
    # benchmark.
    def min3(a, b, c)
      if a < b && a < c
        a
      elsif b < c
        b
      else
        c
      end
    end
    module_function :min3
  end
end