1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
// Package misspell impements utilities for basic spelling correction.
package misspell
import (
"unicode/utf8"
)
// AlmostEqual reports whether a and b have Damerau-Levenshtein distance of at
// most 1. That is, it reports whether a can be transformed into b by adding,
// removing or substituting a single rune, or by swapping two adjacent runes.
// Invalid runes are considered equal.
//
// It runs in O(len(a)+len(b)) time.
func AlmostEqual(a, b string) bool {
for len(a) > 0 && len(b) > 0 {
ra, tailA := shiftRune(a)
rb, tailB := shiftRune(b)
if ra == rb {
a, b = tailA, tailB
continue
}
// check for addition/deletion/substitution
if equalValid(a, tailB) || equalValid(tailA, b) || equalValid(tailA, tailB) {
return true
}
if len(tailA) == 0 || len(tailB) == 0 {
return false
}
// check for swap
a, b = tailA, tailB
Ra, tailA := shiftRune(tailA)
Rb, tailB := shiftRune(tailB)
return ra == Rb && Ra == rb && equalValid(tailA, tailB)
}
if len(a) == 0 {
return len(b) == 0 || singleRune(b)
}
return singleRune(a)
}
// singleRune reports whether s consists of a single UTF-8 codepoint.
func singleRune(s string) bool {
_, n := utf8.DecodeRuneInString(s)
return n == len(s)
}
// shiftRune splits off the first UTF-8 codepoint from s and returns it and the
// rest of the string. It panics if s is empty.
func shiftRune(s string) (rune, string) {
if len(s) == 0 {
panic(s)
}
r, n := utf8.DecodeRuneInString(s)
return r, s[n:]
}
// equalValid reports whether a and b are equal, if invalid code points are considered equal.
func equalValid(a, b string) bool {
var ra, rb rune
for len(a) > 0 && len(b) > 0 {
ra, a = shiftRune(a)
rb, b = shiftRune(b)
if ra != rb {
return false
}
}
return len(a) == 0 && len(b) == 0
}
|