1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
|
From: wamuir <17497234+wamuir@users.noreply.github.com>
Date: Fri, 3 Jul 2020 08:22:04 -0700
Subject: resolve ties in scoring best term suggestions
Origin: upstream, https://github.com/sajari/fuzzy/pull/25/commits/153eea5fe92c459656528143252cd576215e621e
Bug: https://github.com/sajari/fuzzy/issues/26
---
fuzzy.go | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/fuzzy.go b/fuzzy.go
index c6bd6a5..b9b7a2a 100644
--- a/fuzzy.go
+++ b/fuzzy.go
@@ -406,8 +406,20 @@ func (model *Model) corpusCount(input string) int {
func best(input string, potential map[string]*Potential) string {
var best string
var bestcalc, bonus int
+
+ // Range over a sorted slice of keys rather than over `potential`
+ // directly: it is a map (map[string]*Potential), so its iteration
+ // order is unspecified. With the `>=` comparison below, ties (terms
+ // with identical scores) resolve to the last tied key in sorted order.
+ keys := make([]string, 0, len(potential))
+ for k := range potential {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+
for i := 0; i < 4; i++ {
- for _, pot := range potential {
+ for _, k := range keys {
+ pot := potential[k]
if pot.Leven == 0 {
return pot.Term
} else if pot.Leven == i {
@@ -416,7 +428,7 @@ func best(input string, potential map[string]*Potential) string {
if pot.Term[0] == input[0] {
bonus += 100
}
- if pot.Score+bonus > bestcalc {
+ if pot.Score+bonus >= bestcalc {
bestcalc = pot.Score + bonus
best = pot.Term
}
|