File: 0000-resolve-ties-in-scoring-best-term-suggestions.patch

package info (click to toggle)
golang-github-sajari-fuzzy 1.0.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,500 kB
  • sloc: makefile: 8
file content (45 lines) | stat: -rw-r--r-- 1,535 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
From: wamuir <17497234+wamuir@users.noreply.github.com>
Date: Fri, 3 Jul 2020 08:22:04 -0700
Subject: resolve ties in scoring best term suggestions

Origin: upstream, https://github.com/sajari/fuzzy/pull/25/commits/153eea5fe92c459656528143252cd576215e621e
Bug: https://github.com/sajari/fuzzy/issues/26
---
 fuzzy.go | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/fuzzy.go b/fuzzy.go
index c6bd6a5..b9b7a2a 100644
--- a/fuzzy.go
+++ b/fuzzy.go
@@ -406,8 +406,20 @@ func (model *Model) corpusCount(input string) int {
 func best(input string, potential map[string]*Potential) string {
 	var best string
 	var bestcalc, bonus int
+
+	// `potential` is an unordered map (map[string]*Potential), so range
+	// over a sorted slice of its keys to make iteration deterministic.
+	// Ties (terms with identical scores) then resolve predictably: the
+	// last tied term in ascending key order wins (note the >= below).
+	keys := make([]string, 0, len(potential))
+	for k := range potential {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+
 	for i := 0; i < 4; i++ {
-		for _, pot := range potential {
+		for _, k := range keys {
+			pot := potential[k]
 			if pot.Leven == 0 {
 				return pot.Term
 			} else if pot.Leven == i {
@@ -416,7 +428,7 @@ func best(input string, potential map[string]*Potential) string {
 				if pot.Term[0] == input[0] {
 					bonus += 100
 				}
-				if pot.Score+bonus > bestcalc {
+				if pot.Score+bonus >= bestcalc {
 					bestcalc = pot.Score + bonus
 					best = pot.Term
 				}