1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
# A Fuzzy Match implementation inspired by the sublime text fuzzy match algorithm
# as described here: https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb
# Heavily modified to provide more subjectively useful results
# for on the Nim manual.
#
import strutils
import math
const
MaxUnmatchedLeadingChar = 3
## Maximum number of times the penalty for unmatched leading chars is applied.
HeadingScaleFactor = 0.5
## The score from before the colon Char is multiplied by this.
## This is to weight function signatures and descriptions over module titles.
type
ScoreCard = enum
StartMatch = -100 ## Start matching.
LeadingCharDiff = -3 ## An unmatched, leading character was found.
CharDiff = -1 ## An unmatched character was found.
CharMatch = 0 ## A matched character was found.
ConsecutiveMatch = 5 ## A consecutive match was found.
LeadingCharMatch = 10 ## The character matches the beginning of the
## string or the first character of a word
## or camel case boundary.
WordBoundryMatch = 20 ## The last ConsecutiveCharMatch that
## immediately precedes the end of the string,
## end of the pattern, or a LeadingCharMatch.
proc fuzzyMatch*(pattern, str: cstring) : tuple[score: int, matched: bool] =
var
scoreState = StartMatch
headerMatched = false
unmatchedLeadingCharCount = 0
consecutiveMatchCount = 0
strIndex = 0
patIndex = 0
score = 0
template transition(nextState) =
scoreState = nextState
score += ord(scoreState)
while (strIndex < str.len) and (patIndex < pattern.len):
var
patternChar = pattern[patIndex].toLowerAscii
strChar = str[strIndex].toLowerAscii
# Ignore certain characters
if patternChar in {'_', ' ', '.'}:
patIndex += 1
continue
if strChar in {'_', ' ', '.'}:
strIndex += 1
continue
# Since this algorithm will be used to search against Nim documentation,
# the below logic prioritizes headers.
if not headerMatched and strChar == ':':
headerMatched = true
scoreState = StartMatch
score = int(floor(HeadingScaleFactor * float(score)))
patIndex = 0
strIndex += 1
continue
if strChar == patternChar:
case scoreState
of StartMatch, WordBoundryMatch:
scoreState = LeadingCharMatch
of CharMatch:
transition(ConsecutiveMatch)
of LeadingCharMatch, ConsecutiveMatch:
consecutiveMatchCount += 1
scoreState = ConsecutiveMatch
score += ord(ConsecutiveMatch) * consecutiveMatchCount
if scoreState == LeadingCharMatch:
score += ord(LeadingCharMatch)
var onBoundary = (patIndex == high(pattern))
if not onBoundary and strIndex < high(str):
let
nextPatternChar = toLowerAscii(pattern[patIndex + 1])
nextStrChar = toLowerAscii(str[strIndex + 1])
onBoundary = (
nextStrChar notin {'a'..'z'} and
nextStrChar != nextPatternChar
)
if onBoundary:
transition(WordBoundryMatch)
of CharDiff, LeadingCharDiff:
var isLeadingChar = (
str[strIndex - 1] notin Letters or
str[strIndex - 1] in {'a'..'z'} and
str[strIndex] in {'A'..'Z'}
)
if isLeadingChar:
scoreState = LeadingCharMatch
#a non alpha or a camel case transition counts as a leading char.
# Transition the state, but don't give the bonus yet; wait until we verify a consecutive match.
else:
transition(CharMatch)
patIndex += 1
else:
case scoreState
of StartMatch:
transition(LeadingCharDiff)
of ConsecutiveMatch:
transition(CharDiff)
consecutiveMatchCount = 0
of LeadingCharDiff:
if unmatchedLeadingCharCount < MaxUnmatchedLeadingChar:
transition(LeadingCharDiff)
unmatchedLeadingCharCount += 1
else:
transition(CharDiff)
strIndex += 1
if patIndex == pattern.len and (strIndex == str.len or str[strIndex] notin Letters):
score += 10
result = (
score: max(0, score),
matched: (score > 0),
)
|