"""
Fuzzy matcher.
This class is used by the [command palette](/guide/command_palette) to match search terms.
This is the matcher that powers Textual's command palette.
Thanks to Will McGugan for the implementation.
"""
from __future__ import annotations

from operator import itemgetter
from re import IGNORECASE, escape, finditer, search
from typing import Iterable, NamedTuple

from textual.cache import LRUCache

class _Search(NamedTuple):
    """Internal structure to keep track of a recursive search."""

    candidate_offset: int = 0
    query_offset: int = 0
    offsets: tuple[int, ...] = ()

    def branch(self, offset: int) -> tuple[_Search, _Search]:
        """Branch this search when an offset is found.

        Args:
            offset: Offset of a matching letter in the query.

        Returns:
            A pair of search objects.
        """
        _, query_offset, offsets = self
        return (
            _Search(offset + 1, query_offset + 1, offsets + (offset,)),
            _Search(offset + 1, query_offset, offsets),
        )

    @property
    def groups(self) -> int:
        """Number of groups in offsets."""
        groups = 1
        last_offset, *offsets = self.offsets
        for offset in offsets:
            if offset != last_offset + 1:
                groups += 1
            last_offset = offset
        return groups


class FuzzySearch:
    """Performs a fuzzy search.

    Unlike a regex solution, this will find all possible matches.
    """

    cache: LRUCache[tuple[str, str, bool], tuple[float, tuple[int, ...]]] = LRUCache(
        1024 * 4
    )

    def __init__(self, case_sensitive: bool = False) -> None:
        """Initialize fuzzy search.

        Args:
            case_sensitive: Is the match case sensitive?
        """
        self.case_sensitive = case_sensitive

    def match(self, query: str, candidate: str) -> tuple[float, tuple[int, ...]]:
        """Match against a query.

        Args:
            query: The fuzzy query.
            candidate: A candidate to check.

        Returns:
            A pair of (score, tuple of offsets). `(0, ())` for no result.
        """
        query_regex = ".*?".join(f"({escape(character)})" for character in query)
        if not search(
            query_regex, candidate, flags=0 if self.case_sensitive else IGNORECASE
        ):
            # Bail out early if there is no possibility of a match
            return (0.0, ())
        cache_key = (query, candidate, self.case_sensitive)
        if cache_key in self.cache:
            return self.cache[cache_key]
        result = max(
            self._match(query, candidate), key=itemgetter(0), default=(0.0, ())
        )
        self.cache[cache_key] = result
        return result

    def _match(
        self, query: str, candidate: str
    ) -> Iterable[tuple[float, tuple[int, ...]]]:
        """Generator to do the matching.

        Args:
            query: Query to match.
            candidate: Candidate to check against.

        Yields:
            Pairs of score and tuple of offsets.
        """
        if not self.case_sensitive:
            query = query.lower()
            candidate = candidate.lower()

        # We need this to give a bonus to first letters.
        first_letters = {match.start() for match in finditer(r"\w+", candidate)}

        def score(search: _Search) -> float:
            """Score a search.

            Args:
                search: Search object.

            Returns:
                Score.
            """
            # This is a heuristic, and can be tweaked for better results
            # Boost first letter matches
            offset_count = len(search.offsets)
            score: float = offset_count + len(
                first_letters.intersection(search.offsets)
            )
            # Boost to favor fewer groups
            normalized_groups = (offset_count - (search.groups - 1)) / offset_count
            score *= 1 + (normalized_groups * normalized_groups)
            return score
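
        # Worked example of the heuristic: matching "ss" against
        # "save screenshot" at offsets (0, 5) scores 2 (letters matched)
        # + 2 (both are first letters) = 4, then gets the group boost
        # 1 + ((2 - 1) / 2) ** 2 = 1.25, for a final score of 5.0.

        # Depth-first search over every way the query letters can be placed
        # in the candidate, branching at each matching letter.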
        stack: list[_Search] = [_Search()]
        push = stack.append
        pop = stack.pop
        query_size = len(query)
        find = candidate.find
        # Limit the number of loops out of an abundance of caution.
        # This should be hard to reach without contrived data.
        remaining_loops = 10_000
        while stack and (remaining_loops := remaining_loops - 1):
            search = pop()
            offset = find(query[search.query_offset], search.candidate_offset)
            if offset != -1:
                if not set(candidate[search.candidate_offset :]).issuperset(
                    query[search.query_offset :]
                ):
                    # Early out if there is no chance of a match
                    continue
                advance_branch, branch = search.branch(offset)
                if advance_branch.query_offset == query_size:
                    yield score(advance_branch), advance_branch.offsets
                    push(branch)
                else:
                    push(branch)
                    push(advance_branch)
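

# A minimal usage sketch (illustrative; the candidate strings below are made
# up). Scores come from the heuristic above, so only their relative ordering
# is meaningful.
if __name__ == "__main__":
    fuzzy_search = FuzzySearch()
    for candidate in ("Toggle dark mode", "Take a screenshot", "Quit the app"):
        score, offsets = fuzzy_search.match("tdm", candidate)
        print(f"{candidate!r}: score={score:.2f} offsets={offsets}")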