File: trimmer.py

package info (click to toggle)
python-lunr 0.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,644 kB
  • sloc: python: 3,811; javascript: 114; makefile: 60
file content (21 lines) | stat: -rw-r--r-- 619 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import re


def generate_trimmer(word_characters):
    """Return a trimmer function built from a string of word characters.

    The returned function strips every character that is not in
    `word_characters` from the start and from the end of a token's string.
    Characters between the first and the last word character are kept as
    they are, so "it's." becomes "it's" and "(foo-bar)" becomes "foo-bar".
    A token with no word characters at all (including the empty string) is
    left unchanged.

    Args:
        word_characters: The characters that count as part of a word,
            written as the inside of a regular expression character class
            (it is interpolated verbatim between square brackets, so
            ranges such as a-z are allowed).

    Returns:
        A function `trimmer(token, i=None, tokens=None)` that passes the
        trimming callback to `token.update` and returns the result of that
        call. `i` and `tokens` are accepted but not used.

    TODO: lunr-languages ships with lists of word characters for each
    language. I haven't found an equivalent in Python, we may need to copy
    it.
    """
    # Match a run of non-word characters anchored at either end of the
    # string. Stripping the two edges independently (instead of matching the
    # whole token with one anchored pattern) means interior punctuation does
    # not prevent the edges from being trimmed.
    edges_re = re.compile(r"^[^{0}]+|[^{0}]+$".format(word_characters))

    def trimmer(token, i=None, tokens=None):
        def trim(s, metadata=None):
            trimmed = edges_re.sub("", s)
            # An empty result means `s` had no word characters; hand it back
            # untouched rather than producing an empty token.
            return trimmed or s

        return token.update(trim)

    return trimmer