File: test_plugins.py

package info (click to toggle)
python-lunr 0.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,644 kB
  • sloc: python: 3,811; javascript: 114; makefile: 60
file content (61 lines) | stat: -rw-r--r-- 1,899 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from lunr import lunr, get_default_builder
from lunr.pipeline import Pipeline
from lunr.stemmer import stemmer
from lunr.trimmer import trimmer
from lunr.stop_word_filter import stop_word_filter

# Shared fixture: three short documents indexed by the tests in this module.
# The "body" of documents "a" and "c" spans two lines, so each contains an
# embedded newline character (written explicitly here as "\n").
documents = [
    {
        "id": "a",
        "title": "Mr. Green kills Colonel Mustard",
        "body": (
            "Mr. Green killed Colonel Mustard in the study with the\n"
            "candlestick. Mr. Green is not a very nice fellow."
        ),
        "word_count": 19,
    },
    {
        "id": "b",
        "title": "Plumb waters plant",
        "body": "Professor Plumb has a green plant in his study",
        "word_count": 9,
    },
    {
        "id": "c",
        "title": "Scarlett helps Professor",
        "body": (
            "Miss Scarlett watered Professor Plumbs green plant\n"
            "while he was away from his office last week."
        ),
        "word_count": 16,
    },
]


def test_get_default_builder():
    """Check the function stacks of the default builder's two pipelines.

    The indexing pipeline is expected to hold the trimmer, the stop word
    filter and the stemmer (in that order); the search pipeline is expected
    to hold only the stemmer.
    """
    default_builder = get_default_builder()

    expected_index_stack = [trimmer, stop_word_filter, stemmer]
    assert default_builder.pipeline._stack == expected_index_stack

    expected_search_stack = [stemmer]
    assert default_builder.search_pipeline._stack == expected_search_stack


def test_drop_pipeline_function():
    """Check that a function can be removed from the indexing pipeline.

    The stemmer is removed from the default builder's pipeline before the
    index is built, and a search for "kill" is then expected to return
    nothing.
    """
    custom_builder = get_default_builder()
    custom_builder.pipeline.remove(stemmer)

    index = lunr("id", ("title", "body"), documents, builder=custom_builder)
    hits = index.search("kill")

    # No match because "killed" was not stemmed at indexing time.
    assert hits == []


def test_add_token_metadata():
    """Check that a custom pipeline function can attach token metadata.

    A ``token_length`` function is registered and added to the default
    builder's pipeline, and its metadata key is whitelisted. After indexing,
    the first of the three results for "green" is expected to expose the
    recorded lengths for both the title and the body fields.
    """
    metadata_key = "token_length"
    custom_builder = get_default_builder()

    def token_length(token, i, tokens):
        # Store the length of the token's string form under the metadata key.
        token.metadata[metadata_key] = len(str(token))
        return token

    Pipeline.register_function(token_length)
    custom_builder.pipeline.add(token_length)
    custom_builder.metadata_whitelist.append(metadata_key)

    index = lunr("id", ("title", "body"), documents, builder=custom_builder)

    # Unpacking into exactly three names also requires exactly three results.
    top_hit, _, _ = index.search("green")
    green_metadata = top_hit["match_data"].metadata["green"]

    assert green_metadata["title"][metadata_key] == [5]
    assert green_metadata["body"][metadata_key] == [5, 5]