File: test_language_support.py

package info (click to toggle)
python-lunr 0.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,644 kB
  • sloc: python: 3,811; javascript: 114; makefile: 60
file content (79 lines) | stat: -rw-r--r-- 2,769 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import pytest

from lunr import lunr
from lunr.languages import LANGUAGE_SUPPORT, SUPPORTED_LANGUAGES
from lunr.pipeline import Pipeline

# Spanish-language fixture documents used by the tests in this module. Each
# entry carries an "id" plus the "title" and "text" fields that get indexed.
documents = [
    {
        "id": "a",
        "text": (
            "Este es un ejemplo inventado de lo que sería un documento en el "
            "idioma que se más se habla en España."
        ),
        "title": "Ejemplo de documento en español",
    },
    {
        "id": "b",
        "text": (
            # Adjacent literals are joined verbatim, so the trailing space is
            # required to keep "en" and "idiomas" as separate words.
            "Según un estudio que me acabo de inventar porque soy un experto en "
            "idiomas que se hablan en España."
        ),
        "title": "Español es el tercer idioma más hablado del mundo",
    },
]


class TestLanguageSupport:
    """Tests for building and searching `lunr` indexes with language codes.

    Every test relies on the optional language support; `setup_class` fails
    the whole class up front when `LANGUAGE_SUPPORT` is not `True`.
    """

    @classmethod
    def setup_class(cls):
        """Fail early, with an install hint, if language support is missing."""
        install_hint = "NLTK not found, please run `pip install -e .[languages]`"
        assert LANGUAGE_SUPPORT is True, install_hint

    def test_lunr_function_raises_if_unsupported_language(self):
        """Passing a single unknown language code raises `RuntimeError`."""
        with pytest.raises(RuntimeError):
            lunr(
                ref="id",
                fields=["title", "text"],
                documents=documents,
                languages="foo",
            )

    def test_lunr_function_raises_if_any_unsupported_language_is_passed(self):
        """One unknown code in a list of languages still raises `RuntimeError`."""
        with pytest.raises(RuntimeError):
            lunr(
                ref="id",
                fields=["title", "text"],
                documents=documents,
                languages=["es", "foo"],
            )

    def test_register_languages_in_pipeline_class(self):
        """Each supported language except "en" has a registered stemmer."""
        non_english = {code for code in SUPPORTED_LANGUAGES if code != "en"}
        for code in non_english:
            assert f"stemmer-{code}" in Pipeline.registered_functions

    def test_lunr_function_registers_nltk_stemmers_in_pipeline(self):
        """An es+it index lists both language stemmers in its pipeline repr."""
        index = lunr(
            ref="id",
            fields=["title", "text"],
            documents=documents,
            languages=["es", "it"],
        )
        assert "stemmer-es" in repr(index.pipeline)
        assert "stemmer-it" in repr(index.pipeline)

    def test_lunr_registers_lun_stemmers_in_pipeline_if_language_is_en(self):
        """An en+es index shows "stemmer,stemmer-es" in its pipeline repr."""
        index = lunr(
            ref="id",
            fields=["title", "text"],
            documents=documents,
            languages=["en", "es"],
        )
        rendered_pipeline = repr(index.pipeline)
        assert "stemmer,stemmer-es" in rendered_pipeline

    def test_search_stems_search_terms(self):
        """A Spanish query term is stemmed and matches both fixture documents."""
        index = lunr(
            ref="id",
            fields=["title", "text"],
            documents=documents,
            languages="es",
        )
        # "inventando" is stemmed to "invent"
        matches = index.search("inventando")
        assert len(matches) == 2

    def test_search_stems_search_terms_for_both_languages(self):
        """Queries are stemmed for each language of a mixed es+it index."""
        italian_document = {
            "id": "c",
            "text": (
                "Secondo uno studio che ho appena inventato perché sono un "
                "esperto di lingue parlate in Spagna."
            ),
            "title": "Lo spagnolo è la terza lingua più parlata al mondo",
        }
        corpus = documents + [italian_document]
        index = lunr(
            ref="id",
            fields=["title", "text"],
            documents=corpus,
            languages=["es", "it"],
        )

        italian_matches = index.search("spagna")
        assert len(italian_matches) == 1

        spanish_matches = index.search("inventando")
        assert len(spanish_matches) == 2