# Tests for lunr's optional multi-language support.
import pytest
from lunr import lunr
from lunr.languages import LANGUAGE_SUPPORT, SUPPORTED_LANGUAGES
from lunr.pipeline import Pipeline
# Spanish-language fixture documents used by the tests in this module.
# Each entry carries an "id" (passed to lunr as the document reference) plus
# the "title" and "text" fields that get indexed.
documents = [
    {
        "id": "a",
        "text": (
            "Este es un ejemplo inventado de lo que sería un documento en el "
            "idioma que se más se habla en España."
        ),
        "title": "Ejemplo de documento en español",
    },
    {
        "id": "b",
        "text": (
            # Adjacent string literals are concatenated verbatim, so the first
            # fragment has to end with a space; otherwise "en" and "idiomas"
            # fuse into the single bogus word "enidiomas".
            "Según un estudio que me acabo de inventar porque soy un experto "
            "en idiomas que se hablan en España."
        ),
        "title": "Español es el tercer idioma más hablado del mundo",
    },
]
class TestLanguageSupport:
    """Tests for building and searching ``lunr`` indexes with extra languages."""

    @classmethod
    def setup_class(cls):
        """Fail the whole class unless ``LANGUAGE_SUPPORT`` is exactly ``True``."""
        hint = "NLTK not found, please run `pip install -e .[languages]`"
        assert LANGUAGE_SUPPORT is True, hint

    @staticmethod
    def _build_index(languages, corpus=None):
        """Build an index over ``corpus`` for the given language(s).

        ``corpus`` defaults to the module-level ``documents``. Documents are
        referenced by their ``id`` and indexed on ``title`` and ``text``.
        ``languages`` is forwarded to ``lunr`` untouched, so it may be a single
        code or a list of codes.
        """
        if corpus is None:
            corpus = documents
        return lunr(
            ref="id",
            fields=["title", "text"],
            documents=corpus,
            languages=languages,
        )

    def test_lunr_function_raises_if_unsupported_language(self):
        """A single unknown language code is rejected with ``RuntimeError``."""
        with pytest.raises(RuntimeError):
            self._build_index("foo")

    def test_lunr_function_raises_if_any_unsupported_language_is_passed(self):
        """One unknown code inside a list of languages is also rejected."""
        with pytest.raises(RuntimeError):
            self._build_index(["es", "foo"])

    def test_register_languages_in_pipeline_class(self):
        """Every supported language except ``en`` has a ``stemmer-<code>`` entry."""
        for code in SUPPORTED_LANGUAGES:
            if code == "en":
                continue
            stemmer_label = "stemmer-{}".format(code)
            assert stemmer_label in Pipeline.registered_functions

    def test_lunr_function_registers_nltk_stemmers_in_pipeline(self):
        """Each requested language's stemmer label shows up in the pipeline repr."""
        pipeline_repr = repr(self._build_index(["es", "it"]).pipeline)
        for stemmer_label in ("stemmer-es", "stemmer-it"):
            assert stemmer_label in pipeline_repr

    def test_lunr_registers_lun_stemmers_in_pipeline_if_language_is_en(self):
        """With ``en`` requested first, a plain ``stemmer`` precedes ``stemmer-es``."""
        index = self._build_index(["en", "es"])
        assert "stemmer,stemmer-es" in repr(index.pipeline)

    def test_search_stems_search_terms(self):
        """A Spanish query term is stemmed before it is matched."""
        index = self._build_index("es")
        # The query "inventando" is stemmed to "invent".
        matches = index.search("inventando")
        assert len(matches) == 2

    def test_search_stems_search_terms_for_both_languages(self):
        """Queries are matched against a mixed Spanish and Italian corpus."""
        italian_document = {
            "id": "c",
            "text": (
                "Secondo uno studio che ho appena inventato perché sono un "
                "esperto di lingue parlate in Spagna."
            ),
            "title": "Lo spagnolo è la terza lingua più parlata al mondo",
        }
        index = self._build_index(
            ["es", "it"],
            corpus=documents + [italian_document],
        )
        # (query, expected number of matching documents), checked in order.
        expected_hits = (("spagna", 1), ("inventando", 2))
        for query, hit_count in expected_hits:
            assert len(index.search(query)) == hit_count
|