1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
from pathlib import Path
import pytest
import wn
@pytest.mark.usefixtures("mini_db_1_1")
def test_wordnet_lexicons():
en = wn.Wordnet("test-en")
assert len(en.lexicons()) == 1
assert len(en.expanded_lexicons()) == 0
en1 = wn.Wordnet("test-en:1")
assert en.lexicons() == en1.lexicons()
assert en.expanded_lexicons() == en1.expanded_lexicons()
en2 = wn.Wordnet(lang="en")
assert len(en2.lexicons()) == 2
assert len(en2.expanded_lexicons()) == 0
es = wn.Wordnet("test-es")
assert len(es.lexicons()) == 1
assert len(es.expanded_lexicons()) == 0
es2 = wn.Wordnet("test-es", expand="test-en")
assert len(es2.lexicons()) == 1
assert len(es2.expanded_lexicons()) == 1
ja = wn.Wordnet("test-ja")
assert len(ja.lexicons()) == 1
assert len(ja.expanded_lexicons()) == 1
ja2 = wn.Wordnet("test-ja", expand="")
assert len(ja2.lexicons()) == 1
assert len(ja2.expanded_lexicons()) == 0
@pytest.mark.usefixtures("mini_db")
def test_wordnet_normalize():
es = wn.Wordnet("test-es")
assert es.words("Informacion") == es.words("información")
assert es.words("ínfórmácíón") == es.words("información")
es = wn.Wordnet("test-es", normalizer=None)
assert es.words("informacion") == []
assert es.words("Información") == []
# The following doesn't necessarily work because any non-None
# normalizer causes the normalized form column to be tested with
# the original form
# es = wn.Wordnet('test-es', normalizer=str.lower)
# assert es.words('informacion') == []
# assert es.words('Información') == es.words('información')
@pytest.mark.usefixtures("mini_db")
def test_wordnet_lemmatize():
# default lemmatizer compares alternative forms
en = wn.Wordnet("test-en")
assert en.words("examples") == []
assert en.words("exemplifying") == en.words("exemplify")
assert en.words("data") == en.words("datum")
en = wn.Wordnet("test-en", search_all_forms=False)
assert en.words("examples") == []
assert en.words("exemplifying") == []
assert en.words("data") == []
def morphy_lite(form, pos):
result = {pos: {form}}
if pos in ("n", None) and form.endswith("s"):
result.setdefault("n", set()).add(form[:-1])
return result
en = wn.Wordnet("test-en", lemmatizer=morphy_lite, search_all_forms=False)
assert en.words("examples", pos="n") == en.words("example")
assert en.words("examples") == en.words("example")
assert en.words("exemplifying") == []
assert en.words("data") == []
en = wn.Wordnet("test-en", lemmatizer=morphy_lite, search_all_forms=True)
assert en.words("data") == en.words("datum")
assert en.words("exemplifying") == en.words("exemplify")
def test_portable_entities_issue_226(monkeypatch, tmp_path, datadir):
dir = tmp_path / "wn_issue_226"
with monkeypatch.context() as m:
m.setattr(wn.config, "data_directory", Path(dir))
wn.add(datadir / "mini-lmf-1.0.xml")
en = wn.Wordnet("test-en")
info1 = en.synsets("information")[0]
wn.remove("test-en")
wn.add(datadir / "mini-lmf-1.0.xml")
info2 = en.synsets("information")[0] # en Wordnet object still works
assert info1 == info2 # synsets are equivalent
wn._db.clear_connections()
|