File: wordnet_test.py

package info (click to toggle)
python-wn 1.0.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,100 kB
  • sloc: python: 8,429; xml: 566; sql: 238; makefile: 12
file content (97 lines) | stat: -rw-r--r-- 3,350 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from pathlib import Path

import pytest

import wn


@pytest.mark.usefixtures("mini_db_1_1")
def test_wordnet_lexicons():
    """Check lexicon and expanded-lexicon counts for several Wordnet specs."""

    def lexicon_counts(wordnet):
        # (number of lexicons, number of expanded lexicons)
        return len(wordnet.lexicons()), len(wordnet.expanded_lexicons())

    english = wn.Wordnet("test-en")
    assert lexicon_counts(english) == (1, 0)

    # the "test-en:1" specifier must resolve to the same lexicons as "test-en"
    english_v1 = wn.Wordnet("test-en:1")
    assert english.lexicons() == english_v1.lexicons()
    assert english.expanded_lexicons() == english_v1.expanded_lexicons()

    # selecting by language rather than by lexicon specifier
    all_english = wn.Wordnet(lang="en")
    assert lexicon_counts(all_english) == (2, 0)

    spanish = wn.Wordnet("test-es")
    assert lexicon_counts(spanish) == (1, 0)

    spanish_expanded = wn.Wordnet("test-es", expand="test-en")
    assert lexicon_counts(spanish_expanded) == (1, 1)

    # with no expand argument, test-ja is expected to report one
    # expanded lexicon ...
    japanese = wn.Wordnet("test-ja")
    assert lexicon_counts(japanese) == (1, 1)

    # ... and none when expand is the empty string
    japanese_unexpanded = wn.Wordnet("test-ja", expand="")
    assert lexicon_counts(japanese_unexpanded) == (1, 0)


@pytest.mark.usefixtures("mini_db")
def test_wordnet_normalize():
    """Check word lookup with the default normalizer and with none."""
    normalizing = wn.Wordnet("test-es")
    # with the default normalizer, case and accent variants of the query
    # find the same words as the exact form
    for variant in ("Informacion", "ínfórmácíón"):
        assert normalizing.words(variant) == normalizing.words("información")

    # with normalizer=None these inexact forms find nothing
    exact_only = wn.Wordnet("test-es", normalizer=None)
    for variant in ("informacion", "Información"):
        assert exact_only.words(variant) == []

    # NOTE: a custom normalizer is deliberately not tested here. The
    # assertions below don't necessarily hold, because any non-None
    # normalizer causes the normalized form column to be tested with
    # the original form:
    #
    #   es = wn.Wordnet('test-es', normalizer=str.lower)
    #   assert es.words('informacion') == []
    #   assert es.words('Información') == es.words('información')


@pytest.mark.usefixtures("mini_db")
def test_wordnet_lemmatize():
    """Check word lookup across lemmatizer / search_all_forms settings."""

    def morphy_lite(form, pos):
        # Toy lemmatizer: always offers the form itself under the given
        # pos, and for nouns (or an unspecified pos) also offers the
        # form with a trailing "s" removed.
        candidates = {pos: {form}}
        noun_allowed = pos is None or pos == "n"
        if noun_allowed and form.endswith("s"):
            if "n" not in candidates:
                candidates["n"] = set()
            candidates["n"].add(form[:-1])
        return candidates

    # default settings: queries are compared against alternative forms
    default_en = wn.Wordnet("test-en")
    assert default_en.words("examples") == []
    for inflected, lemma in (("exemplifying", "exemplify"), ("data", "datum")):
        assert default_en.words(inflected) == default_en.words(lemma)

    # search_all_forms=False and no lemmatizer: none of these are found
    lemmas_only = wn.Wordnet("test-en", search_all_forms=False)
    for query in ("examples", "exemplifying", "data"):
        assert lemmas_only.words(query) == []

    # custom lemmatizer, still without searching all forms
    with_morphy = wn.Wordnet(
        "test-en",
        lemmatizer=morphy_lite,
        search_all_forms=False,
    )
    assert with_morphy.words("examples", pos="n") == with_morphy.words("example")
    assert with_morphy.words("examples") == with_morphy.words("example")
    for query in ("exemplifying", "data"):
        assert with_morphy.words(query) == []

    # custom lemmatizer combined with searching all forms
    morphy_all_forms = wn.Wordnet(
        "test-en",
        lemmatizer=morphy_lite,
        search_all_forms=True,
    )
    for inflected, lemma in (("data", "datum"), ("exemplifying", "exemplify")):
        assert morphy_all_forms.words(inflected) == morphy_all_forms.words(lemma)


def test_portable_entities_issue_226(monkeypatch, tmp_path, datadir):
    """Regression test for issue 226: entities survive a lexicon re-add.

    A synset is fetched from a ``Wordnet`` object, the lexicon is removed
    and added again, and the same ``Wordnet`` object must still return an
    equal synset.

    The test runs against a fresh data directory under ``tmp_path`` by
    patching ``wn.config.data_directory`` for the duration of the
    ``monkeypatch`` context. ``datadir`` is a fixture defined outside this
    file; it is used here as the directory holding ``mini-lmf-1.0.xml``.
    """
    # `data_dir` rather than `dir`, to avoid shadowing the builtin
    data_dir = tmp_path / "wn_issue_226"
    lexicon_file = datadir / "mini-lmf-1.0.xml"
    with monkeypatch.context() as m:
        m.setattr(wn.config, "data_directory", Path(data_dir))
        try:
            wn.add(lexicon_file)
            en = wn.Wordnet("test-en")
            info1 = en.synsets("information")[0]
            wn.remove("test-en")
            wn.add(lexicon_file)
            info2 = en.synsets("information")[0]  # en Wordnet object still works
            assert info1 == info2  # synsets are equivalent
        finally:
            # Clear connections even when the test fails, and do it while
            # the patched data directory is still in effect.
            # NOTE(review): presumably this keeps connections to the
            # temporary database from leaking into later tests -- confirm
            # against wn._db.
            wn._db.clear_connections()