File: compat_sensekey_test.py

package info (click to toggle)
python-wn 1.0.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,100 kB
  • sloc: python: 8,429; xml: 566; sql: 238; makefile: 12
file content (152 lines) | stat: -rw-r--r-- 6,296 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import pytest

import wn
from wn.compat import sensekey


def test_unescape_oewn_sense_key():
    """Check ``sensekey.unescape`` with ``flavor="oewn"`` on a table of cases.

    Each entry pairs an escaped input with the string that unescaping
    is expected to produce.
    """
    cases = [
        # nothing to unescape
        ("", ""),
        ("abc", "abc"),
        (".", "."),  # only becomes : in second part of key
        # escape patterns
        ("-ap-", "'"),
        ("-ex-", "!"),
        ("-cm-", ","),
        ("-cn-", ":"),
        ("-pl-", "+"),
        ("-sl-", "/"),
        # adjacent escapes need their own dashes
        ("-ap-ex-", "'ex-"),
        ("-ap--ex-", "'!"),
        # invalid escapes are unchanged
        ("-foo-", "-foo-"),  # not an escape sequence
        ("-sp-", "-sp-"),  # not valid in lemma portion
        ("ap-", "ap-"),  # no preceding dash
        ("-ap", "-ap"),  # no trailing dash
        ("-AP-", "-AP-"),  # case sensitivity
        # full key, second part escapes differently
        ("abc__1.23.00..", "abc%1:23:00::"),
        ("abc__1.23.00.foo-sp-bar.", "abc%1:23:00:foo_bar:"),
        ("abc__1.23.00.foo-ap-bar.", "abc%1:23:00:foo-ap-bar:"),
    ]
    for escaped, plain in cases:
        assert sensekey.unescape(escaped, flavor="oewn") == plain


def test_escape_oewn_sense_key():
    """Check ``sensekey.escape`` with ``flavor="oewn"`` on a table of cases.

    Each entry pairs a plain input with the escaped string that
    ``escape`` is expected to produce.
    """
    cases = [
        # nothing to escape
        ("", ""),
        ("abc", "abc"),
        (".", "."),  # only becomes : in second part of key
        # escape patterns
        ("'", "-ap-"),
        ("!", "-ex-"),
        (",", "-cm-"),
        (":", "-cn-"),
        ("+", "-pl-"),
        ("/", "-sl-"),
        # adjacent escapes need their own dashes
        ("'!", "-ap--ex-"),
        # full key, second part escapes differently
        ("abc%1:23:00::", "abc__1.23.00.."),
        ("abc%1:23:00:foo_bar:", "abc__1.23.00.foo-sp-bar."),
        ("abc%1:23:00:foo'bar:", "abc__1.23.00.foo'bar."),
    ]
    for plain, escaped in cases:
        assert sensekey.escape(plain, flavor="oewn") == escaped


def test_unescape_oewn_v2_sense_key():
    """Check ``sensekey.unescape`` with ``flavor="oewn-v2"``.

    Covers the v2 escape tokens (``-apos-``, ``-excl-``, ``-comma-``,
    ``-colon-``, ``-plus-``, ``-sol-``, ``--``), malformed or unknown
    tokens that must pass through unchanged, and full keys whose second
    part (after ``__``) follows different rules from the lemma part.

    The boundary cases (missing dash, wrong case, lemma escape in the
    second part) are exercised with the v2 token ``apos`` as well as the
    legacy ``ap`` token, because ``ap`` is not an escape in this flavor
    and would pass those checks vacuously.
    """

    def unescape(s: str) -> str:
        return sensekey.unescape(s, flavor="oewn-v2")

    assert unescape("") == ""
    assert unescape("abc") == "abc"
    assert unescape(".") == "."  # only becomes : in second part of key
    # escape patterns
    assert unescape("-apos-") == "'"
    assert unescape("-excl-") == "!"
    assert unescape("-comma-") == ","
    assert unescape("-colon-") == ":"
    assert unescape("-plus-") == "+"
    assert unescape("-sol-") == "/"
    assert unescape("--") == "-"
    # adjacent escapes need their own dashes
    assert unescape("-apos-excl-") == "'excl-"
    assert unescape("-apos--excl-") == "'!"
    # invalid escapes are unchanged
    assert unescape("-foo-") == "-foo-"  # not an escape sequence
    assert unescape("-sp-") == "-sp-"  # not valid in lemma portion
    assert unescape("apos-") == "apos-"  # no preceding dash
    assert unescape("-apos") == "-apos"  # no trailing dash
    assert unescape("-APOS-") == "-APOS-"  # case sensitivity
    # legacy (non-v2) tokens are not escapes in this flavor
    assert unescape("-ap-") == "-ap-"
    assert unescape("ap-") == "ap-"
    assert unescape("-ap") == "-ap"
    assert unescape("-AP-") == "-AP-"
    # full key, second part escapes differently
    assert unescape("abc__1.23.00..") == "abc%1:23:00::"
    assert unescape("abc__1.23.00.foo-sp-bar.") == "abc%1:23:00:foo_bar:"
    # lemma escapes (v2 or legacy) are left alone in the second part
    assert unescape("abc__1.23.00.foo-apos-bar.") == "abc%1:23:00:foo-apos-bar:"
    assert unescape("abc__1.23.00.foo-ap-bar.") == "abc%1:23:00:foo-ap-bar:"


def test_escape_oewn_v2_sense_key():
    """Check ``sensekey.escape`` with ``flavor="oewn-v2"`` on a table of cases.

    Each entry pairs a plain input with the escaped string that
    ``escape`` is expected to produce.
    """
    cases = [
        # nothing to escape
        ("", ""),
        ("abc", "abc"),
        (".", "."),  # only becomes : in second part of key
        # escape patterns
        ("'", "-apos-"),
        ("!", "-excl-"),
        (",", "-comma-"),
        (":", "-colon-"),
        ("+", "-plus-"),
        ("/", "-sol-"),
        ("-", "--"),
        # adjacent escapes need their own dashes
        ("'!", "-apos--excl-"),
        # full key, second part escapes differently
        ("abc%1:23:00::", "abc__1.23.00.."),
        ("abc%1:23:00:foo_bar:", "abc__1.23.00.foo-sp-bar."),
        ("abc%1:23:00:foo'bar:", "abc__1.23.00.foo'bar."),
    ]
    for plain, escaped in cases:
        assert sensekey.escape(plain, flavor="oewn-v2") == escaped


@pytest.mark.usefixtures("uninitialized_datadir")
def test_sense_key_getter(datadir):
    """Check every ``sense_key_getter`` against a sense from every lexicon.

    Both fixture files are added, then a getter and a sense are obtained
    for each of the three lexicon specifiers. The expectation matrix
    below gives, per getter, the result for each sense in ``specs`` order.
    """
    for filename in ("sense-key-variations.xml", "sense-key-variations2.xml"):
        wn.add(datadir / filename)

    specs = ("omw-en:1.4", "oewn:2024", "oewn:2025")
    getters = {spec: sensekey.sense_key_getter(spec) for spec in specs}

    sense_ids = {
        "omw-en:1.4": "omw-en--apos-s_Gravenhage-08950407-n",
        "oewn:2024": "oewn--ap-s_gravenhage__1.15.00..",
        "oewn:2025": "oewn--apos-s_gravenhage__1.15.00..",
    }
    senses = {spec: wn.sense(sense_ids[spec], lexicon=spec) for spec in specs}

    # rows: getter lexicon; columns: sense lexicon, in ``specs`` order
    expected = {
        "omw-en:1.4": ("'s_gravenhage%1:15:00::", None, None),
        "oewn:2024": (
            None,
            "'s_gravenhage%1:15:00::",
            "-apos-s_gravenhage%1:15:00::",
        ),
        "oewn:2025": (
            None,
            "-ap-s_gravenhage%1:15:00::",
            "'s_gravenhage%1:15:00::",
        ),
    }
    for getter_spec, row in expected.items():
        get_sense_key = getters[getter_spec]
        for sense_spec, want in zip(specs, row):
            got = get_sense_key(senses[sense_spec])
            if want is None:
                assert got is None
            else:
                assert got == want


@pytest.mark.usefixtures("uninitialized_datadir")
def test_sense_getter(datadir):
    """Check that each ``sense_getter`` maps one sense key to its own sense.

    Both fixture files are added; then, for each lexicon specifier, the
    getter built for it must return the sense fetched from that lexicon
    when given the same sense key.
    """
    for filename in ("sense-key-variations.xml", "sense-key-variations2.xml"):
        wn.add(datadir / filename)

    # (lexicon specifier, ID of the sense to fetch from that lexicon)
    targets = [
        ("omw-en:1.4", "omw-en--apos-s_Gravenhage-08950407-n"),
        ("oewn:2024", "oewn--ap-s_gravenhage__1.15.00.."),
        ("oewn:2025", "oewn--apos-s_gravenhage__1.15.00.."),
    ]
    getters = [sensekey.sense_getter(spec) for spec, _ in targets]
    senses = [wn.sense(sense_id, lexicon=spec) for spec, sense_id in targets]

    for get_sense, sense in zip(getters, senses):
        assert get_sense("'s_gravenhage%1:15:00::") == sense