File: _config.py

package info (click to toggle)
python-wn 1.0.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,100 kB
  • sloc: python: 8,429; xml: 566; sql: 238; makefile: 12
file content (299 lines) | stat: -rw-r--r-- 10,025 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
"""
Local configuration settings.
"""

import atexit
from collections.abc import Sequence
from contextlib import ExitStack
from importlib.resources import as_file, files
from pathlib import Path
from typing import Any

try:
    # python_version >= 3.11
    import tomllib  # type: ignore
except ImportError:
    import tomli as tomllib  # type: ignore

from wn._exceptions import ConfigurationError, ProjectError
from wn._types import AnyPath
from wn._util import format_lexicon_specifier, short_hash, split_lexicon_specifier
from wn.constants import _WORDNET

# The index file is a project file of Wn. ``as_file`` may have to extract
# the resource to a temporary file (e.g., when the package is imported
# from a zip archive) and removes that file when its context exits, so
# the context is held open until interpreter shutdown to keep
# INDEX_FILE_PATH valid for as long as the module is in use.
_resource_stack = ExitStack()
atexit.register(_resource_stack.close)
INDEX_FILE_PATH = _resource_stack.enter_context(as_file(files("wn") / "index.toml"))
# The directory where downloaded and added data will be stored.
DEFAULT_DATA_DIRECTORY = Path.home() / ".wn_data"
# The name of the database file inside the data directory.
DATABASE_FILENAME = "wn.db"


class WNConfig:
    """Local configuration: data locations and the index of known projects.

    The project index maps project ids to dictionaries with the keys
    ``"type"``, ``"label"``, ``"language"``, ``"license"``,
    ``"versions"``, and optionally ``"error"``.
    """

    def __init__(self) -> None:
        self._data_directory = DEFAULT_DATA_DIRECTORY
        self._projects: dict[str, dict] = {}
        self._dbpath = self._data_directory / DATABASE_FILENAME
        # NOTE(review): this flag is only stored here; presumably it is
        # consulted by the database layer -- confirm against callers.
        self.allow_multithreading = False

    @property
    def data_directory(self) -> Path:
        """The file system directory where Wn's data is stored.

        Assign a new path to change where the database and downloads
        are stored. The directory (including any missing parent
        directories) is created when this property is read.

        >>> wn.config.data_directory = "~/.cache/wn"
        >>> wn.config.database_path
        PosixPath('/home/username/.cache/wn/wn.db')
        >>> wn.config.downloads_directory
        PosixPath('/home/username/.cache/wn/downloads')

        """
        directory = self._data_directory
        # parents=True so a nested location such as ~/.cache/wn works
        # even if the intermediate directories do not exist yet
        directory.mkdir(parents=True, exist_ok=True)
        return directory

    @data_directory.setter
    def data_directory(self, path: AnyPath) -> None:
        directory = Path(path).expanduser()
        if directory.exists() and not directory.is_dir():
            raise ConfigurationError(
                f"path exists and is not a directory: {directory}"
            )
        self._data_directory = directory
        # the database path always follows the data directory
        self._dbpath = directory / DATABASE_FILENAME

    @property
    def database_path(self) -> Path:
        """The path to the database file.

        The database path is derived from :attr:`data_directory` and
        cannot be changed directly.

        """
        return self._dbpath

    @property
    def downloads_directory(self) -> Path:
        """The file system directory where downloads are cached.

        The downloads directory is derived from :attr:`data_directory`
        and cannot be changed directly. It is created when this
        property is read.

        """
        directory = self.data_directory / "downloads"
        directory.mkdir(exist_ok=True)
        return directory

    @property
    def index(self) -> dict[str, dict]:
        """The project index.

        This is the configuration's own mapping, not a copy.
        """
        return self._projects

    def add_project(
        self,
        id: str,
        type: str = _WORDNET,
        label: str | None = None,
        language: str | None = None,
        license: str | None = None,
        error: str | None = None,
    ) -> None:
        """Add a new wordnet project to the index.

        The project is created without any versions; use
        :meth:`add_project_version` to add them.

        Arguments:
            id: short identifier of the project
            type: project type (default 'wordnet')
            label: full name of the project
            language: `BCP 47`_ language code of the resource
            license: link or name of the project's default license
            error: if set, the error message to use when the project
              is accessed

        Raises:
            ValueError: if a project with the same *id* was already
              added

        .. _BCP 47: https://en.wikipedia.org/wiki/IETF_language_tag
        """
        if id in self._projects:
            raise ValueError(f"project already added: {id}")
        project: dict[str, Any] = {
            "type": type,
            "label": label,
            "language": language,
            "versions": {},
            "license": license,
        }
        # the "error" key is only present for projects that define one
        if error:
            project["error"] = error
        self._projects[id] = project

    def add_project_version(
        self,
        id: str,
        version: str,
        url: str | None = None,
        error: str | None = None,
        license: str | None = None,
    ) -> None:
        """Add a new resource version for a project.

        At most one of *url* or *error* may be specified. If neither
        is given, the version is recorded without resource URLs. An
        existing entry for the same *version* is replaced.

        Arguments:
            id: short identifier of the project
            version: version string of the resource
            url: space-separated list of web addresses for the resource
            license: link or name of the resource's license; if not
              given, the project's default license will be used.
            error: if set, the error message to use when the project
              is accessed

        Raises:
            ConfigurationError: if both *url* and *error* are given
            KeyError: if no project with *id* has been added

        """
        if url and error:
            spec = format_lexicon_specifier(id, version)
            raise ConfigurationError(f"{spec} specifies both url and error")

        version_data: dict[str, Any] = {}
        if url:
            version_data["resource_urls"] = url.split()
        elif error:
            version_data["error"] = error
        if license:
            version_data["license"] = license
        self._projects[id]["versions"][version] = version_data

    def get_project_info(self, arg: str) -> dict:
        """Return information about an indexed project version.

        If the project has been downloaded and cached, the ``"cache"``
        key will point to the path of the cached file, otherwise its
        value is ``None``. If *arg* does not name a version (or names
        ``*``), the first version added to the project is used.

        Arguments:
            arg: a project specifier

        Raises:
            ProjectError: if the project or version is not indexed, if
              the project has no versions, or if the project or
              version was registered with an error message

        Example:

            >>> info = wn.config.get_project_info("oewn:2021")
            >>> info["label"]
            'Open English WordNet'

        """
        id, version = split_lexicon_specifier(arg)
        if id not in self._projects:
            raise ProjectError(f"no such project id: {id}")
        project: dict = self._projects[id]
        if "error" in project:
            raise ProjectError(project["error"])

        versions: dict = project["versions"]
        if not version or version == "*":
            # fall back to the first version in insertion order
            version = next(iter(versions), "")
        if not version:
            raise ProjectError(f"no versions available for {id}")
        elif version not in versions:
            raise ProjectError(f"no such version: {version!r} ({id})")
        info = versions[version]
        if "error" in info:
            raise ProjectError(info["error"])

        urls = info.get("resource_urls", [])

        return {
            "id": id,
            "version": version,
            "type": project["type"],
            "label": project["label"],
            "language": project["language"],
            # a version-specific license overrides the project default
            "license": info.get("license", project.get("license")),
            "resource_urls": urls,
            "cache": _get_cache_path_for_urls(self, urls),
        }

    def get_cache_path(self, url: str) -> Path:
        """Return the path for caching *url*.

        Note that in general this is just a path operation and does
        not signify that the file exists in the file system. Reading
        :attr:`downloads_directory` does, however, create that
        directory if it is missing.

        """
        filename = short_hash(url)
        return self.downloads_directory / filename

    def update(self, data: dict) -> None:
        """Update the configuration with items in *data*.

        Items are only inserted or replaced, not deleted. If a project
        index is provided in the ``"index"`` key, then either the
        project must not already be indexed or any project fields
        (label, language, or license) that are specified must be equal
        to the indexed project.

        Changes are applied as they are processed, so items handled
        before an error is raised remain in effect.

        Raises:
            ConfigurationError: if a specified project field differs
              from the indexed project, if a version gives a url while
              the project in *data* specifies an error, or if a
              version specifies both a url and an error

        """
        if "data_directory" in data:
            self.data_directory = data["data_directory"]
        for id, project in data.get("index", {}).items():
            if id in self._projects:
                # validate that they are the same
                existing = self._projects[id]
                for attr in ("label", "language", "license"):
                    if attr in project and project[attr] != existing[attr]:
                        raise ConfigurationError(f"{attr} mismatch for {id}")
            else:
                self.add_project(
                    id,
                    type=project.get("type", _WORDNET),
                    label=project.get("label"),
                    language=project.get("language"),
                    license=project.get("license"),
                    error=project.get("error"),
                )
            for version, info in project.get("versions", {}).items():
                if "url" in info and "error" in project:
                    spec = format_lexicon_specifier(id, version)
                    raise ConfigurationError(f"{spec} url specified with default error")
                self.add_project_version(
                    id,
                    version,
                    url=info.get("url"),
                    license=info.get("license"),
                    error=info.get("error"),
                )

    def load_index(self, path: AnyPath) -> None:
        """Load and update with the project index at *path*.

        The project index is a TOML_ file containing project and
        version information. For example:

        .. code-block:: toml

           [ewn]
             label = "Open English WordNet"
             language = "en"
             license = "https://creativecommons.org/licenses/by/4.0/"
             [ewn.versions.2019]
               url = "https://en-word.net/static/english-wordnet-2019.xml.gz"
             [ewn.versions.2020]
               url = "https://en-word.net/static/english-wordnet-2020.xml.gz"

        Raises:
            ConfigurationError: if the file is not valid TOML, or if
              its contents are rejected by :meth:`update`

        .. _TOML: https://toml.io

        """
        path = Path(path).expanduser()
        # tomllib requires the file to be opened in binary mode
        with path.open("rb") as indexfile:
            try:
                index = tomllib.load(indexfile)
            except tomllib.TOMLDecodeError as exc:
                raise ConfigurationError("malformed index file") from exc
        self.update({"index": index})


def _get_cache_path_for_urls(
    config: WNConfig,
    urls: Sequence[str],
) -> Path | None:
    """Return the cache path of the first of *urls* that is cached.

    Each url is mapped to its cache path via *config*, in order, and
    the first path that is an existing file is returned. If none of
    them exists as a file (or *urls* is empty), ``None`` is returned.
    """
    # lazily evaluated, so lookup stops at the first existing file
    candidates = (config.get_cache_path(url) for url in urls)
    return next((cached for cached in candidates if cached.is_file()), None)


# The module-level configuration object; at import time it is populated
# with the project index file that is part of the Wn package.
config = WNConfig()
config.load_index(INDEX_FILE_PATH)