1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
|
"""
Local configuration settings.
"""
import atexit
from collections.abc import Sequence
from contextlib import ExitStack
from importlib.resources import as_file, files
from pathlib import Path
from typing import Any

try:
    # python_version >= 3.11
    import tomllib  # type: ignore
except ImportError:
    import tomli as tomllib  # type: ignore

from wn._exceptions import ConfigurationError, ProjectError
from wn._types import AnyPath
from wn._util import format_lexicon_specifier, short_hash, split_lexicon_specifier
from wn.constants import _WORDNET
# The index file is a project file of Wn.
#
# as_file() may extract the resource to a temporary file (e.g. when the
# package is imported from a zip archive) and removes that file when its
# context exits. The path is used after this point, so the context is kept
# open for the lifetime of the process and closed at interpreter exit.
_index_file_manager = ExitStack()
atexit.register(_index_file_manager.close)
INDEX_FILE_PATH = _index_file_manager.enter_context(
    as_file(files("wn") / "index.toml")
)

# The directory where downloaded and added data will be stored.
DEFAULT_DATA_DIRECTORY = Path.home() / ".wn_data"
# Name of the database file inside the data directory.
DATABASE_FILENAME = "wn.db"
class WNConfig:
    """Configuration holder: data locations and the project index.

    An instance tracks the data directory (and the database path derived
    from it) together with a mapping of project ids to project metadata
    and versions. ``allow_multithreading`` is initialised to ``False`` and
    is only stored here; nothing in this class reads it.
    """

    def __init__(self) -> None:
        self._data_directory = DEFAULT_DATA_DIRECTORY
        self._projects: dict[str, dict] = {}
        self._dbpath = self._data_directory / DATABASE_FILENAME
        self.allow_multithreading = False

    @property
    def data_directory(self) -> Path:
        """The file system directory where Wn's data is stored.

        Assign a new path to change where the database and downloads
        are stored. The directory is created on access if it does not
        exist yet.

        >>> wn.config.data_directory = "~/.cache/wn"
        >>> wn.config.database_path
        PosixPath('/home/username/.cache/wn/wn.db')
        >>> wn.config.downloads_directory
        PosixPath('/home/username/.cache/wn/downloads')

        """
        directory = self._data_directory
        # parents=True so nested locations (such as "~/.cache/wn" above)
        # also work when intermediate directories do not exist yet.
        directory.mkdir(parents=True, exist_ok=True)
        return directory

    @data_directory.setter
    def data_directory(self, path: AnyPath) -> None:
        directory = Path(path).expanduser()
        # A not-yet-existing path is fine (it is created lazily by the
        # getter); an existing non-directory can never become usable.
        if directory.exists() and not directory.is_dir():
            raise ConfigurationError(
                f"path exists and is not a directory: {directory}"
            )
        self._data_directory = directory
        self._dbpath = directory / DATABASE_FILENAME

    @property
    def database_path(self) -> Path:
        """The path to the database file.

        The database path is derived from :attr:`data_directory` and
        cannot be changed directly.

        """
        return self._dbpath

    @property
    def downloads_directory(self) -> Path:
        """The file system directory where downloads are cached.

        The downloads directory is derived from :attr:`data_directory`
        and cannot be changed directly. It is created on access if it
        does not exist yet.

        """
        downloads = self.data_directory / "downloads"
        downloads.mkdir(exist_ok=True)
        return downloads

    @property
    def index(self) -> dict[str, dict]:
        """The project index."""
        return self._projects

    def add_project(
        self,
        id: str,
        type: str = _WORDNET,
        label: str | None = None,
        language: str | None = None,
        license: str | None = None,
        error: str | None = None,
    ) -> None:
        """Add a new wordnet project to the index.

        Arguments:
            id: short identifier of the project
            type: project type (default 'wordnet')
            label: full name of the project
            language: `BCP 47`_ language code of the resource
            license: link or name of the project's default license
            error: if set, the error message to use when the project
              is accessed

        Raises:
            ValueError: if a project with the same *id* was already added

        .. _BCP 47: https://en.wikipedia.org/wiki/IETF_language_tag

        """
        if id in self._projects:
            raise ValueError(f"project already added: {id}")
        entry: dict[str, Any] = {
            "type": type,
            "label": label,
            "language": language,
            "versions": {},
            "license": license,
        }
        # The "error" key is only present when an error was given;
        # get_project_info() tests for the key's presence.
        if error:
            entry["error"] = error
        self._projects[id] = entry

    def add_project_version(
        self,
        id: str,
        version: str,
        url: str | None = None,
        error: str | None = None,
        license: str | None = None,
    ) -> None:
        """Add a new resource version for a project.

        At most one of *url* or *error* may be specified. An existing
        entry for the same *version* is replaced.

        Arguments:
            id: short identifier of the project
            version: version string of the resource
            url: space-separated list of web addresses for the resource
            license: link or name of the resource's license; if not
              given, the project's default license will be used.
            error: if set, the error message to use when the project
              is accessed

        Raises:
            ConfigurationError: if both *url* and *error* are given
            KeyError: if no project with *id* has been added

        """
        if url and error:
            spec = format_lexicon_specifier(id, version)
            raise ConfigurationError(f"{spec} specifies both url and error")

        version_data: dict[str, Any] = {}
        if url:
            version_data["resource_urls"] = url.split()
        elif error:
            version_data["error"] = error
        if license:
            version_data["license"] = license

        self._projects[id]["versions"][version] = version_data

    def get_project_info(self, arg: str) -> dict:
        """Return information about an indexed project version.

        If the project has been downloaded and cached, the ``"cache"``
        key will point to the path of the cached file, otherwise its
        value is ``None``.

        If the specifier has no version, or the version is ``*``, the
        first version listed for the project is used.

        Arguments:
            arg: a project specifier

        Raises:
            ProjectError: if the project or version is not indexed, if
              the project has no versions, or if the project or version
              was registered with an error message

        Example:

            >>> info = wn.config.get_project_info("oewn:2021")
            >>> info["label"]
            'Open English WordNet'

        """
        id, version = split_lexicon_specifier(arg)
        if id not in self._projects:
            raise ProjectError(f"no such project id: {id}")
        project: dict = self._projects[id]
        if "error" in project:
            raise ProjectError(project["error"])

        versions: dict = project["versions"]
        if not version or version == "*":
            # Unspecified version: fall back to the first one registered.
            version = next(iter(versions), "")
        if not version:
            raise ProjectError(f"no versions available for {id}")
        elif version not in versions:
            raise ProjectError(f"no such version: {version!r} ({id})")

        info = versions[version]
        if "error" in info:
            raise ProjectError(info["error"])

        urls = info.get("resource_urls", [])
        return {
            "id": id,
            "version": version,
            "type": project["type"],
            "label": project["label"],
            "language": project["language"],
            # A version-specific license overrides the project default.
            "license": info.get("license", project.get("license")),
            "resource_urls": urls,
            "cache": _get_cache_path_for_urls(self, urls),
        }

    def get_cache_path(self, url: str) -> Path:
        """Return the path for caching *url*.

        Note that in general this is just a path operation and does
        not signify that the file exists in the file system.

        """
        filename = short_hash(url)
        return self.downloads_directory / filename

    def update(self, data: dict) -> None:
        """Update the configuration with items in *data*.

        Items are only inserted or replaced, not deleted. If a project
        index is provided in the ``"index"`` key, then either the
        project must not already be indexed or any project fields
        (label, language, or license) that are specified must be equal
        to the indexed project. Versions listed for a project are added
        (or replaced) in both cases.

        Raises:
            ConfigurationError: if a specified field differs from the
              already indexed project, or if a version gives a url while
              the project entry in *data* has an error

        """
        if "data_directory" in data:
            self.data_directory = data["data_directory"]

        for id, project in data.get("index", {}).items():
            if id in self._projects:
                # validate that they are the same
                indexed = self._projects[id]
                for attr in ("label", "language", "license"):
                    if attr in project and project[attr] != indexed[attr]:
                        raise ConfigurationError(f"{attr} mismatch for {id}")
            else:
                self.add_project(
                    id,
                    type=project.get("type", _WORDNET),
                    label=project.get("label"),
                    language=project.get("language"),
                    license=project.get("license"),
                    error=project.get("error"),
                )

            for version, info in project.get("versions", {}).items():
                # A project-level error makes a version url contradictory.
                if "url" in info and "error" in project:
                    spec = format_lexicon_specifier(id, version)
                    raise ConfigurationError(
                        f"{spec} url specified with default error"
                    )
                self.add_project_version(
                    id,
                    version,
                    url=info.get("url"),
                    license=info.get("license"),
                    error=info.get("error"),
                )

    def load_index(self, path: AnyPath) -> None:
        """Load and update with the project index at *path*.

        The project index is a TOML_ file containing project and
        version information. For example:

        .. code-block:: toml

           [ewn]
             label = "Open English WordNet"
             language = "en"
             license = "https://creativecommons.org/licenses/by/4.0/"
             [ewn.versions.2019]
               url = "https://en-word.net/static/english-wordnet-2019.xml.gz"
             [ewn.versions.2020]
               url = "https://en-word.net/static/english-wordnet-2020.xml.gz"

        Raises:
            ConfigurationError: if the file is not valid TOML

        .. _TOML: https://toml.io

        """
        path = Path(path).expanduser()
        # tomllib requires a binary file object.
        with path.open("rb") as indexfile:
            try:
                index = tomllib.load(indexfile)
            except tomllib.TOMLDecodeError as exc:
                raise ConfigurationError("malformed index file") from exc
        self.update({"index": index})
def _get_cache_path_for_urls(
    config: WNConfig,
    urls: Sequence[str],
) -> Path | None:
    """Return the first existing cache file for *urls*, or ``None``.

    Each URL is mapped, in order, to its cache location using
    ``config.get_cache_path()``. The first location that is an existing
    file is returned; later URLs are not examined. ``None`` is returned
    when no location is a file, including when *urls* is empty.
    """
    # Lazy generator: cache paths are computed one URL at a time and the
    # search stops at the first hit.
    candidates = (config.get_cache_path(url) for url in urls)
    return next((cached for cached in candidates if cached.is_file()), None)
# Module-level configuration object, created when this module is imported.
config = WNConfig()
# Populate its project index from the index file at INDEX_FILE_PATH.
config.load_index(INDEX_FILE_PATH)
|