"""Non-public Wn utilities."""
import hashlib
from collections.abc import Hashable, Iterable
from pathlib import Path
from typing import TypeVar
from unicodedata import combining, normalize
from wn._types import VersionInfo
def version_info(version_string: str) -> VersionInfo:
    """Parse a dotted version string into a tuple of integers.

    Each ``.``-separated component is converted with :func:`int`, so a
    component that is not an integer literal raises :exc:`ValueError`.
    """
    components = version_string.split(".")
    return tuple(int(component) for component in components)
def is_url(string: str) -> bool:
    """Return True if *string* begins with ``http://`` or ``https://``.

    This is a case-sensitive prefix check only; the remainder of the
    string is not validated.
    """
    # TODO: ETags?
    return string.startswith(("http://", "https://"))
def is_gzip(path: Path) -> bool:
    """Return True if the file at *path* starts with the gzip magic bytes."""
    gzip_magic = b"\x1f\x8b"
    return _inspect_file_signature(path, gzip_magic)
def is_lzma(path: Path) -> bool:
    """Return True if the file at *path* starts with the 6-byte xz/lzma magic."""
    xz_magic = b"\xfd7zXZ\x00"
    return _inspect_file_signature(path, xz_magic)
def is_xml(path: Path) -> bool:
    """Return True if the file at *path* starts with an XML declaration."""
    # Only the literal opening of an XML declaration is recognized.
    xml_declaration = b"<?xml "
    return _inspect_file_signature(path, xml_declaration)
def _inspect_file_signature(path: Path, signature: bytes) -> bool:
    """Return True if *path* is a file whose leading bytes equal *signature*.

    Returns False without opening anything when *path* is not an
    existing file.
    """
    if not path.is_file():
        return False
    with path.open("rb") as stream:
        # Read exactly as many leading bytes as the signature is long.
        header = stream.read(len(signature))
    return header == signature
def short_hash(string: str) -> str:
    """Return a 40-character hexadecimal BLAKE2b digest of *string*.

    The text is encoded as UTF-8 and hashed with a 20-byte digest size.
    """
    digest = hashlib.blake2b(string.encode("utf-8"), digest_size=20)
    return digest.hexdigest()
T = TypeVar("T")


def flatten(iterable: Iterable[Iterable[T]]) -> list[T]:
    """Concatenate the items of each inner iterable into one flat list.

    Only one level of nesting is removed; item order is preserved.
    """
    flat: list[T] = []
    for inner in iterable:
        flat.extend(inner)
    return flat
H = TypeVar("H", bound=Hashable)


def unique_list(items: Iterable[H]) -> list[H]:
    """Return the distinct members of *items* in first-seen order."""
    # dict keys are unique and keep insertion order, so the dict acts as
    # an ordered set
    return list(dict.fromkeys(items))
def normalize_form(s: str) -> str:
    """Return *s* case-folded with combining characters removed.

    The string is case-folded, decomposed with NFKD normalization, and
    every character whose canonical combining class is non-zero is
    dropped.
    """
    decomposed = normalize("NFKD", s.casefold())
    base_chars = [char for char in decomposed if combining(char) == 0]
    return "".join(base_chars)
def format_lexicon_specifier(id: str, version: str) -> str:
    """Build a lexicon specifier by joining *id* and *version* with a colon."""
    return "{}:{}".format(id, version)
def split_lexicon_specifier(lexicon: str) -> tuple[str, str]:
    """Split a lexicon specifier at its first colon into ``(id, version)``.

    When *lexicon* contains no colon, the version part is the empty
    string.
    """
    lexicon_id, _separator, lexicon_version = lexicon.partition(":")
    return (lexicon_id, lexicon_version)
|