File: _util.py

package info (click to toggle)
python-wn 1.0.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,100 kB
  • sloc: python: 8,429; xml: 566; sql: 238; makefile: 12
file content (77 lines) | stat: -rw-r--r-- 2,050 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""Non-public Wn utilities."""

import hashlib
from collections.abc import Hashable, Iterable
from pathlib import Path
from typing import TypeVar
from unicodedata import combining, normalize

from wn._types import VersionInfo


def version_info(version_string: str) -> VersionInfo:
    """Parse a dot-separated version string into a tuple of integers.

    Every component between dots is converted with ``int``, so a
    component that ``int`` cannot parse (including an empty one) raises
    ValueError.
    """
    components = version_string.split(".")
    return tuple(int(component) for component in components)


def is_url(string: str) -> bool:
    """Return True if *string* starts with ``http://`` or ``https://``.

    The check is a case-sensitive prefix test; nothing else about the
    string is validated.
    """
    # TODO: ETags?
    return string.startswith(("http://", "https://"))


def is_gzip(path: Path) -> bool:
    """Return True if *path* is a file whose first two bytes are the gzip magic.

    Only the leading bytes are inspected; the rest of the file is not
    validated. A *path* that is not an existing file yields False.
    """
    gzip_magic = b"\x1f\x8b"
    return _inspect_file_signature(path, gzip_magic)


def is_lzma(path: Path) -> bool:
    """Return True if *path* is a file that starts with the xz/lzma signature.

    Only the six leading bytes are inspected; the rest of the file is
    not validated. A *path* that is not an existing file yields False.
    """
    xz_magic = b"\xfd7zXZ\x00"
    return _inspect_file_signature(path, xz_magic)


def is_xml(path: Path) -> bool:
    """Return True if *path* is a file that starts with an XML declaration.

    The test is an exact byte-prefix match, so a file that begins with
    anything else (for example a byte-order mark or leading whitespace)
    is reported as not XML. A *path* that is not an existing file
    yields False.
    """
    xml_declaration_start = b"<?xml "
    return _inspect_file_signature(path, xml_declaration_start)


def _inspect_file_signature(path: Path, signature: bytes) -> bool:
    """Return True if the file at *path* begins with the bytes *signature*.

    Returns False when *path* is not an existing regular file. Exactly
    ``len(signature)`` bytes are read, so a file shorter than the
    signature never matches a non-empty signature.
    """
    if not path.is_file():
        return False
    with path.open("rb") as stream:
        header = stream.read(len(signature))
    return header == signature


def short_hash(string: str) -> str:
    """Return a short hexadecimal hash of *string*.

    The string is encoded as UTF-8 and hashed with BLAKE2b using a
    20-byte digest, giving 40 hexadecimal characters.
    """
    encoded = string.encode("utf-8")
    return hashlib.blake2b(encoded, digest_size=20).hexdigest()


T = TypeVar("T")


def flatten(iterable: Iterable[Iterable[T]]) -> list[T]:
    """Concatenate the inner iterables of *iterable* into one list.

    Only one level of nesting is removed, and the order of the items
    is preserved.
    """
    flat: list[T] = []
    for group in iterable:
        flat.extend(group)
    return flat


H = TypeVar("H", bound=Hashable)


def unique_list(items: Iterable[H]) -> list[H]:
    """Return the distinct elements of *items* in first-seen order."""
    # dict keys are unique and keep insertion order, so a dict acts as
    # an ordered set here; the values are irrelevant
    return list(dict.fromkeys(items))


def normalize_form(s: str) -> str:
    """Return *s* case-folded and with combining characters removed.

    The string is case-folded first, then decomposed with Unicode NFKD
    normalization, and finally every character with a non-zero
    canonical combining class is dropped.
    """
    decomposed = normalize("NFKD", s.casefold())
    base_chars = [char for char in decomposed if combining(char) == 0]
    return "".join(base_chars)


def format_lexicon_specifier(id: str, version: str) -> str:
    """Join *id* and *version* with a colon to form a lexicon specifier."""
    separator = ":"
    return f"{id}{separator}{version}"


def split_lexicon_specifier(lexicon: str) -> tuple[str, str]:
    """Split a lexicon specifier into its id and version parts.

    The split happens at the first colon. If *lexicon* contains no
    colon, the whole string is the id and the version is the empty
    string.
    """
    lexicon_id, _separator, lexicon_version = lexicon.partition(":")
    return (lexicon_id, lexicon_version)