File: test_detect.py

package info (click to toggle)
utf8-locale 1.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 404 kB
  • sloc: python: 847; ansic: 486; sh: 121; makefile: 21
file content (196 lines) | stat: -rw-r--r-- 6,529 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# SPDX-FileCopyrightText: Peter Pentchev <roam@ringlet.net>
# SPDX-License-Identifier: BSD-2-Clause
"""Test the UTF-8 locale detection functions."""

from __future__ import annotations

import dataclasses
import functools
import json
import os
import pathlib
import subprocess
import typing
from unittest import mock

import pytest

import utf8_locale


if typing.TYPE_CHECKING:
    from collections.abc import Iterable


@dataclasses.dataclass(frozen=True)
class TLangData:
    """The test data for a single preferred languages test case."""

    env: dict[str, str]
    expected: list[str]


@dataclasses.dataclass(frozen=True)
class TData:
    """The test data loaded from the JSON definitions file."""

    locales: list[str]
    languages: list[TLangData]


@functools.lru_cache
def load_test_data() -> TData:
    """Load the test data from the JSON definitions file."""
    raw = json.loads(
        (pathlib.Path(__file__).absolute().parent.parent.parent / "tests/data.json").read_text(
            encoding="UTF-8",
        ),
    )
    assert raw["format"]["version"] == {"major": 0, "minor": 1}

    return TData(
        locales=raw["locales"],
        languages=[
            TLangData(env=item["env"], expected=item["expected"]) for item in raw["languages"]
        ],
    )


LANG_KEYS = {"LC_ALL", "LANGUAGE"}

LANG_EXPECTED = [
    (["C", "en"], "C.UTF-8"),
    (["en", "C"], "en_XX.UTF-8"),
    (["es", "bg", "*"], "it_IT.UTF-8"),
    (["en", "bg", "*"], "en_XX.UTF-8"),
    (["es", "*", "en"], "it_IT.UTF-8"),
    (["es", "*", "it"], "de_DE.UTF-8"),
    (["en", "bg", "en"], "en_XX.UTF-8"),
    (["it", "en", "it"], "it_IT.UTF-8"),
    (["xy", "yz", "xy", "en"], "en_XX.UTF-8"),
]


def check_env(env: dict[str, str]) -> None:
    """Make sure a UTF8-capable environment was setup correctly."""
    # Everything except LANG_KEYS is the same as in os.environ
    assert {key: value for key, value in env.items() if key not in LANG_KEYS} == {
        key: value for key, value in os.environ.items() if key not in LANG_KEYS
    }

    # The rest of this function makes sure that locale(1) and date(1), when
    # run in this environment, output reasonable values
    loc = {
        fields[0]: fields[1].strip('"')
        for fields in (
            line.split("=", 1)
            for line in subprocess.check_output(
                ["locale"],
                shell=False,
                env=env,
                encoding="UTF-8",
            ).splitlines()
        )
    }
    non_lc = {name for name in loc if not name.startswith("LC_")}
    assert non_lc.issubset({"LANG", "LANGUAGE"})
    loc = {name: value for name, value in loc.items() if name.startswith("LC_")}
    values = list(set(loc.values()))
    assert len(values) == 1, values
    assert values[0].lower().endswith(".utf8") or values[0].lower().endswith(".utf-8")

    utc_env = dict(env)
    utc_env["TZ"] = "UTC"
    lines = subprocess.check_output(
        ["date", "-d", "@1000000000", "+%A"],
        shell=False,
        env=utc_env,
        encoding="UTF-8",
    ).splitlines()
    assert lines in [["Sunday"], ["Sonntag"], ["domenica"], ["domingo"]]


def get_mod_env() -> dict[str, str]:
    """Get a slightly modified copy of os.environ for test purposes."""
    mod_env = {
        key: value
        for key, value in os.environ.items()
        if key not in {"HOME", "USER", "PS1", "PATH"}
    }
    mod_env["TEST_KEY"] = "test value"
    return mod_env


def check_mod_env(env: dict[str, str], mod_env: dict[str, str], env2: dict[str, str]) -> None:
    """Make sure very little has changed in the prepared environment."""
    # Nothing besides LANG_KEYS has changed
    assert {key: value for key, value in env2.items() if key not in LANG_KEYS} == {
        key: value for key, value in mod_env.items() if key not in LANG_KEYS
    }

    # LANG_KEYS have changed in the same way as before
    assert {key: value for key, value in env2.items() if key in LANG_KEYS} == {
        key: value for key, value in env.items() if key in LANG_KEYS
    }


def test_utf8_env() -> None:
    """Test get_utf8_env() and, indirectly, detect_utf8_locale()."""
    env = utf8_locale.get_utf8_env()
    check_env(env)

    mod_env = get_mod_env()
    env2 = utf8_locale.get_utf8_env(mod_env)
    check_mod_env(env, mod_env, env2)


def mock_locale() -> mock._patch[mock.Mock]:
    """Mock subprocess.check_output("locale -a")."""
    locales = load_test_data().locales
    mock_check_output = mock.Mock(spec=["__call__"])
    mock_check_output.return_value = "".join(item + "\n" for item in locales)
    return mock.patch("subprocess.check_output", new=mock_check_output)


class TestLanguages:
    """Test the language preference handling of detect_utf8_locale()."""

    @staticmethod
    def detect_locale(languages: Iterable[str]) -> str:
        """Get the locale name using the appropriate mechanism."""
        with mock_locale():
            return utf8_locale.detect_utf8_locale(languages=languages)

    @staticmethod
    def get_vars(languages: Iterable[str]) -> dict[str, str]:
        """Get the variables dict using the appropriate mechanism."""
        with mock_locale():
            return utf8_locale.get_utf8_vars(languages=languages)

    @staticmethod
    def get_langs(env: dict[str, str]) -> list[str]:
        """Get the preferred languages using the appropriate mechanism."""
        return utf8_locale.get_preferred_languages(env)

    @pytest.mark.parametrize(("languages", "result"), LANG_EXPECTED)
    def test_language(self, languages: list[str], result: str) -> None:
        """Test detect_utf8_locale() with some languages specified."""
        assert self.detect_locale(iter(languages)) == result

    @pytest.mark.parametrize(("languages", "result"), LANG_EXPECTED)
    def test_language_vars(self, languages: list[str], result: str) -> None:
        """Test detect_utf8_locale() with some languages specified."""
        assert self.get_vars(iter(languages)) == {
            "LC_ALL": result,
            "LANGUAGE": "",
        }

    def test_no_languages(self) -> None:
        """Test detect_utf8_locale() with no languages specified."""
        with pytest.raises(utf8_locale.NoLanguagesError):
            self.detect_locale(languages=iter([]))

    @pytest.mark.parametrize("tcase", load_test_data().languages)
    def test_preferred(self, tcase: TLangData) -> None:
        """Test get_preferred_languages() with the specified environment."""
        assert self.get_langs(tcase.env) == tcase.expected