File: l10n_helper.py

package info (click to toggle)
python-holidays 0.78-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 54,836 kB
  • sloc: python: 106,063; javascript: 85; makefile: 59
file content (257 lines) | stat: -rw-r--r-- 10,106 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#!/usr/bin/env python3


#  holidays
#  --------
#  A fast, efficient Python library for generating country, province and state
#  specific sets of holidays on the fly. It aims to make determining whether a
#  specific date is a holiday as fast and flexible as possible.
#
#  Authors: Vacanza Team and individual contributors (see CONTRIBUTORS file)
#           dr-prodigy <dr.prodigy.github@gmail.com> (c) 2017-2023
#           ryanss <ryanssdev@icloud.com> (c) 2014-2017
#  Website: https://github.com/vacanza/holidays
#  License: MIT (see LICENSE file)

# ruff: noqa: T201

import argparse
import csv
import importlib
import sys
from collections import defaultdict
from pathlib import Path

from polib import pofile

# Value passed to polib as `wrapwidth` whenever a .po/.pot file is loaded below.
WRAP_WIDTH = 99


class LocalizationHelper:
    """Command-line helper for maintaining localization files of one country.

    Depending on the flag given on the command line it creates .po files from
    the country's .pot template, exports translations to a semicolon-delimited
    .csv file, imports them back from that file, or fills empty .csv cells
    with translations found in other countries' .po files.

    All paths are relative to the current working directory.
    """

    def __init__(self) -> None:
        """Parse the command line and derive the settings used by the actions.

        Exits with status 1 (after printing the help) when none of the action
        flags (`--create-po`, `--export-csv`, `--import-csv`, `--fill-missing`)
        is present.
        """
        arg_parser = argparse.ArgumentParser()
        arg_parser.add_argument("country_code", help="Country code", type=str)
        arg_parser.add_argument("--create-po", action="store_true", help="Create .po files")
        arg_parser.add_argument("--export-csv", action="store_true", help="Export to .csv")
        arg_parser.add_argument("--import-csv", action="store_true", help="Import from .csv")
        arg_parser.add_argument(
            "--fill-missing",
            action="store_true",
            help="Fill missing translations from other countries",
        )
        arg_parser.add_argument(
            "--bom",
            action=argparse.BooleanOptionalAction,
            default=False,
            help="Use files with BOM on export/import",
        )
        arg_parser.add_argument(
            "--overwrite",
            action=argparse.BooleanOptionalAction,
            default=False,
            help="Overwrite existing files",
        )
        arg_parser.add_argument(
            "--strict-locale",
            action=argparse.BooleanOptionalAction,
            default=True,
            help="Use strict locale when searching for suitable translations",
        )
        self.args = arg_parser.parse_args()
        # Only the plain `store_true` options are actions; the
        # BooleanOptionalAction options above are modifiers and do not count.
        if not any(
            getattr(self.args, action.dest)
            for action in arg_parser._actions
            if isinstance(action, argparse._StoreTrueAction)
        ):
            print("No action specified.\n")
            arg_parser.print_help()
            sys.exit(1)

        self.country_code = self.args.country_code
        # "utf-8-sig" writes/strips a BOM; it is used for the .csv file only.
        self.encoding = "utf-8-sig" if self.args.bom else "utf-8"
        self.csv_file_path = Path(f"holidays/locale/csv/{self.country_code}.csv")

    def create_po_files(self) -> None:
        """Create a .po file per supported language from the country's .pot file.

        Existing .po files are left untouched unless `--overwrite` is set. When
        a freshly saved file starts with the "# SOME DESCRIPTIVE TITLE"
        placeholder, its first four lines are replaced with the contents of
        docs/file_header.txt (as comments) followed by a title line.

        If the country's default language is not en_US and an en_US .po file
        exists, every entry of that file that has a comment gets the comment
        (without a trailing period) as its msgstr.

        Exits with status 1 when no class is registered for the country code.
        """
        sys.path.append(str(Path.cwd()))  # Make holidays visible.
        from holidays.registry import COUNTRIES

        class_name = None
        for module_name, entity_data in COUNTRIES.items():
            if entity_data[1] == self.country_code:
                class_name = entity_data[0]
                break
        if not class_name:
            print(f"Class for country code {self.country_code} not found!")
            sys.exit(1)

        cls = getattr(importlib.import_module(f"holidays.countries.{module_name}"), class_name)
        # A class without a docstring has `__doc__ is None`; fall back to "" so
        # the generic "COUNTRY" name is used instead of crashing.
        docstring = cls.__doc__ or ""

        header = [
            f"# {line}" if line else "#"
            for line in Path("docs/file_header.txt").read_text(encoding="utf-8").split("\n")
        ]
        country_name = "COUNTRY"
        title_suffix = " holidays."
        if (title := docstring.split("\n")[0]) and title.endswith(title_suffix):
            # Strip only the trailing suffix so a name that itself contains
            # the word "holidays" is not truncated.
            country_name = title.removesuffix(title_suffix).strip()

        locale_directory = Path("holidays/locale")
        pot_file_path = locale_directory / "pot" / f"{self.country_code}.pot"
        pot_file = pofile(pot_file_path, wrapwidth=WRAP_WIDTH)
        for entry in pot_file:
            entry.occurrences.clear()

        for language in cls.supported_languages:
            po_directory = locale_directory / language / "LC_MESSAGES"
            po_directory.mkdir(parents=True, exist_ok=True)
            po_file_path = po_directory / f"{self.country_code}.po"
            if not self.args.overwrite and po_file_path.exists():
                continue
            pot_file.metadata["Language"] = language
            pot_file.save(po_file_path, newline="\n")

            file_content = po_file_path.read_text(encoding="utf-8").split("\n")
            if file_content[0] != "# SOME DESCRIPTIVE TITLE":
                continue
            l10n_suffix = f" {language} localization" if language != cls.default_language else ""
            title = f"# {country_name} holidays{l10n_suffix}."
            # Drop the four placeholder lines and put the project header and
            # the title in their place.
            file_content = header + [title, "#"] + file_content[4:]
            po_file_path.write_text("\n".join(file_content), encoding="utf-8", newline="\n")

        if cls.default_language == "en_US":
            return None

        # The directory name is the language code itself ("en_US"); a
        # lower-cased name does not exist on case-sensitive file systems.
        en_us_po_path = locale_directory / "en_US" / "LC_MESSAGES" / f"{self.country_code}.po"
        if not en_us_po_path.exists():
            # No en_US localization for this country: nothing to pre-fill.
            return None
        po_file = pofile(en_us_po_path, wrapwidth=WRAP_WIDTH)
        for po_entry in po_file:
            if po_entry.comment:
                po_entry.msgstr = po_entry.comment.removesuffix(".")
        po_file.save(en_us_po_path, newline="\n")
        return None

    def export_to_csv(self) -> None:
        """Export the country's translations to the .csv file.

        The file has the columns `msgid`, `msgcomment` and one column per
        non-source language (sorted). Comments are taken from the .po file
        whose "Language" equals its "X-Source-Language" metadata; translations
        from all other .po files of the country. A comment or translation that
        is missing for a msgid is exported as an empty cell.

        Exits with status 1 when the .csv file exists and `--overwrite` is not
        set.
        """
        if not self.args.overwrite and self.csv_file_path.exists():
            print(f"File {self.csv_file_path} exists and no --overwrite option!")
            sys.exit(1)

        comments = {}
        languages = []
        po_data = defaultdict(dict)
        for po_path in Path("holidays/locale").rglob(f"{self.country_code}.po"):
            po_file = pofile(po_path, wrapwidth=WRAP_WIDTH)
            language = po_file.metadata["Language"]
            default_language = po_file.metadata["X-Source-Language"]
            if language != default_language:
                languages.append(language)
                for po_entry in po_file:
                    po_data[po_entry.msgid][language] = po_entry.msgstr
            else:
                for po_entry in po_file:
                    comments[po_entry.msgid] = po_entry.comment

        languages.sort()
        headers = ["msgid", "msgcomment", *languages]
        # `.get` keeps the export going when one file lacks an entry that the
        # others have, instead of failing with KeyError.
        rows = [
            [
                msgid,
                comments.get(msgid, ""),
                *(translations.get(language, "") for language in languages),
            ]
            for msgid, translations in po_data.items()
        ]
        self.write_csv(headers, rows)

    def read_csv(self) -> tuple[list[str], list[list[str]]]:
        """Read the .csv file and return its header row and data rows.

        Returns:
            A `(headers, rows)` tuple; every row is a list of cell strings.

        Exits with status 1 when the file does not exist or is empty.
        """
        if not self.csv_file_path.exists():
            print(f"File {self.csv_file_path} not found!")
            sys.exit(1)

        # newline="" is required by the csv module so that line breaks inside
        # quoted cells are returned unchanged.
        with self.csv_file_path.open("r", newline="", encoding=self.encoding) as f:
            reader = csv.reader(f, delimiter=";")
            headers = next(reader, None)
            rows = list(reader)
        if headers is None:
            print(f"File {self.csv_file_path} is empty!")
            sys.exit(1)
        return headers, rows

    def write_csv(self, headers: list[str], rows: list[list[str]]) -> None:
        """Write `headers` and `rows` to the .csv file, creating its directory.

        Args:
            headers: Cells of the first line.
            rows: Remaining lines, one list of cells per line.
        """
        self.csv_file_path.parent.mkdir(parents=True, exist_ok=True)
        with self.csv_file_path.open("w", newline="", encoding=self.encoding) as f:
            writer = csv.writer(f, delimiter=";")
            writer.writerow(headers)
            writer.writerows(rows)

    def import_from_csv(self) -> None:
        """Copy translations from the .csv file into the country's .po files.

        Columns after the first two are treated as languages. For every
        language the matching .po file is loaded; entries found by msgid get
        the .csv value as msgstr when it differs. A value containing "|" marks
        the entry as fuzzy. A .po file is saved only if something changed.
        """
        headers, rows = self.read_csv()
        languages = headers[2:]
        entries = [(msgid, dict(zip(languages, msgstrs))) for msgid, _, *msgstrs in rows]

        for language in languages:
            po_path = Path(f"holidays/locale/{language}/LC_MESSAGES/{self.country_code}.po")
            po_file = pofile(po_path, wrapwidth=WRAP_WIDTH)

            changed = False
            for msgid, msg_mapping in entries:
                entry = po_file.find(msgid)
                if not entry:
                    continue
                # A row shorter than the header has no cell for this language.
                msgstr = msg_mapping.get(language)
                if msgstr is None or entry.msgstr == msgstr:
                    continue
                entry.msgstr = msgstr
                # Do not stack duplicate flags on repeated imports.
                if "|" in msgstr and "fuzzy" not in entry.flags:
                    entry.flags.append("fuzzy")
                changed = True

            if changed:
                po_file.save(po_path, newline="\n")

    def fill_missing_translations(self) -> None:
        """Fill empty .csv cells with translations used by other countries.

        For every language that has at least one blank cell, the .po files of
        all other countries in a matching locale directory are scanned and
        their translations are grouped by entry comment. The directory must
        equal the language with `--strict-locale` (the default); otherwise it
        only has to start with the part of the language before "_". A blank
        cell whose row comment has candidates receives them sorted and joined
        with " | "; the .csv file is then rewritten.
        """
        headers, rows = self.read_csv()
        languages = headers[2:]

        untranslated_languages = set()
        for row in rows:
            msgstrs = row[2:]
            for lang, msgstr in zip(languages, msgstrs):
                if not msgstr.strip():
                    untranslated_languages.add(lang)

        locale_path = Path("holidays/locale")
        translations = {language: defaultdict(set) for language in untranslated_languages}
        for language in untranslated_languages:
            language_prefix = language if self.args.strict_locale else language.split("_")[0]
            for po_path in locale_path.rglob("LC_MESSAGES/*.po"):
                if po_path.stem == self.country_code:
                    continue
                # <locale>/<language_dir>/LC_MESSAGES/<country>.po
                language_dir = po_path.parent.parent.name
                matches = (
                    language_dir == language_prefix
                    if self.args.strict_locale
                    else language_dir.startswith(language_prefix)
                )
                if not matches:
                    continue
                po_file = pofile(po_path, wrapwidth=WRAP_WIDTH)
                for entry in po_file:
                    if entry.comment:
                        # An empty msgstr falls back to the msgid.
                        translations[language][entry.comment].add(entry.msgstr or entry.msgid)

        updated_rows = []
        for msgid, msgcomment, *msgstrs in rows:
            new_msgstrs = []
            for lang, msgstr in zip(languages, msgstrs):
                if msgstr.strip():
                    new_msgstrs.append(msgstr)
                # Reached only for blank cells, so `lang` is always a key of
                # `translations` here.
                elif msgcomment in translations[lang]:
                    new_msgstrs.append(" | ".join(sorted(translations[lang][msgcomment])))
                else:
                    new_msgstrs.append("")

            updated_rows.append([msgid, msgcomment, *new_msgstrs])

        self.write_csv(headers, updated_rows)

    def run(self) -> None:
        """Run the requested action.

        When several action flags are given only the first one in the order
        create-po, export-csv, import-csv, fill-missing is executed.
        """
        if self.args.create_po:
            self.create_po_files()
        elif self.args.export_csv:
            self.export_to_csv()
        elif self.args.import_csv:
            self.import_from_csv()
        elif self.args.fill_missing:
            self.fill_missing_translations()


# Script entry point: parse the command line, then run the selected action.
if __name__ == "__main__":
    helper = LocalizationHelper()
    helper.run()