1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
|
import json
import os
import shutil
from collections import OrderedDict
import regex as re
from ruamel.yaml import RoundTripLoader
from dateparser_scripts.order_languages import avoid_languages
from dateparser_scripts.utils import combine_dicts
# Input directories (CLDR data and supplementary data) and output directories.
# All of them are relative paths, resolved after the chdir below.
cldr_date_directory = "../dateparser_data/cldr_language_data/date_translation_data/"
supplementary_directory = "../dateparser_data/supplementary_language_data/"
supplementary_date_directory = (
    "../dateparser_data/supplementary_language_data/date_translation_data/"
)
translation_data_directory = "../dateparser/data/"
date_translation_directory = "../dateparser/data/date_translation_data/"

# Make the directory holding this script the working directory, so the
# relative paths above do not depend on where the script is launched from.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Language codes are the file names minus their last five characters
# (the ".json" / ".yaml" suffix). Languages in avoid_languages are left out
# of the CLDR list.
cldr_languages = list(
    {file_name[:-5] for file_name in os.listdir(cldr_date_directory)}
    - avoid_languages
)
supplementary_languages = list(
    map(lambda file_name: file_name[:-5], os.listdir(supplementary_date_directory))
)
all_languages = set(cldr_languages) | set(supplementary_languages)

# Matches the literal "{0}" placeholder.
RELATIVE_PATTERN = re.compile(r"\{0\}")
def _modify_relative_data(relative_data):
    """
    Replace every ``{0}`` placeholder in the given patterns with a regex
    group that matches a number.

    ``relative_data`` maps keys to lists of pattern strings. Those lists are
    rewritten in place; the returned OrderedDict maps the same keys, in the
    same order, to those very same list objects.
    """
    # Replacement template: ``sub`` unescapes the doubled backslashes, so the
    # text actually inserted is ``(\d+[.,]?\d*)``.
    number_group = r"(\\d+[.,]?\\d*)"

    converted = OrderedDict()
    for name, patterns in relative_data.items():
        for position, pattern in enumerate(patterns):
            patterns[position] = RELATIVE_PATTERN.sub(number_group, pattern)
        converted[name] = patterns
    return converted
def _modify_data(language_data):
    """
    Convert the ``{0}`` placeholders of a language's "relative-type-regex"
    patterns into number-matching regex groups, in place.

    The top-level "relative-type-regex" entry is processed, as well as the
    "relative-type-regex" entry of every locale found under
    "locale_specific". Entries that are missing are simply skipped.

    Nothing is returned: ``_modify_relative_data`` rewrites the pattern lists
    in place, so ``language_data`` itself ends up updated. Its return value
    is deliberately ignored here.
    """
    _modify_relative_data(language_data.get("relative-type-regex", {}))

    for locale_info in language_data.get("locale_specific", {}).values():
        _modify_relative_data(locale_info.get("relative-type-regex", {}))
def _get_complete_date_translation_data(language):
    """
    Load and merge all date translation data available for ``language``.

    The CLDR JSON file is read when ``language`` is in ``cldr_languages`` and
    the supplementary YAML file when it is in ``supplementary_languages``;
    the two are merged with ``combine_dicts``. If the merged data has no
    "name" entry, the language code is stored under that key.

    Returns the merged data.
    """
    cldr_data = {}
    supplementary_data = {}

    # Read the data files as UTF-8 explicitly: without it, decoding depends
    # on the platform's default encoding and may fail or garble non-ASCII
    # translations.
    if language in cldr_languages:
        with open(cldr_date_directory + language + ".json", encoding="utf-8") as f:
            cldr_data = json.load(f, object_pairs_hook=OrderedDict)

    if language in supplementary_languages:
        with open(
            supplementary_date_directory + language + ".yaml", encoding="utf-8"
        ) as g:
            supplementary_data = OrderedDict(RoundTripLoader(g).get_data())

    complete_data = combine_dicts(cldr_data, supplementary_data)
    if "name" not in complete_data:
        complete_data["name"] = language
    return complete_data
def _write_file(filename, text, mode, in_memory, in_memory_result):
if in_memory:
in_memory_result[filename] = text
else:
with open(filename, mode) as out:
out.write(text)
def write_complete_data(in_memory=False):
    """
    Generate the needed py files from the CLDR files (JSON format) and the
    supplementary language data (YAML format).

    For every language in ``all_languages`` a ``<language>.py`` file holding
    ``info = {...}`` is produced, plus the ``__init__.py`` files of the two
    output directories.

    Use it with in_memory=True to avoid writing real files and to get back a
    dictionary mapping the file names to their content (used when testing).
    In that dictionary the language files are UTF-8 encoded bytes and the
    ``__init__.py`` contents are str. With in_memory=False the files are
    written to disk and the returned dictionary is empty.
    """
    in_memory_result = {}

    if not in_memory:
        if not os.path.isdir(translation_data_directory):
            os.mkdir(translation_data_directory)
        # Recreate the output directory from scratch so that files generated
        # by a previous run do not linger.
        if os.path.isdir(date_translation_directory):
            shutil.rmtree(date_translation_directory)
        os.mkdir(date_translation_directory)

    # Explicit encoding so decoding does not depend on the platform default.
    with open(supplementary_directory + "base_data.yaml", encoding="utf-8") as f:
        base_data = RoundTripLoader(f).get_data()

    for language in all_languages:
        date_translation_data = _get_complete_date_translation_data(language)
        date_translation_data = combine_dicts(date_translation_data, base_data)
        _modify_data(date_translation_data)
        translation_data = json.dumps(
            date_translation_data, indent=4, separators=(",", ": "), ensure_ascii=False
        )
        out_text = ("info = " + translation_data + "\n").encode("utf-8")
        _write_file(
            date_translation_directory + language + ".py",
            out_text,
            "wb",
            in_memory,
            in_memory_result,
        )

    init_text = (
        "from dateparser.data import date_translation_data\n"
        "from .languages_info import language_order, language_locale_dict\n"
    )

    # The __init__ files must honour in_memory as well: writing them to disk
    # unconditionally would touch real files (in directories that in-memory
    # mode never creates) and leave them out of the returned dictionary.
    _write_file(
        translation_data_directory + "__init__.py",
        init_text,
        "w",
        in_memory,
        in_memory_result,
    )
    _write_file(
        date_translation_directory + "__init__.py",
        "",
        "w",
        in_memory,
        in_memory_result,
    )

    return in_memory_result
# When run as a script, regenerate the translation data files on disk
# (write_complete_data defaults to in_memory=False).
if __name__ == "__main__":
    write_complete_data()
|