1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
|
import os
import yaml
import logging
from typing import List, Dict, Set, Tuple
from collections import defaultdict
from countrynames.util import process_data
log = logging.getLogger("countrynames.compile")
CODE_DIR = os.path.dirname(__file__)
def load_yaml_data() -> Dict[str, List[str]]:
data = defaultdict(list)
data_file = os.path.join(CODE_DIR, "data.yaml")
with open(data_file, "r", encoding="utf-8") as fh:
for code, names in yaml.safe_load(fh).items():
data[code].extend(names)
return data
def write_python(data: Dict[str, List[str]]) -> None:
python_file = os.path.join(CODE_DIR, "data.py")
with open(python_file, "w", encoding="utf-8") as pyfh:
pyfh.write("# generated file, do not edit.\n")
pyfh.write("from typing import Dict, List\n\n")
pyfh.write("DATA: Dict[str, List[str]] = {}\n")
for code, names in data.items():
code = code.strip().upper()
pyfh.write("DATA[%r] = %r\n" % (code, names))
def validate_data(data: Dict[str, List[str]]) -> None:
"""Run a check on the country name database."""
mappings: Dict[str, Set[Tuple[str, str]]] = {}
for code, norm, original in process_data(data):
mappings.setdefault(norm, set())
mappings[norm].add((code, original))
for norm, assigned in mappings.items():
countries = set([c for (c, _) in assigned])
if len(countries) == 1:
continue
log.warning("Ambiguous string [%s]: %r", norm, assigned)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
data = load_yaml_data()
validate_data(data)
write_python(data)
|