#!/usr/bin/env python3

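# This script generates the mapping between MS Windows timezone names and
# tzdata/Olson timezone names, by retrieving the CLDR file
# https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml
# and parsing it, and from this generating the file tzlocal/windows_tz.py.
# It also downloads the IANA tzdata archive so that old (backward-compatible)
# zone names can be mapped as well.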
#
# It must be run with Python 3.

import ftplib
import logging
from io import BytesIO
from pprint import pprint
import tarfile
from urllib.parse import urlparse
from urllib.request import urlopen
from xml.dom import minidom

# XML file that maps Windows timezone names to tzdata names.
WIN_ZONES_URL = 'https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml'
# tzdata archive; its 'backward' member is read for old zone-name links.
ZONEINFO_URL = 'ftp://ftp.iana.org/tz/tzdata-latest.tar.gz'

# Configure the root logger at INFO so the progress messages below are shown.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()


def update_old_names():
    """Fetch the tzdata archive and return its backward-compatibility links.

    Downloads the archive named by ``ZONEINFO_URL`` over anonymous FTP into
    memory, reads the ``backward`` member and collects every ``Link`` line.

    Returns:
        dict: maps each link name (old zone name) to the zone name it links
        to, both decoded from ASCII to ``str``.

    Raises:
        ftplib.all_errors: if the FTP connection or transfer fails.
        tarfile.TarError: if the downloaded data is not a readable archive.
        KeyError: if the archive has no ``backward`` member.
    """
    url = urlparse(ZONEINFO_URL)
    gzfile = BytesIO()

    log.info('Connecting to %s', url.netloc)
    # The context manager closes the connection even if the transfer fails.
    with ftplib.FTP(url.netloc) as ftp:
        ftp.login()
        log.info('Fetching zoneinfo database')
        ftp.retrbinary('RETR ' + url.path, gzfile.write)
    gzfile.seek(0)

    log.info('Extracting backwards data')
    backward = {}
    with tarfile.open(mode="r:gz", fileobj=gzfile) as archive:
        for line in archive.extractfile('backward'):
            # The member is read as bytes, so the comment marker has to be
            # compared as bytes as well (indexing bytes yields an int, which
            # never equals the str '#').
            line = line.strip()
            if not line or line.startswith(b'#'):
                continue
            parts = line.split()
            if parts[0] != b'Link':
                continue

            # Line layout: Link <target> <link-name>
            backward[parts[2].decode('ascii')] = parts[1].decode('ascii')

    return backward


def update_windows_zones():
    """Regenerate ``tzlocal/windows_tz.py`` from the downloaded sources.

    Builds two dictionaries from the ``mapZone`` elements of the XML file at
    ``WIN_ZONES_URL``:

    * ``win_tz``: Windows zone name -> first tz name of the entry whose
      ``territory`` attribute is ``'001'``.
    * ``tz_win``: every tz name listed in any entry -> Windows zone name,
      extended with the old names returned by ``update_old_names()`` and
      with ``'Etc/UTC'``.

    Both are pretty-printed into ``tzlocal/windows_tz.py`` (path relative to
    the current working directory).

    Raises:
        ValueError: if the XML contains no ``mapTimezones`` element.
    """
    backward = update_old_names()

    log.info('Fetching Windows mapping info from unicode.org')
    # Close the HTTP response as soon as the payload has been read.
    with urlopen(WIN_ZONES_URL) as response:
        source = response.read()
    dom = minidom.parseString(source)

    candidates = dom.getElementsByTagName('mapTimezones')
    if not candidates:
        raise ValueError('No mapTimezones element found in %s' % WIN_ZONES_URL)
    # Prefer the element marked type="windows". When none is marked, fall
    # back to the last element, which is what the previous for/break loop
    # did implicitly through its leaked loop variable.
    # NOTE(review): the fallback looks load-bearing if the upstream file
    # omits the 'type' attribute -- confirm before tightening this.
    element = next(
        (node for node in candidates if node.getAttribute('type') == 'windows'),
        candidates[-1],
    )

    log.info('Making windows mapping')
    win_tz = {}
    tz_win = {}
    for mapping in element.getElementsByTagName('mapZone'):
        win_name = mapping.getAttribute('other')
        tz_names = mapping.getAttribute('type').split(' ')

        if mapping.getAttribute('territory') == '001':
            win_tz[win_name] = tz_names[0]
            if tz_names[0].startswith('Etc'):
                # Formerly a bare debug print of the same value twice.
                log.debug('%s maps to Etc zone %s', win_name, tz_names[0])

        for tz_name in tz_names:
            tz_win[tz_name] = win_name

    log.info('Adding backwards data')
    # Map in the backwards compatible zone names
    for backward_compat_name, standard_name in backward.items():
        win_zone = tz_win.get(standard_name)
        if win_zone:
            tz_win[backward_compat_name] = win_zone

    # Etc/UTC is a common but non-standard alias for Etc/GMT:
    tz_win['Etc/UTC'] = 'UTC'

    log.info('Writing mapping')
    # Explicit encoding keeps the generated module independent of the
    # platform's default locale encoding.
    with open('tzlocal/windows_tz.py', "wt", encoding="utf-8") as out:
        out.write("# This file is autogenerated by the update_windows_mapping.py script\n"
                  "# Do not edit.\nwin_tz = ")
        pprint(win_tz, out)
        out.write("\n# Old name for the win_tz variable:\ntz_names = win_tz\n\ntz_win = ")
        pprint(tz_win, out)

    log.info('Done')


# Regenerate the mapping only when executed as a script, not when imported.
if __name__ == '__main__':
    update_windows_zones()
