#!/usr/bin/env python
"""Script to read the libphonenumber XML metadata and generate Python code.

Invocation:
  buildmetadatafromxml.py infile.xml outdir module_prefix

Processes the given XML metadata file and emits generated Python code.
The output directory will be created if it does not exist, and
__init__.py and per-region data files will be created in the directory.
"""

# Based on original Java code and XML file:
#     resources/PhoneNumberMetadata.xml
#     java/resources/com/google/i18n/phonenumbers/BuildMetadataFromXml.java
# Copyright (C) 2010-2011 The Libphonenumber Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This code was originally developed from the XML file, and the DTD within it.
# Subsequently, post-processing code was added to match the behaviour of
# BuildMetadataFromXml.java

# import to allow this code to work with Python2.5
from __future__ import with_statement

import sys
import os
import copy
import re
import getopt
import datetime
from xml.etree import ElementTree as etree

# Use the local code in preference to any pre-installed version
sys.path.insert(0, '../../python')

# Pull in the data structure definitions
from phonenumbers.phonemetadata import NumberFormat, PhoneNumberDesc, PhoneMetadata
from phonenumbers.phonemetadata import REGION_CODE_FOR_NON_GEO_ENTITY
from phonenumbers.util import UnicodeMixin, u, prnt

# Global flag for lax XML parsing.  Set to True by the -l/--lax command line
# option; when set, XPhoneNumberDesc tolerates a missing nationalNumberPattern,
# exampleNumber or possibleLengths element instead of raising.
lax = False

# Convention: variables beginning with 'x' are XML objects

# Top-level XML element containing data
TOP_XPATH = "territories"
# XML element name for the territory element
TERRITORY_TAG = "territory"
# Marker for unavailable entries
DATA_NA = "NA"

# Boilerplate text for generated Python files
METADATA_FILE_PROLOG = '"""Auto-generated file, do not edit by hand."""'
# %-formatted with a 'module' key (the module prefix given on the command line)
METADATA_FILE_IMPORT = "from %(module)s.phonemetadata import PhoneMetadata\n"
# %-formatted with a 'prefix' key, which is why the percent signs that must
# survive into the generated file are doubled.
METADATA_FILE_LOOP = '''
def _load_region(code):
    __import__("region_%%s" %% code, globals(), locals(),
               fromlist=["PHONE_METADATA_%%s" %% code], level=1)

for region_code in _AVAILABLE_REGION_CODES:
    PhoneMetadata.register_%(prefix)sregion_loader(region_code, _load_region)
'''
# Emitted verbatim (no %-formatting), and only when non-geographical country
# codes are present.
METADATA_NONGEO_FILE_LOOP = '''
for country_code in _AVAILABLE_NONGEO_COUNTRY_CODES:
    PhoneMetadata.register_nongeo_region_loader(country_code, _load_region)
'''

_COUNTRY_CODE_TO_REGION_CODE_PROLOG = '''
# A mapping from a country code to the region codes which
# denote the country/region represented by that country code.
# In the case of multiple countries sharing a calling code,
# such as the NANPA countries, the one indicated with
# "main_country_for_code" in the metadata should be first.'''

# Boilerplate header for individual region data files
# (%-formatted with 'region' and 'module' keys)
_REGION_METADATA_PROLOG = '''"""Auto-generated file, do not edit by hand. %(region)s metadata"""
from %(module)s.phonemetadata import NumberFormat, PhoneNumberDesc, PhoneMetadata
'''

# Boilerplate header for individual country-code alternate number format data files
# (%-formatted positionally with the country code, then the module prefix)
_ALT_FORMAT_METADATA_PROLOG = '''"""Auto-generated file, do not edit by hand. %s metadata"""
from %s.phonemetadata import NumberFormat
'''

# Copyright notice covering the XML metadata; include current year.
# The year is substituted once, when this module is loaded.
COPYRIGHT_NOTICE = """# Copyright (C) 2010-%s The Libphonenumber Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" % datetime.datetime.now().year


# XML processing utility functions that are useful for the particular
# structure of the phone number metadata
def _get_unique_child(xtag, eltname):
    """Get the unique child element under xtag with name eltname.

    Returns the single child of xtag matching eltname, or None when there
    is no such child (or when xtag itself is None).

    Raises Exception if more than one matching child is present.
    """
    if xtag is None:
        return None
    results = xtag.findall(eltname)
    if len(results) > 1:
        # This must propagate to the caller: duplicated elements mean the
        # metadata is malformed, which is different from the element being
        # absent.
        raise Exception("Multiple elements found where 0/1 expected")
    if results:
        return results[0]
    return None


def _get_unique_child_value(xtag, eltname):
    """Return the text of the unique eltname child of xtag, or None if there is no such child"""
    child = _get_unique_child(xtag, eltname)
    return None if child is None else child.text


def get_true_attrib(xtag, aname):
    """Report whether attribute aname is present on xtag.

    An absent attribute gives False.  A present attribute must have the
    value 'true' (giving True); any other value raises Exception.
    """
    if aname not in xtag.attrib:
        return False
    value = xtag.attrib[aname]
    if value != 'true':
        raise Exception("Unexpected value %s for %s attribute" % (value, aname))
    return True


def get_optional_true_attrib(xtag, aname):
    """Like get_true_attrib, but an absent attribute gives None rather than False.

    A present attribute must have the value 'true' (giving True); any
    other value raises Exception.
    """
    if aname not in xtag.attrib:
        return None
    value = xtag.attrib[aname]
    if value != 'true':
        raise Exception("Unexpected value %s for %s attribute" % (value, aname))
    return True


def _dews_re(re_str):
    """Strip every whitespace character out of a regular expression string; None passes through"""
    if re_str is not None:
        return re.sub(r'\s', '', re_str)
    return None


# Raw strings, so that the regex escapes are not treated as (invalid)
# string escape sequences.
_NUM_RE = re.compile(r'\d+')
_RANGE_RE = re.compile(r'\[(?P<min>\d+)-(?P<max>\d+)\]')


def _extract_lengths(ll):
    """Extract list of possible lengths from string.

    ll is a comma-separated specification in which each item is either a
    plain number ("7") or an inclusive range in square brackets ("[6-9]").

    Returns the sorted list of distinct lengths; None gives an empty list.
    Raises Exception for an item that has neither form.
    """
    if ll is None:
        return []
    lengths = set()
    for val in ll.split(','):
        if _NUM_RE.match(val):
            lengths.add(int(val))
            continue
        m = _RANGE_RE.match(val)
        if m is None:
            raise Exception("Unrecognized length specification %s" % ll)
        low = int(m.group('min'))
        high = int(m.group('max'))
        lengths.update(range(low, high + 1))
    return sorted(lengths)


def _expand_formatting_rule(rule, national_prefix):
    """Expand the abbreviations allowed in a formatting rule.

    Formatting rules can include terms "$NP" and "$FG",
    These get replaced with:
     "$NP" => the national prefix
     "$FG" => the first group, i.e. "$1"

    A rule of None is returned as None; a national_prefix of None is
    treated as the empty string.
    """
    if rule is None:
        return None
    if national_prefix is None:
        national_prefix = u("")
    # Raw strings keep the regex escape for '$' from being read as an
    # (invalid) string escape sequence.
    rule = re.sub(u(r"\$NP"), national_prefix, rule)
    rule = re.sub(u(r"\$FG"), u("$1"), rule)
    return rule


class XAlternateNumberFormat(UnicodeMixin):
    """Parse alternate NumberFormat objects from XML element.

    The parsed NumberFormat is held in self.o, which is None when no
    element was supplied.
    """
    def __init__(self, xtag):
        """Build self.o from a numberFormat XML element.

        Raises KeyError if the required 'pattern' attribute is missing and
        Exception if no format text is found.
        """
        if xtag is None:
            self.o = None
        else:
            self.o = NumberFormat()
            self.o._mutable = True
            self.o.pattern = xtag.attrib['pattern']  # REQUIRED attribute
            self.o.format = _get_unique_child_value(xtag, 'format')
            if self.o.format is None:
                raise Exception("No format pattern found")
            else:
                # Replace '$1' etc  with '\1' to match Python regexp group reference format
                self.o.format = re.sub(r'\$', u(r'\\'), self.o.format)
            # Each leadingDigits element contributes one pattern, with
            # whitespace stripped.
            for xleading_digit in xtag.findall("leadingDigits"):
                self.o.leading_digits_pattern.append(_dews_re(xleading_digit.text))
            # Currently this assumes no intlFormat elements in the element

    def __unicode__(self):
        return u(self.o)


class XNumberFormat(UnicodeMixin):
    """Parsed NumberFormat objects from XML element.

    self.o holds the national NumberFormat and self.io the international
    one; both are None when no element was supplied.  Constructing an
    instance also appends these formats to the owning territory's metadata.
    """
    def __init__(self, owning_xterr, xtag, national_prefix,
                 national_prefix_formatting_rule,
                 national_prefix_optional_when_formatting,
                 carrier_code_formatting_rule):
        """Parse one numberFormat element.

        owning_xterr is the object whose .o metadata receives the parsed
        formats and whose has_explicit_intl_format flag is set when an
        intlFormat element is present.  national_prefix is used to expand
        "$NP" in element-level rules.  The remaining three arguments are
        the territory-wide defaults, used when the element does not supply
        its own value.

        Raises KeyError if the required 'pattern' attribute is missing and
        Exception if no format text is found.
        """
        if xtag is None:
            self.o = None
            self.io = None
        else:
            self.o = NumberFormat()
            self.o._mutable = True
            # Find the REQUIRED attribute
            self.o.pattern = xtag.attrib['pattern']
            # Find the IMPLIED attribute(s)
            self.o.domestic_carrier_code_formatting_rule = xtag.get('carrierCodeFormattingRule', None)
            self.o.national_prefix_formatting_rule = xtag.get('nationalPrefixFormattingRule', None)
            self.o.national_prefix_optional_when_formatting = get_optional_true_attrib(xtag, 'nationalPrefixOptionalWhenFormatting')

            # Post-process formatting rules for expansions and defaults
            if self.o.national_prefix_formatting_rule is not None:
                # expand abbreviations
                self.o.national_prefix_formatting_rule = _expand_formatting_rule(self.o.national_prefix_formatting_rule,
                                                                                 national_prefix)
            else:
                # set to territory-wide formatting rule
                self.o.national_prefix_formatting_rule = national_prefix_formatting_rule
            if self.o.national_prefix_formatting_rule is not None:
                # Replace '$1' etc  with '\1' to match Python regexp group reference format
                self.o.national_prefix_formatting_rule = re.sub(r'\$', r'\\', self.o.national_prefix_formatting_rule)

            if not self.o.national_prefix_optional_when_formatting and national_prefix_optional_when_formatting:
                # If attrib is None, it was missing and inherits territory-wide value
                self.o.national_prefix_optional_when_formatting = national_prefix_optional_when_formatting

            if self.o.domestic_carrier_code_formatting_rule is not None:
                # expand abbreviations
                self.o.domestic_carrier_code_formatting_rule = _expand_formatting_rule(self.o.domestic_carrier_code_formatting_rule,
                                                                                       national_prefix)
            else:
                # set to territory-wide formatting rule
                self.o.domestic_carrier_code_formatting_rule = carrier_code_formatting_rule
            if self.o.domestic_carrier_code_formatting_rule is not None:
                # Replace '$1' etc  with '\1' to match Python regexp group reference format.
                # Only a '$' followed by a digit is rewritten here, so any other
                # '$'-prefixed term in the rule is left untouched.
                self.o.domestic_carrier_code_formatting_rule = re.sub(r'\$(\d)', r'\\\1', self.o.domestic_carrier_code_formatting_rule)

            self.o.format = _get_unique_child_value(xtag, 'format')
            if self.o.format is None:
                raise Exception("No format pattern found")
            else:
                # Replace '$1' etc  with '\1' to match Python regexp group reference format
                self.o.format = re.sub(r'\$', u(r'\\'), self.o.format)
            xleading_digits = xtag.findall("leadingDigits")
            for xleading_digit in xleading_digits:
                self.o.leading_digits_pattern.append(_dews_re(xleading_digit.text))

            # Add this NumberFormat object into the owning metadata
            owning_xterr.o.number_format.append(self.o)

            # Extract the pattern for international format; if not present, use the national format.
            # If the intlFormat is set to "NA" the intlFormat should be ignored.
            self.io = NumberFormat(pattern=self.o.pattern,
                                   leading_digits_pattern=self.o.leading_digits_pattern)
            self.io._mutable = True

            intl_format = _get_unique_child_value(xtag, "intlFormat")
            if intl_format is None:
                # Default to use the same as the national pattern if none is defined.
                self.io.format = self.o.format
            else:
                # Replace '$1' etc  with '\1' to match Python regexp group reference format
                intl_format = re.sub(r'\$', u(r'\\'), intl_format)
                if intl_format != DATA_NA:
                    self.io.format = intl_format
                owning_xterr.has_explicit_intl_format = True
            if self.io.format is not None:
                # Add this international NumberFormat object into the owning metadata
                owning_xterr.o.intl_number_format.append(self.io)

    def __unicode__(self):
        return u(self.o)


class XPhoneNumberDesc(UnicodeMixin):
    """Parse PhoneNumberDesc object from XML element.

    The parsed PhoneNumberDesc is held in self.o (None when the element is
    absent) and the XML element itself in self.xtag.
    """
    def __init__(self, xterritory, tag, template=None, general_desc=False):
        """Parse the child of xterritory named tag.

        template, if given, supplies the national number pattern when the
        element has none and lax parsing is enabled.  general_desc marks the
        generalDesc element, which must not carry exampleNumber or
        possibleLengths children.

        Raises Exception when required children are missing (unless lax
        parsing is enabled) or unexpected children are present.
        """
        terr_id = xterritory.attrib['id']
        xtag = _get_unique_child(xterritory, tag)
        self.xtag = xtag
        if xtag is None:
            # When a PhoneNumberDesc is absent, the upstream Java code builds an object
            # of form PhoneNumberDesc(national_number_pattern="NA", possible_length=(-1,)).
            # The Python code uses a desc of None for this case, to keep the generated
            # code size smaller.
            self.o = None
            return
        self.o = PhoneNumberDesc()
        self.o._mutable = True
        self.o.national_number_pattern = None
        self.o.possible_number_pattern = None  # retired
        # Set possible length info to None for now, to mark that it wasn't specified
        # for this numberDesc.
        self.o.possible_length = None
        self.o.possible_length_local_only = None
        self.o.example_number = None

        # Always expect a nationalNumberPattern element
        self.o.national_number_pattern = _dews_re(_get_unique_child_value(xtag, 'nationalNumberPattern'))
        if self.o.national_number_pattern is None:
            if lax:
                if template is not None:
                    self.o.national_number_pattern = template.national_number_pattern
            else:
                raise Exception("Missing required nationalNumberPattern element in %s.%s" % (terr_id, tag))

        # An exampleNumber element is present iff this is not the generalDesc
        example_number = _get_unique_child_value(xtag, 'exampleNumber')
        if (not lax) and (not general_desc) and (example_number is None):
            raise Exception("Missing required exampleNumber element in %s.%s" % (terr_id, tag))
        if general_desc and example_number is not None:
            if lax:
                example_number = None
            else:
                raise Exception("Unexpected exampleNumber element for generalDesc in %s.%s" % (terr_id, tag))
        self.o.example_number = example_number

        # A possibleLengths element is present iff this is not the generalDesc
        possible_lengths = _get_unique_child(xtag, 'possibleLengths')
        if (not lax) and (not general_desc) and (possible_lengths is None):
            raise Exception("Missing required possibleLengths element in %s.%s" % (terr_id, tag))
        if general_desc and possible_lengths is not None:
            raise Exception("Unexpected possibleLengths for generalDesc in %s.%s" % (terr_id, tag))
        if possible_lengths is not None:
            national_lengths = possible_lengths.attrib['national']  # REQUIRED attribute
            if national_lengths == "-1":
                # -1 used to be a special possibleLengths value, no longer allowed.
                raise Exception("Found unexpected %s.%s.possibleLength.national==-1" % (terr_id, tag))
            self.o.possible_length = _extract_lengths(national_lengths)
            local_lengths = possible_lengths.get('localOnly', None)  # IMPLIED attribute
            self.o.possible_length_local_only = _extract_lengths(local_lengths)

    def __unicode__(self):
        return u(self.o)


class XAlternateTerritory(UnicodeMixin):
    """Alternate number formats for one country code, read from a territory XML element"""
    def __init__(self, xterritory):
        self.country_code = int(xterritory.attrib['countryCode'])
        # Gather the NumberFormat parsed from each numberFormat element
        # under availableFormats (when that element is present)
        self.number_format = []
        xformats = _get_unique_child(xterritory, "availableFormats")
        if xformats is not None:
            self.number_format.extend([XAlternateNumberFormat(xfmt).o
                                       for xfmt in xformats.findall("numberFormat")])
        if not self.number_format:
            raise Exception("No number formats found in available formats")
        # intlFormat elements are not examined here

    def __unicode__(self):
        return u(self.number_format)


class XTerritory(UnicodeMixin):
    """Parse PhoneMetadata from XML element (territory).

    The parsed PhoneMetadata is held in self.o.  has_explicit_intl_format
    records whether any numberFormat element supplied an intlFormat child.
    """
    def __init__(self, xterritory, short_data):
        """Build the PhoneMetadata for one territory XML element.

        short_data selects which set of number descriptions is parsed
        (see the two branches below).

        Raises KeyError if the required 'id' attribute is missing, and
        Exception when the metadata is malformed.
        """
        # Retrieve the REQUIRED attributes
        terr_id = xterritory.attrib['id']
        self.o = PhoneMetadata(terr_id, short_data=short_data, register=False)
        self.o._mutable = True
        if 'countryCode' in xterritory.attrib:
            self.o.country_code = int(xterritory.attrib['countryCode'])
        else:
            self.o.country_code = None
        # Retrieve the IMPLIED attributes
        self.o.international_prefix = xterritory.get('internationalPrefix', None)
        self.o.leading_digits = xterritory.get('leadingDigits', None)
        self.o.preferred_international_prefix = xterritory.get('preferredInternationalPrefix', None)
        self.o.national_prefix = xterritory.get('nationalPrefix', None)
        self.o.national_prefix_for_parsing = _dews_re(xterritory.get('nationalPrefixForParsing', None))
        self.o.national_prefix_transform_rule = xterritory.get('nationalPrefixTransformRule', None)
        if self.o.national_prefix_transform_rule is not None:
            # Replace '$1' etc  with '\1' to match Python regexp group reference format
            self.o.national_prefix_transform_rule = re.sub(r'\$', r'\\', self.o.national_prefix_transform_rule)
        self.o.preferred_extn_prefix = xterritory.get('preferredExtnPrefix', None)
        national_prefix_formatting_rule = xterritory.get('nationalPrefixFormattingRule', None)
        national_prefix_optional_when_formatting = get_true_attrib(xterritory, 'nationalPrefixOptionalWhenFormatting')
        carrier_code_formatting_rule = xterritory.get('carrierCodeFormattingRule', None)

        # Post-processing for the territory-default formatting rules.  These are used
        # in NumberFormat elements that don't supply their own formatting rules.
        if self.o.national_prefix is not None:
            if self.o.national_prefix_for_parsing is None:
                # Default to self.national_prefix when national_prefix_for_parsing not set
                self.o.national_prefix_for_parsing = self.o.national_prefix
        national_prefix_formatting_rule = _expand_formatting_rule(national_prefix_formatting_rule,
                                                                  self.o.national_prefix)
        carrier_code_formatting_rule = _expand_formatting_rule(carrier_code_formatting_rule,
                                                               self.o.national_prefix)
        self.o.main_country_for_code = get_true_attrib(xterritory, 'mainCountryForCode')
        self.o.leading_zero_possible = get_true_attrib(xterritory, 'leadingZeroPossible')
        self.o.mobile_number_portable_region = get_true_attrib(xterritory, 'mobileNumberPortableRegion')

        # Retrieve the various PhoneNumberDesc elements, which mostly have the form:
        #   (nationalNumberPattern, possibleLengths, exampleNumber)
        # However the general_desc is first and special; it has form:
        #   (nationalNumberPattern)
        # and it will be used to fill out missing fields in many of the other PhoneNumberDesc elements.
        self.o.general_desc = XPhoneNumberDesc(xterritory, 'generalDesc', general_desc=True).o

        self.o.toll_free = XPhoneNumberDesc(xterritory, 'tollFree', template=self.o.general_desc).o
        self.o.premium_rate = XPhoneNumberDesc(xterritory, 'premiumRate', template=self.o.general_desc).o
        if not short_data:
            # Mobile and fixed-line descriptions do not inherit anything from the general_desc
            self.o.fixed_line = XPhoneNumberDesc(xterritory, 'fixedLine').o
            self.o.mobile = XPhoneNumberDesc(xterritory, 'mobile').o

            self.o.pager = XPhoneNumberDesc(xterritory, 'pager', template=self.o.general_desc).o
            self.o.shared_cost = XPhoneNumberDesc(xterritory, 'sharedCost', template=self.o.general_desc).o
            self.o.personal_number = XPhoneNumberDesc(xterritory, 'personalNumber', template=self.o.general_desc).o
            self.o.voip = XPhoneNumberDesc(xterritory, 'voip', template=self.o.general_desc).o
            self.o.uan = XPhoneNumberDesc(xterritory, 'uan', template=self.o.general_desc).o
            self.o.voicemail = XPhoneNumberDesc(xterritory, 'voicemail', template=self.o.general_desc).o
            self.o.no_international_dialling = XPhoneNumberDesc(xterritory, 'noInternationalDialling', template=self.o.general_desc).o

            # Skip noInternationalDialling when combining possible length information
            sub_descs = (self.o.toll_free, self.o.premium_rate, self.o.fixed_line, self.o.mobile,
                         self.o.pager, self.o.shared_cost, self.o.personal_number, self.o.voip,
                         self.o.uan, self.o.voicemail)
            all_descs = (self.o.toll_free, self.o.premium_rate, self.o.fixed_line, self.o.mobile,
                         self.o.pager, self.o.shared_cost, self.o.personal_number, self.o.voip,
                         self.o.uan, self.o.voicemail, self.o.no_international_dialling)
        else:
            self.o.standard_rate = XPhoneNumberDesc(xterritory, 'standardRate', template=self.o.general_desc).o
            self.o.short_code = XPhoneNumberDesc(xterritory, 'shortCode', template=self.o.general_desc).o
            self.o.carrier_specific = XPhoneNumberDesc(xterritory, 'carrierSpecific', template=self.o.general_desc).o
            self.o.sms_services = XPhoneNumberDesc(xterritory, 'smsServices', template=self.o.general_desc).o
            self.o.emergency = XPhoneNumberDesc(xterritory, 'emergency', template=self.o.general_desc).o
            # For short number metadata, copy the lengths from the "short code" section only.
            sub_descs = (self.o.short_code,)
            # NOTE(review): sms_services is not listed here, so it never receives
            # trickled-down length information -- confirm this is intended.
            all_descs = (self.o.toll_free, self.o.premium_rate, self.o.standard_rate,
                         self.o.short_code, self.o.carrier_specific, self.o.emergency)

        # Build the possible length information for general_desc based on all the different types of number.
        if self.o.general_desc is None:
            # Fail with a clear message rather than an AttributeError below.
            raise Exception("Missing required generalDesc element in %s" % terr_id)
        possible_lengths = set()
        local_lengths = set()
        for desc in sub_descs:
            if desc is None:
                continue
            if desc.possible_length is not None:
                possible_lengths.update(desc.possible_length)
            if desc.possible_length_local_only is not None:
                local_lengths.update(desc.possible_length_local_only)
        self.o.general_desc.possible_length = sorted(possible_lengths)
        self.o.general_desc.possible_length_local_only = sorted(local_lengths)
        if -1 in self.o.general_desc.possible_length:
            raise Exception("Found -1 length in general_desc.possible_length")
        if -1 in self.o.general_desc.possible_length_local_only:
            raise Exception("Found -1 length in general_desc.possible_length_local_only")

        # Now that the union of length information is available, trickle it back down to those types
        # of number that didn't specify any length information (indicated by having those fields set
        # to None).  Types of number that have no national number pattern get empty length lists
        # instead.
        for desc in all_descs:
            if desc is None:
                continue
            if desc.national_number_pattern is None:
                desc.possible_length = []
                desc.possible_length_local_only = []
                continue
            if desc.possible_length is None:
                desc.possible_length = copy.copy(self.o.general_desc.possible_length)
            if desc.possible_length_local_only is None:
                # general_desc is already the PhoneNumberDesc itself (not its
                # XPhoneNumberDesc wrapper), so read the attribute directly.
                desc.possible_length_local_only = copy.copy(self.o.general_desc.possible_length_local_only)

        # Look for available formats
        self.has_explicit_intl_format = False
        formats = _get_unique_child(xterritory, "availableFormats")
        if formats is not None:
            for xelt in formats.findall("numberFormat"):
                # Create an XNumberFormat object, which contains a NumberFormat object
                # or two, and which self-registers them with self.o
                XNumberFormat(self,
                              xelt,
                              self.o.national_prefix,
                              national_prefix_formatting_rule,
                              national_prefix_optional_when_formatting,
                              carrier_code_formatting_rule)
            if len(self.o.number_format) == 0:
                raise Exception("No number formats found in available formats")
        if not self.has_explicit_intl_format:
            # Only a small number of regions need to specify the intlFormats
            # in the XML.  For the majority of countries the intlNumberFormat
            # metadata is an exact copy of the national NumberFormat metadata.
            # To minimize the size of the metadata file, we only keep
            # intlNumberFormats that actually differ in some way to the
            # national formats.
            self.o.intl_number_format = []

    def identifier(self):
        """Return the name used for this territory in generated file and variable names."""
        if self.o.id == REGION_CODE_FOR_NON_GEO_ENTITY:
            # For non-geographical country calling codes (e.g. +800), use the
            # country calling codes instead of the region code to form the
            # file name.
            return str(self.o.country_code)
        else:
            return self.o.id

    def __unicode__(self):
        return u(self.o)


class XPhoneNumberMetadata(UnicodeMixin):
    """Entire collection of phone number metadata retrieved from XML.

    self.territory maps an identifier (region code, or country code string
    for non-geographical entities) to its XTerritory.  self.alt_territory is
    None until add_alternate_formats() is called, after which it maps a
    country code string to its XAlternateTerritory.
    """
    def __init__(self, filename, short_data):
        """Load all territories from the XML file called filename.

        Raises Exception for a duplicate territory, an unexpected element,
        or a missing top-level element.
        """
        xterritories = self._parse_top_element(filename)
        # Iterate over the child elements, as there isn't any complex nesting or
        # tree structures in the XML DTD.
        self.territory = {}
        for xterritory in xterritories:
            if xterritory.tag == TERRITORY_TAG:
                terrobj = XTerritory(xterritory, short_data)
                terr_id = terrobj.identifier()  # like "US" for countries, "800" for non-geo
                if terr_id in self.territory:
                    raise Exception("Duplicate entry for %s" % terr_id)
                self.territory[terr_id] = terrobj
            else:
                raise Exception("Unexpected element %s found" % xterritory.tag)
        self.alt_territory = None
        self.short_data = short_data

    @staticmethod
    def _parse_top_element(filename):
        """Parse the XML file and return its top-level element of interest."""
        # Open in binary mode so that the XML parser, not the locale, decides
        # how the bytes are decoded.
        with open(filename, "rb") as infile:
            xtree = etree.parse(infile)
        # Move to the top-level element of interest
        xterritories = xtree.find(TOP_XPATH)
        if xterritories is None:
            raise Exception("No %s element found in %s" % (TOP_XPATH, filename))
        return xterritories

    def add_alternate_formats(self, filename):
        """Add phone number alternate format metadata retrieved from XML"""
        xterritories = self._parse_top_element(filename)
        self.alt_territory = {}  # country_code to XAlternateTerritory
        for xterritory in xterritories:
            if xterritory.tag == TERRITORY_TAG:
                terrobj = XAlternateTerritory(xterritory)
                cc_id = str(terrobj.country_code)
                if cc_id in self.alt_territory:
                    raise Exception("Duplicate entry for %s" % cc_id)
                self.alt_territory[cc_id] = terrobj
            else:
                raise Exception("Unexpected element %s found" % xterritory.tag)

    def __unicode__(self):
        return u("\n").join([u("%s: %s") % (country_id, territory) for country_id, territory in self.territory.items()])

    def emit_metadata_for_region_py(self, region, region_filename, module_prefix):
        """Emit Python code generating the metadata for the given region"""
        terrobj = self.territory[region]
        with open(region_filename, "w") as outfile:
            prnt(_REGION_METADATA_PROLOG % {'region': terrobj.identifier(), 'module': module_prefix}, file=outfile)
            prnt("PHONE_METADATA_%s = %s" % (terrobj.identifier(), terrobj), file=outfile)

    def emit_alt_formats_for_cc_py(self, cc, cc_filename, module_prefix):
        """Emit Python code generating the alternate format metadata for the given country code"""
        terrobj = self.alt_territory[cc]
        with open(cc_filename, "w") as outfile:
            prnt(_ALT_FORMAT_METADATA_PROLOG % (cc, module_prefix), file=outfile)
            prnt("PHONE_ALT_FORMAT_%s = %s" % (cc, terrobj), file=outfile)

    def emit_metadata_py(self, datadir, module_prefix):
        """Emit Python code for the phone number metadata into directory datadir.

        The directory is created if it does not exist.  One region_*.py file
        is written per territory, one alt_format_*.py file per country code
        with alternate formats, plus an __init__.py that ties them together.
        """
        if not os.path.isdir(datadir):
            # makedirs also creates any missing parent directories
            os.makedirs(datadir)
        modulefilename = os.path.join(datadir, '__init__.py')

        # First, generate all of the individual per-region files in that directory
        for country_id in sorted(self.territory.keys()):
            filename = os.path.join(datadir, "region_%s.py" % country_id)
            self.emit_metadata_for_region_py(country_id, filename, module_prefix)

        # Same for any per-country-code alternate format files
        if self.alt_territory is not None:
            for country_code in sorted(self.alt_territory.keys()):
                filename = os.path.join(datadir, "alt_format_%s.py" % country_code)
                self.emit_alt_formats_for_cc_py(country_code, filename, module_prefix)

        # Now build a module file that includes them all
        with open(modulefilename, "w") as outfile:
            prnt(METADATA_FILE_PROLOG, file=outfile)
            prnt(COPYRIGHT_NOTICE, file=outfile)
            prnt(METADATA_FILE_IMPORT % {'module': module_prefix}, file=outfile)
            nongeo_codes = []
            country_codes = []
            for country_id in sorted(self.territory.keys()):
                terrobj = self.territory[country_id]
                if terrobj.o.id == REGION_CODE_FOR_NON_GEO_ENTITY:
                    # The identifier is the country code as a string of
                    # digits, so it is emitted unquoted.
                    nongeo_codes.append(country_id)
                else:
                    country_codes.append("'%s'" % country_id)  # quoted string
            prnt("_AVAILABLE_REGION_CODES = [%s]" % ",".join(country_codes), file=outfile)
            if nongeo_codes:
                prnt("_AVAILABLE_NONGEO_COUNTRY_CODES = [%s]" % ", ".join(nongeo_codes), file=outfile)
            register_prefix = "short_" if self.short_data else ""
            prnt(METADATA_FILE_LOOP % {'prefix': register_prefix}, file=outfile)
            if nongeo_codes:
                prnt(METADATA_NONGEO_FILE_LOOP, file=outfile)

            if self.alt_territory is not None:
                for country_code in sorted(self.alt_territory.keys()):
                    prnt("from .alt_format_%s import PHONE_ALT_FORMAT_%s" % (country_code, country_code), file=outfile)
                prnt("_ALT_NUMBER_FORMATS = {%s}" %
                     ", ".join(["%s: PHONE_ALT_FORMAT_%s" % (cc, cc) for cc in sorted(self.alt_territory.keys())]),
                     file=outfile)

            # Build up a map from country code (int) to list of region codes (ISO 3166-1 alpha 2)
            country_code_to_region_code = {}
            for country_id in sorted(self.territory.keys()):
                terrobj = self.territory[country_id]
                if terrobj.o.country_code is not None:
                    country_code = int(terrobj.o.country_code)
                    if country_code not in country_code_to_region_code:
                        country_code_to_region_code[country_code] = []
                    if terrobj.o.main_country_for_code:
                        # The main country for a shared code goes first
                        country_code_to_region_code[country_code].insert(0, terrobj.o.id)
                    else:
                        country_code_to_region_code[country_code].append(terrobj.o.id)

            # Emit the mapping from country code to region code if nonempty.
            if country_code_to_region_code:
                prnt(_COUNTRY_CODE_TO_REGION_CODE_PROLOG, file=outfile)
                prnt("_COUNTRY_CODE_TO_REGION_CODE = {", file=outfile)
                for country_code in sorted(country_code_to_region_code.keys()):
                    country_ids = country_code_to_region_code[country_code]
                    prnt('    %d: ("%s",),' % (country_code, '", "'.join(country_ids)), file=outfile)
                prnt("}", file=outfile)


def _standalone(argv):
    """Command line driver: parse argv, load the XML metadata and write the generated code.

    argv holds the options followed by exactly three positional arguments:
    the input XML file, the output directory and the module prefix.  On a
    usage error (or a help request) the module docstring is printed to
    stderr and the process exits with status 1.
    """
    global lax
    try:
        opts, args = getopt.getopt(argv, "hlsa:", ("help", "lax", "short", "alt="))
    except getopt.GetoptError:
        prnt(__doc__, file=sys.stderr)
        sys.exit(1)

    alternate = None
    short_data = False
    for opt, arg in opts:
        if opt in ("-s", "--short"):
            short_data = True
        elif opt in ("-l", "--lax"):
            lax = True
        elif opt in ("-a", "--alt"):
            alternate = arg
        else:
            # Either a help request or an option not handled above; both
            # print the usage text and stop.
            if opt not in ("-h", "--help"):
                prnt("Unknown option %s" % opt, file=sys.stderr)
            prnt(__doc__, file=sys.stderr)
            sys.exit(1)

    if len(args) != 3:
        prnt(__doc__, file=sys.stderr)
        sys.exit(1)
    infile, outdir, module_prefix = args
    pmd = XPhoneNumberMetadata(infile, short_data)
    if alternate is not None:
        pmd.add_alternate_formats(alternate)
    pmd.emit_metadata_py(outdir, module_prefix)


# Run the generator only when executed as a script; the program name in
# sys.argv[0] is dropped before the arguments are parsed.
if __name__ == "__main__":
    _standalone(sys.argv[1:])
