import collections
import collections.abc
import ipaddress
import numbers
import re

from .chars import SUB_DELIMS
from .encoding import uriencode
from .split import uriunsplit

# RFC 3986 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
_SCHEME_RE = re.compile(b'^[A-Za-z][A-Za-z0-9+.-]*$')

# RFC 3986 3.2: authority = [ userinfo "@" ] host [ ":" port ]
_AUTHORITY_RE_BYTES = re.compile(b'^(?:(.*)@)?(.*?)(?::([0-9]*))?$')
_AUTHORITY_RE_STR = re.compile(u'^(?:(.*)@)?(.*?)(?::([0-9]*))?$')

# safe component characters
_SAFE_USERINFO = SUB_DELIMS + ':'
_SAFE_HOST = SUB_DELIMS
_SAFE_PATH = SUB_DELIMS + ':@/'
_SAFE_QUERY = SUB_DELIMS + ':@/?'
_SAFE_FRAGMENT = SUB_DELIMS + ':@/?'


def _scheme(scheme):
    if _SCHEME_RE.match(scheme):
        return scheme.lower()
    else:
        raise ValueError('Invalid scheme component')


def _authority(userinfo, host, port, encoding):
    authority = []

    if userinfo is not None:
        authority.append(uriencode(userinfo, _SAFE_USERINFO, encoding))
        authority.append(b'@')

    if isinstance(host, ipaddress.IPv6Address):
        authority.append(b'[' + host.compressed.encode() + b']')
    elif isinstance(host, ipaddress.IPv4Address):
        authority.append(host.compressed.encode())
    elif isinstance(host, bytes):
        authority.append(_host(host))
    elif host is not None:
        authority.append(_host(host.encode('utf-8')))

    if isinstance(port, numbers.Number):
        authority.append(_port(str(port).encode()))
    elif isinstance(port, bytes):
        authority.append(_port(port))
    elif port is not None:
        authority.append(_port(port.encode()))

    return b''.join(authority) if authority else None


def _ip_literal(address):
    if address.startswith('v'):
        raise ValueError('Address mechanism not supported')
    else:
        return b'[' + ipaddress.IPv6Address(address).compressed.encode() + b']'


def _host(host):
    # RFC 3986 3.2.3: Although host is case-insensitive, producers and
    # normalizers should use lowercase for registered names and
    # hexadecimal addresses for the sake of uniformity, while only
    # using uppercase letters for percent-encodings.
    if host.startswith(b'[') and host.endswith(b']'):
        return _ip_literal(host[1:-1].decode())
    # check for IPv6 addresses as returned by SplitResult.gethost()
    try:
        return _ip_literal(host.decode('utf-8'))
    except ValueError:
        return uriencode(host, _SAFE_HOST, 'utf-8').lower()


def _port(port):
    # RFC 3986 3.2.3: URI producers and normalizers should omit the
    # port component and its ":" delimiter if port is empty or if its
    # value would be the same as that of the scheme's default.
    if port.lstrip(b'0123456789'):
        raise ValueError('Invalid port subcomponent')
    elif port:
        return b':' + port
    else:
        return b''


def _querylist(items, sep, encoding):
    terms = []
    append = terms.append
    safe = _SAFE_QUERY.replace(sep, '')
    for key, value in items:
        name = uriencode(key, safe, encoding)
        if value is None:
            append(name)
        elif isinstance(value, (bytes, str)):
            append(name + b'=' + uriencode(value, safe, encoding))
        else:
            append(name + b'=' + uriencode(str(value), safe, encoding))
    return sep.encode('ascii').join(terms)


def _querydict(mapping, sep, encoding):
    items = []
    for key, value in mapping.items():
        if isinstance(value, (bytes, str)):
            items.append((key, value))
        elif isinstance(value, collections.abc.Iterable):
            items.extend([(key, v) for v in value])
        else:
            items.append((key, value))
    return _querylist(items, sep, encoding)


def uricompose(scheme=None, authority=None, path='', query=None,
               fragment=None, userinfo=None, host=None, port=None,
               querysep='&', encoding='utf-8'):
    """Compose a URI reference string from its individual components."""

    # RFC 3986 3.1: Scheme names consist of a sequence of characters
    # beginning with a letter and followed by any combination of
    # letters, digits, plus ("+"), period ("."), or hyphen ("-").
    # Although schemes are case-insensitive, the canonical form is
    # lowercase and documents that specify schemes must do so with
    # lowercase letters.  An implementation should accept uppercase
    # letters as equivalent to lowercase in scheme names (e.g., allow
    # "HTTP" as well as "http") for the sake of robustness but should
    # only produce lowercase scheme names for consistency.
    if isinstance(scheme, bytes):
        scheme = _scheme(scheme)
    elif scheme is not None:
        scheme = _scheme(scheme.encode())

    # authority must be string type or three-item iterable
    if authority is None:
        authority = (None, None, None)
    elif isinstance(authority, bytes):
        authority = _AUTHORITY_RE_BYTES.match(authority).groups()
    elif isinstance(authority, str):
        authority = _AUTHORITY_RE_STR.match(authority).groups()
    elif not isinstance(authority, collections.abc.Iterable):
        raise TypeError('Invalid authority type')
    elif len(authority) != 3:
        raise ValueError('Invalid authority length')
    authority = _authority(
        userinfo if userinfo is not None else authority[0],
        host if host is not None else authority[1],
        port if port is not None else authority[2],
        encoding
    )

    # RFC 3986 3.3: If a URI contains an authority component, then the
    # path component must either be empty or begin with a slash ("/")
    # character.  If a URI does not contain an authority component,
    # then the path cannot begin with two slash characters ("//").
    path = uriencode(path, _SAFE_PATH, encoding)
    if authority is not None and path and not path.startswith(b'/'):
        raise ValueError('Invalid path with authority component')
    if authority is None and path.startswith(b'//'):
        raise ValueError('Invalid path without authority component')

    # RFC 3986 4.2: A path segment that contains a colon character
    # (e.g., "this:that") cannot be used as the first segment of a
    # relative-path reference, as it would be mistaken for a scheme
    # name.  Such a segment must be preceded by a dot-segment (e.g.,
    # "./this:that") to make a relative-path reference.
    if scheme is None and authority is None and not path.startswith(b'/'):
        if b':' in path.partition(b'/')[0]:
            path = b'./' + path

    # RFC 3986 3.4: The characters slash ("/") and question mark ("?")
    # may represent data within the query component.  Beware that some
    # older, erroneous implementations may not handle such data
    # correctly when it is used as the base URI for relative
    # references (Section 5.1), apparently because they fail to
    # distinguish query data from path data when looking for
    # hierarchical separators.  However, as query components are often
    # used to carry identifying information in the form of "key=value"
    # pairs and one frequently used value is a reference to another
    # URI, it is sometimes better for usability to avoid percent-
    # encoding those characters.
    if isinstance(query, (bytes, str)):
        query = uriencode(query, _SAFE_QUERY, encoding)
    elif isinstance(query, collections.abc.Mapping):
        query = _querydict(query, querysep, encoding)
    elif isinstance(query, collections.abc.Iterable):
        query = _querylist(query, querysep, encoding)
    elif query is not None:
        raise TypeError('Invalid query type')

    # RFC 3986 3.5: The characters slash ("/") and question mark ("?")
    # are allowed to represent data within the fragment identifier.
    # Beware that some older, erroneous implementations may not handle
    # this data correctly when it is used as the base URI for relative
    # references.
    if fragment is not None:
        fragment = uriencode(fragment, _SAFE_FRAGMENT, encoding)

    # return URI reference as `str`
    return uriunsplit((scheme, authority, path, query, fragment)).decode()
