import functools
import heapq
import logging
import os
import re
import sys
import time

import cachetools
from distro_info import DebianDistroInfo, UbuntuDistroInfo
from launchpadlib.launchpad import Launchpad as LP
from ubuntutools.archive import UbuntuSourcePackage, DebianSourcePackage, DownloadError
import ubuntutools.lp.lpapicache
from ubuntutools.lp.lpapicache import SourcePackagePublishingHistory

_LP_LOGIN = None
_LP_LOGIN_AUTH = None
_lp_service = 'production'
_lp_api_version = 'devel'

_ddi = DebianDistroInfo()
_udi = UbuntuDistroInfo()

class SourceExtractionException(Exception):
    """Trivial exception class for source-extracting errors"""
    pass


@cachetools.cached(
    cache=cachetools.LFUCache(maxsize=128),
    key=lambda lpobj, attr: (lpobj._root, getattr(lpobj, '%s_link' % attr)),
)
def _get_cached_lp_link(lpobj, attr):
    """Fetch an LP object through a cache

    When <lpobj>.<foo>_link exists, this function can be used to fetch
    <lpobj>.<foo> through a cache keyed on <foo>_link and the logged in object.
    This is used for optimisation in the case that we need to get the same
    <foo> across many objects. For example, we fetch 'distribution' and
    'series' from many different source_package_publishing_history objects, and
    doing those fetches through this cache saves a very large number of API
    round trips.

    The cache is keyed on (lpobj._root, <lpobj>.<attr>_link) to maintain
    differing visibilities of objects across different logins.

    lpobj._root is technically private, but seems to be the only sensible
    option:

        <cjwatson> rbasak: I think using _root is your only option
        <cjwatson> It's technically private but in practice very stable

    :param lazr.restfulclient.resource.Entry lpobj: the object from Launchpad
    :param str attr: the key against the Launchpad object to fetch, for which a
        <lpobj>.<attr>_link entry exists
    :returns: the equivalent of <lpobj>.<attr>, but cached
    :rtype: lazr.restfulclient.resource.Entry
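
    For example (illustrative; spphr stands for any
    source_package_publishing_history entry already fetched from Launchpad):

        series = _get_cached_lp_link(spphr, 'distro_series')
        distribution = _get_cached_lp_link(series, 'distribution')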
    """
    return getattr(lpobj, attr)


def launchpad_login():
    """Cache a single launchpad login here."""
    global _LP_LOGIN
    if _LP_LOGIN:
        return _LP_LOGIN

    _LP_LOGIN = LP.login_anonymously('git-ubuntu-importer', _lp_service,
                                     version=_lp_api_version)
    # This is deliberately in a long form and we do not import the Launchpad
    # object into the module namespace itself to work around LP: #1733388
    ubuntutools.lp.lpapicache.Launchpad.login_existing(_LP_LOGIN)

    return _LP_LOGIN


def launchpad_login_auth():
    """Cache a single launchpad login here."""
    global _LP_LOGIN_AUTH
    if _LP_LOGIN_AUTH:
        return _LP_LOGIN_AUTH

    _LP_LOGIN_AUTH = LP.login_with('git-ubuntu-importer', _lp_service,
                                   version=_lp_api_version)
    return _LP_LOGIN_AUTH


def derive_source_from_series(series):
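    """Determine the distribution ('debian' or 'ubuntu') from a series name.

    :param str series: a series name (codename or alias) from a changelog
    :rtype: str
    :returns: 'debian' or 'ubuntu'
    :raises ValueError: if the series is not a known Debian or Ubuntu series
    """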
    if series == 'devel':
        return 'ubuntu'
    if _ddi.valid(codename=series):
        return 'debian'
    if _udi.valid(codename=series):
        return 'ubuntu'
    raise ValueError("Unable to determine distribution from %s" % series)

def derive_codename_from_series(series):
    """Determine the release codename from the series name.

    :param str series: The series name from the Debian changelog
    :rtype str
    :returns: The corresponding codename for the series.
    :raises ValueError: If no valid codename could be determined.

    Each entry in a Debian changelog includes a package, version, series
    (aka 'distribution' in Debian documentation), and urgency in its
    first line.  The series can refer to either a Debian or Ubuntu
    release's codename (e.g. 'buster', 'sid', 'bionic', or 'eoan'), or
    an alias (e.g. 'unstable', 'testing').

    This routine determines whether the given series is valid for Ubuntu
    or Debian, and translates any aliases into the appropriate codename
    (e.g. 'stable' to 'buster', 'unstable' to 'sid').  If the codename
    was not found, a ValueError exception is thrown.
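
    For example (illustrative; the exact mapping depends on the installed
    distro-info data):

        derive_codename_from_series('unstable')  # -> 'sid'
        derive_codename_from_series('bionic')    # -> 'bionic'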
    """
    if _ddi.valid(codename=series):
        return _ddi.codename(release=series, default=series)
    if _udi.valid(codename=series):
        return _udi.codename(release=series, default=series)
    raise ValueError("Unable to determine codename from %s" % series)

# The following two classes are trivial wrappers around their
# corresponding ubuntutools.archive classes, except that they disable
# the use of non-Launchpad masters or mirrors. Instead, all downloads
# will go through Launchpad.
class LaunchpadUbuntuSourcePackage(UbuntuSourcePackage):
    def __init__(self, *args, **kwargs):
        super(LaunchpadUbuntuSourcePackage, self).__init__(
            *args,
            **kwargs,
        )
        self.masters = list()
        self.mirrors = list()


class LaunchpadDebianSourcePackage(DebianSourcePackage):
    def __init__(self, *args, **kwargs):
        super(LaunchpadDebianSourcePackage, self).__init__(
            *args,
            **kwargs,
        )
        self.masters = list()
        self.mirrors = list()


class GitUbuntuPPASourcePackage(UbuntuSourcePackage):
    "Download / unpack an PPA source package"
    def __init__(self, ppa_spec, *args, **kwargs):
        super(GitUbuntuPPASourcePackage, self).__init__(*args, **kwargs)
        self.masters = list()
        self.mirrors = list()
        _, ppa = ppa_spec.split(':')
        self._ppa_owner, self._ppa_name = ppa.split('/', 1)
        self._spph = None

    def _lp_url(self, filename):
        "Build a source package URL on Launchpad"
        ret = os.path.join('https://launchpad.net', "~%s" % self._ppa_owner,
                            '+archive', 'ubuntu', self._ppa_name,
                            '+files', filename)
        return ret

    @property
    def lp_spph(self):
        "Return the LP Source Package Publishing History entry"
        if not self._spph:
            launchpad = launchpad_login()
            possible_people = launchpad.people.findPerson(text=self._ppa_owner)
            ppa_owner = None
            for person in possible_people:
                if person.name == self._ppa_owner:
                    ppa_owner = person
                    break
            if ppa_owner is None:
                raise ValueError(
                    "Unable to find owner for PPA: %s" % self._ppa_owner
                )
            spph = (ppa_owner.getPPAByName(name=self._ppa_name)
                          .getPublishedSources(
                              source_name=self.source,
                              version=self.version.full_version,
                              exact_match=True,
                          ))
            self._spph = SourcePackagePublishingHistory(spph[0])
        return self._spph


class GitUbuntuSourcePackageInformation:
    def __init__(self, spphr, dist_name, retries=0, retry_backoffs=None,
                 workdir=None, dsc=None, files=None):
        self._spphr = spphr
        self._dist_name = dist_name
        self._pkgname = self.spphr.source_package_name
        self._version = self.spphr.source_package_version
        self.retries = retries
        self.retry_backoffs = retry_backoffs or []

        if self._dist_name.startswith('ppa:'):
            func = functools.partial(GitUbuntuPPASourcePackage,
                                     self._dist_name)
        else:
            if self.distribution_name.lower() == 'ubuntu':
                func = LaunchpadUbuntuSourcePackage
            else:
                func = LaunchpadDebianSourcePackage

        # Create the working directory up front, in case files were passed
        # that need to be downloaded into it below.
        if workdir and not os.path.isdir(workdir):
            os.makedirs(workdir, exist_ok=True)

        self._archive_srcpkg = func(package=self._pkgname,
                                    version=self._version,
                                    workdir=workdir,
                                    quiet=True,
                                    dscfile=dsc)
        for f in files or []:
            self._archive_srcpkg._download_file(f, f.split('/')[-1])

    @property
    def distribution_name(self):
        # This is the equivalent of self.distribution.name, but accesses
        # distribution through a cache to prevent excessive API round trips.
        distro_series = _get_cached_lp_link(self._spphr, 'distro_series')
        distribution = _get_cached_lp_link(distro_series, 'distribution')
        return distribution.name

    @property
    def series_name(self):
        # This is the equivalent of self.series.name, but accesses the series
        # through a cache to prevent excessive API round trips.
        return _get_cached_lp_link(self._spphr, 'distro_series').name

    @property
    def parent_series_name(self):
        # This is the equivalent of self.parent_series.name, but accesses the
        # series and its previous series through a cache to prevent excessive
        # API round trips.
        distro_series = _get_cached_lp_link(self._spphr, 'distro_series')
        previous_series = _get_cached_lp_link(distro_series, 'previous_series')
        return previous_series.name

    @property
    def spphr(self):
        return self._spphr

    @property
    def version(self):
        return self._version

    @property
    def upstream_version(self):
        return self._archive_srcpkg.version.upstream_version

    @property
    def name(self):
        return self._pkgname

    @property
    def archive_srcpkg(self):
        return self._archive_srcpkg

    @property
    def series(self):
        return self._spphr.distro_series

    @property
    def parent_series(self):
        return self._spphr.distro_series.previous_series

    @property
    def distribution(self):
        return self._spphr.distro_series.distribution

    @property
    def date_published(self):
        return self._spphr.date_published

    @property
    def date_created(self):
        return self._spphr.date_created

    @property
    def pocket(self):
        return self._spphr.pocket

    @property
    def pretty_head_name(self):
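        # e.g. 'ubuntu/focal' for a publication to the release pocket, or
        # 'ubuntu/focal-updates' for one to the updates pocket.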
        if self._spphr.pocket.lower() == 'release':
            head_name = '%s/%s' % (
                self.distribution_name.lower(),
                self.series_name.lower(),
            )
        else:
            head_name = '%s/%s-%s' % (
                self.distribution_name.lower(),
                self.series_name.lower(),
                self.pocket.lower(),
            )
        return head_name

    def head_name(self, prefix):
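        # e.g. '<prefix>/ubuntu/focal' for the release pocket, or
        # '<prefix>/ubuntu/focal-updates' otherwise.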
        if self._spphr.pocket.lower() == 'release':
            head_name = '%s/%s/%s' % (
                prefix,
                self.distribution_name.lower(),
                self.series_name.lower(),
            )
        else:
            head_name = '%s/%s/%s-%s' % (
                prefix,
                self.distribution_name.lower(),
                self.series_name.lower(),
                self.pocket.lower(),
            )
        return head_name

    def applied_head_name(self, prefix):
        return self.head_name('%s/applied' % prefix)

    def parent_head_name(self, prefix):
        if self.parent_series is None:
            return None
        if self.pocket.lower() == 'release':
            # release pockets descend from prior series
            head_name = '%s/%s/%s' % (
                prefix,
                self.distribution_name.lower(),
                self.parent_series_name.lower(),
            )
        else:
            # non-release pockets descend from release
            head_name = '%s/%s/%s' % (
                prefix,
                self.distribution_name.lower(),
                self.series_name.lower(),
            )
        return head_name

    def parent_applied_head_name(self, prefix):
        return self.parent_head_name('%s/applied' % prefix)

    @property
    def dsc(self):
        return self._archive_srcpkg.dsc

    @property
    def dsc_pathname(self):
        return self._archive_srcpkg.dsc_pathname

    def pull(self):
        for i in range(self.retries+1):
            try:
                self._archive_srcpkg.pull()
                break
            except Exception as e:
                logging.error('Failed to pull down source '
                              '(attempt %d/%d)' %
                              (i+1, self.retries+1))
                logging.debug("Source package download error: %r" % e)
                if i < self.retries:
                    time.sleep(self.retry_backoffs[i])
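        # for/else: the else branch runs only if the loop completed without a
        # successful pull() (i.e. without hitting the break above).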
        else:
            raise DownloadError('Failed to pull down source')

    @property
    def workdir(self):
        return self._archive_srcpkg.workdir

    def get_changes_file_url(self):
        """Retrieve the changes file URL associated with this publication

        If there is no changes file URL, then return None. This happens, for
        example, with publications that were synced from Debian.

        :rtype: str or None
        :returns: the changes file URL associated with this publication, or
            None if one isn't available.
        """
        return self._spphr.changesFileUrl()


class NoPublicationHistoryException(Exception):
    pass


# An abstraction of an information source about source packages
class GitUbuntuSourceInformation(object):
    # Some series are listed as active in Launchpad but aren't available via
    # apt, so this is a list of those to exclude as a workaround. See
    # https://answers.launchpad.net/launchpad/+question/680444
    DENYLISTED_ACTIVE_SERIESS = {
        'debian': frozenset(['wheezy']),
        'ubuntu': frozenset(),
    }

    _all_series_list = None
    _active_series_list = None
    _stable_series_list = None
    _current_series = None

    def __init__(self, dist_name, pkgname=None,
                 pull_overrides_filename='/dev/null',
                 retries=0,
                 retry_backoffs=None,
                 lp=None,
                ):
        self.launchpad = lp or launchpad_login()
        self.dist_name = dist_name
        if self.dist_name.startswith('ppa:'):
            _, ppa = dist_name.split(':')
            ppa_owner, ppa_name = ppa.split('/', 1)
            possible_people = self.launchpad.people.findPerson(text=ppa_owner)
            person = None
            for candidate in possible_people:
                if candidate.name == ppa_owner:
                    person = candidate
                    break
            if person is None:
                raise ValueError(
                    "Unable to find owner for PPA: %s" % dist_name
                )
            self.archive = person.getPPAByName(name=ppa_name)
        else:
            self.dist = self.launchpad.distributions[dist_name]
            self.archive = self.dist.main_archive
        self.pkgname = pkgname
        self.pull_overrides = self.parse_pullfile(pull_overrides_filename)
        self.retries = retries
        self.retry_backoffs = retry_backoffs or []

    @staticmethod
    def _spi_found_in_relevant_head(head_info, namespace, spi):
        """Determine if a branch head matches the given source publication

        Consider the branch head that is expected to be updated were the source
        publication to be imported. If the source publication appears to
        already be imported there, then this method returns True.

        This is assumed if the commit timestamp at the branch head matches the
        date_created timestamp of the source publication, and the package
        version strings also match.

        :param dict(str, HeadInfoItem) head_info: the state of the relevant
            branch heads, as returned by GitUbuntuRepository.get_head_info().
        :param str namespace: the namespace prefix used in the git repository.
        :param GitUbuntuSourcePackageInformation spi: the Launchpad publication
            to consider.
        :rtype: bool
        :returns: True if the spi matches the relevant branch head
        """
        try:
            head_info_item = head_info[spi.head_name(namespace)]
        except KeyError:
            # We don't have information for the relevant branch. Probably
            # because it doesn't exist. So our answer is no: we don't appear to
            # have this source publication imported.
            return False
        return (
            head_info_item.version == spi.version
            and int(spi.date_created.timestamp()) == head_info_item.commit_time
        )

    @property
    def current_series(self):
        if self.dist_name.startswith('ppa:'):
            return None
        return self.dist.current_series

    @property
    def denylisted_active_seriess(self):
        return self.DENYLISTED_ACTIVE_SERIESS[self.dist_name]

    @property
    def active_series(self):
        if self.dist_name.startswith('ppa:'):
            return []
        # return a list of series objects sorted with newest first.
        if self._active_series_list is None:
            self._active_series_list = sorted(
                [
                    r
                    for r in self.dist.series
                    if (
                        r.active
                        and r.name not in self.denylisted_active_seriess
                    )
                ],
                key=lambda s: float(s.version),
                reverse=True,
            )
        return self._active_series_list

    @property
    def stable_series(self):
        if self.dist_name.startswith('ppa:'):
            return []
        # return a list of released series objects sorted with newest first.
        if self._stable_series_list is None:
            self._stable_series_list = sorted(
                [
                    r for r in self.dist.series if r.active and
                    r.status in ('Current Stable Release', 'Supported')
                ],
                key=lambda s: float(s.version),
                reverse=True,
            )
        return self._stable_series_list

    @property
    def all_series(self):
        if self.dist_name.startswith('ppa:'):
            return []
        # return a list of all series objects sorted with newest first
        if self._all_series_list is None:
            self._all_series_list = sorted(
                self.dist.series,
                key=lambda s: float(s.version),
                reverse=True,
            )
        return self._all_series_list

    @property
    def current_series_name(self):
        if not self.current_series:
            return None
        return self.current_series.name

    @property
    def active_series_name_list(self):
        return [r.name for r in self.active_series]

    @property
    def stable_series_name_list(self):
        return [r.name for r in self.stable_series]

    @property
    def all_series_name_list(self):
        return [r.name for r in self.all_series]

    def get_corrected_spi(self, srcpkg, workdir=None):
        try:
            pull_override = self.pull_overrides[srcpkg.source_package_version]
            dsc = pull_override['dsc']
            files = pull_override['files']
        except KeyError:
            dsc = None
            files = list()
        return GitUbuntuSourcePackageInformation(
            srcpkg, self.dist_name, self.retries, self.retry_backoffs,
            workdir=workdir, dsc=dsc, files=files,
        )

    def launchpad_version_is_published(self, version, workdir=None):
        spph = self.archive.getPublishedSources(
            exact_match=True,
            source_name=self.pkgname,
            version=version,
            order_by_date=True,
        )
        return len(spph) != 0

    def launchpad_versions_published(self, workdir=None,
                                     sorted_by_version=False, series=None):
        args = {
            'exact_match': True,
            'source_name': self.pkgname,
        }
        if not sorted_by_version:
            args['order_by_date'] = True
        if series:
            args['distro_series'] = series

        spph = self.archive.getPublishedSources(**args)
        if len(spph) == 0:
            raise NoPublicationHistoryException("Is %s published in %s?" %
                                                (self.pkgname, self.dist_name))

        for srcpkg in spph:
            yield self.get_corrected_spi(srcpkg, workdir)

    def launchpad_versions_published_after(
        self,
        head_info,
        namespace,
        workdir=None,
        active_series_only=False,
    ):
        """Return a sequence of GitUbuntuSourcePackageInformation instances

        Return a sequence of GitUbuntuSourcePackageInformation instances
        representing Launchpad publications created in a Launchpad distribution
        after a particular point, as determined by the head_info parameter.

        :param dict head_info: as returned by
            GitUbuntuRepository.get_head_info(). This may be empty, in
            which case all publications are returned. Otherwise,
            publications that match a version already found in head_info
            are skipped, as are any publications with an earlier
            date_created.
        :param str namespace: the namespace prefix used in the git repository.
        :param workdir: passed through to the GitUbuntuSourcePackageInformation
            constructor.
        :param bool active_series_only: for Ubuntu, skip any series that is not
            an active series, as determined by self.active_series_name_list.
        :rtype: sequence(GitUbuntuSourcePackageInformation)
        """

        # We have the date of the commit too, so we can double-check that it
        # matches.
        #
        # Iterate from the newest publication to the oldest. If a published
        # version is != the corresponding head's version, save off the
        # publication. Once we see the first version exactly matching a head,
        # we assume that all prior versions have already been
        # imported/uploaded, and so we start the following loop from there.
        # This favors (performance-wise) updating an existing tree, but
        # that's the expected use-case.

        # Do not do coherence checks on versions less than the head versions,
        # as there are cases of a 'next' version being prior in the ordering
        # (e.g. clamav 0.91.2-3ubuntu2.2~feisty1 in feisty-backports).
        spph = self.archive.getPublishedSources(
            exact_match=True,
            source_name=self.pkgname,
            order_by_date=True,
        )
        # Coherence check that the passed in srcpkg name has a publication
        # history
        if len(spph) == 0:
            logging.warning("No publication history found for %s in %s.",
                self.pkgname, self.dist_name
            )
            return
        truncated_spph = list()
        for spphr in spph:
            spi = GitUbuntuSourcePackageInformation(
                spphr, self.dist_name, workdir=workdir
            )
            if self._spi_found_in_relevant_head(head_info, namespace, spi):
                break
            truncated_spph.append(spphr)

        for spphr in reversed(truncated_spph):
            spi = self.get_corrected_spi(spphr, workdir)
            if active_series_only and spi.series.name.lower() not in self.active_series_name_list:
                continue
            yield spi

    @staticmethod
    def interleave_launchpad_versions_published_after(
        gusi_head_info_tuple_list,
        namespace,
        workdir=None,
        active_series_only=False,
    ):
        """Interleave multiple calls to launchpad_versions_published_after

        Interleave the result of multiple calls to the
        launchpad_versions_published_after() method. The specification requires
        new publications to be processed across the Debian and Ubuntu
        distributions in order of date_created to establish
        hash stability. launchpad_versions_published_after() returns the result
        of one distribution at a time. This method calls any number
        simultaneously and interleaves the results so that the caller sees a
        single combined sequence in ascending order of date_created. If
        date_created is the same, results are provided in the order that the
        distributions are specified in gusi_head_info_tuple_list. In other
        words, the distribution ordering is the secondary sort key.

        :param list(tuple(GitUbuntuSourceInformation, dict))
            gusi_head_info_tuple_list: a list of the parameters to use for the
            underlying launchpad_versions_published_after() method call. The
            GitUbuntuSourceInformation instance is the object to call the
            method against. The second element of the tuple is the head_info
            parameter to pass to that method. The other parameters are passed
            through to the underlying call as-is.
        :param namespace: passed through to
            launchpad_versions_published_after().
        :param workdir: passed through to launchpad_versions_published_after().
        :param bool active_series_only: passed through to
            launchpad_versions_published_after().
        :rtype: sequence(GitUbuntuSourcePackageInformation)
        :returns: the combined return sequences of the underlying calls to
            launchpad_versions_published_after() interleaved as specified.
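
        A call might look like this (illustrative; debian_gusi, ubuntu_gusi
        and the head_info dicts are assumed to be provided by the caller):

            spis = GitUbuntuSourceInformation.interleave_launchpad_versions_published_after(
                [(debian_gusi, debian_head_info), (ubuntu_gusi, ubuntu_head_info)],
                namespace=namespace,
            )
            for spi in spis:
                ...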
        """
        # Create a mapping of distribution to priority, where distribution is
        # the Launchpad distribution_link string representing the distribution,
        # and priority is an integer (low number=high priority). Then we will
        # be able to sort according to priority as a secondary key to ensure
        # that if timestamps of a publication are the same across
        # distributions, the distribution mentioned first in
        # gusi_head_info_tuple_list will appear in the results first.
        dist_priority = {
            gusi.dist.self_link: i
            for i, (gusi, head_info)
            in enumerate(gusi_head_info_tuple_list)
        }

        # Now that we have a mapping of distribution_link to priority, the sort
        # key function is simple: sort on date created first, and if they are
        # equal then use the distribution priority. Python will do this for us
        # if we provide a tuple. The distribution priority is necessary because
        # there is no guarantee that heapq.merge(), as used below, provides a
        # stable sort. If it did, then keying on date_created only would be
        # sufficient.
        def key_func(spi):
            distro_series = _get_cached_lp_link(spi._spphr, 'distro_series')
            distribution_link = distro_series.distribution_link
            return spi._spphr.date_created, dist_priority[distribution_link]

        # Create one generator per gusi_head_info_tuple_list entry.
        spi_generators_to_interleave = [
            gusi.launchpad_versions_published_after(
                head_info=head_info,
                namespace=namespace,
                workdir=workdir,
                active_series_only=active_series_only,
            )
            for gusi, head_info in gusi_head_info_tuple_list
        ]

        # heapq.merge() will now do the interleaving. This relies on the
        # results of each of the calls to launchpad_versions_published_after()
        # already being sorted by date_created.
        return heapq.merge(
            *spi_generators_to_interleave,
            key=key_func,
        )

    def parse_pullfile(self, pullfile):
        """Extract source file overrides from a file

        The pull overrides file specifies sources to use for a source
        package instead of Launchpad, so that a publish is importable.
        Typically, this is needed when an orig tarball is republished with
        different contents than before, but with the same name, e.g., when
        an epoch bump occurs but the version is not changed. This is no
        longer typical, but can be seen historically.

        The format of the pull overrides file is:
        <pkgname> <version> <URL of dsc file> <URLs of orig files>
        with one package per line.

        <pkgname> is the name of the source package to which this override
        applies.

        <version> is the published version which needs
        this override.

        <URL of dsc file> is a URL to a valid DSC file for this publication.
        '-' indicates to use the DSC file from Launchpad.

        <URLs of orig files> is a whitespace-separated list of further URLs
        to download that are shadowed in Launchpad.
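
        For example, a single override line might look like this
        (hypothetical package and URLs):

            foo 1:1.2-3 https://example.com/foo_1.2-3.dsc https://example.com/foo_1.2.orig.tar.gz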

        Keyword Arguments:
        pullfile -- Path to the pull overrides file, or None if not required
        """
        pull_overrides = dict()
        if not pullfile:
            return pull_overrides
        try:
            with open(pullfile) as f:
                for line in f:
                    if line.startswith('#'):
                        continue
                    m = re.match(
                        r'(?P<pkgname>\S*)\s*(?P<version>\S*)\s*'
                        r'(?P<dscurl>\S*)\s*(?P<files>.*)',
                        line,
                    )
                    if m is None:
                        continue
                    if m.group('pkgname') != self.pkgname:
                        continue
                    if m.group('dscurl') == '-':
                        # use DSC file from LP
                        dscurl = None
                    else:
                        dscurl = m.group('dscurl')
                    files = m.group('files').split()
                    pull_overrides[m.group('version')] = {
                        'dsc': dscurl,
                        'files': files,
                    }
        except FileNotFoundError:
            pass
        return pull_overrides
