# git ubuntu build usage
# Equivalent of running dpkg-buildpackage from the current directory

# uses a cache in .git/
# 1) grab orig tarballs to build with
#    a) if native package, skip
#    b) look up required upstream version based upon debian/changelog in
#    working directory
#    c) does .git/build_cache/<srcpkg>/<upstream version>/<dsc file> exist
#    d) does every orig tarball file mentioned in dsc file exist in
#    .git/build_cache/<srcpkg>/<upstream version>/ or ../
#    e) if yes, success
#    f) if no, for distro in ubuntu, debian
#        i) walk lp publishing history for srcpkg in distro backwards
#        ii) if upstream versions match, pull dsc, orig tarballs into
#        .git/build_cache/<srcpkg>/<upstream version>/<dsc file>
# 2) call dpkg-buildpackage $@

import argparse
from collections import namedtuple
from contextlib import ExitStack
import functools
from itertools import filterfalse
import logging
import os
import re
import shutil
from subprocess import CalledProcessError
import sys
import tempfile
import time
import traceback
from gitubuntu.__main__ import top_level_defaults
from gitubuntu.cache import CACHE_PATH
from gitubuntu.dsc import GitUbuntuDsc
import gitubuntu.git_repository
from gitubuntu.run import (
    decode_binary,
    run,
    runq,
)
from gitubuntu.source_information import (
    GitUbuntuSourceInformation,
    NoPublicationHistoryException,
    derive_source_from_series,
    derive_codename_from_series,
)
from gitubuntu.test_util import get_test_changelog
from gitubuntu.versioning import Version
from debian.debfile import PART_EXTS
import pygit2
import pytest

from distro_info import DebianDistroInfo, UbuntuDistroInfo


# We don't really know for certain if we have the correct set of orig tarballs
# in the parent directory until we try to build the package. To try and infer
# the real truth without trying to build it would involve re-implementing some
# of dpkg-source, which we'd like to avoid. Instead, we will attempt various
# ways of fetching the orig tarballs and consider it done when a build is
# successful.

# An "orig search list" is a list of OrigSearchListEntry namedtuples that tells
# us in what order to look for an orig tarball using different sources and
# mechanisms.
# Fields of an OrigSearchListEntry, in order:
#   mechanism   how we look
#   source      where we look (eg. 'debian' or 'ubuntu')
#   must_build  whether a build failure terminates the search
OrigSearchListEntry = namedtuple(
    'OrigSearchListEntry',
    ('mechanism', 'source', 'must_build'),
)


# All of the fetch* functions have expected semantics:
# - They take a gitubuntu.git_repository.Changelog object and a source
#   string (see OrigSearchListEntry for the possible values)
# - They return:
#   - None if nothing has been done, and the parent directory does not
#     contain suitable orig tarballs
#   - A list of string paths on the filesystem that contain tarballs
#     needed to build, which might be empty.

def fetch_orig_noop(changelog, source):
    """Fetch nothing and report success with no tarballs

    @changelog: ignored; only to match generic fetch mechanism protocol
    @source: ignored; only to match generic fetch mechanism protocol

    This is the "nothing: just try to build it" fetch mechanism. It exists
    so that packages that need no orig tarball (a native package, for
    example) can still go through the generic fetch mechanism protocol.

    Returns a new empty list on every call.
    """
    no_tarballs = list()
    return no_tarballs


def fetch_orig_from_parent_dir(changelog, source):
    """Look for orig tarballs in the parent directory

    @changelog: gitubuntu.git_repository.Changelog object representing required version
    @source: ignored; only to match generic fetch mechanism protocol

    The candidate filenames are
    <srcpkg>_<upstream version>.orig.tar.<ext> for each ext in
    debian.debfile.PART_EXTS. They are probed in PART_EXTS order, so
    the result is deterministic when more than one compression of the
    same tarball is present.

    Returns a single-element list holding the path of the first
    candidate that exists in the parent directory.

    XXX Before 1.0, this should be updated to use pristine-tar, if
    possible, to see if the provided tarballs are verifiable. We can use
    derive_source_from_changelog to determine which pristine-tar
    branches to use. An exception should be raised if they fail to
    verify.
    XXX It is unclear what to do if tarballs are present in the parent
    directory, but not in pristine-tar. By default, I think we should
    use them (perhaps with a warning) and provide a mechanism to
    override that behavior (e.g., by allowing the user to specify the
    mechanism explicitly to be launchpad or pristine-tar)

    Returns None if orig tarballs were not found in the parent directory.
    """
    assert changelog.upstream_version and changelog.srcpkg
    prefix = '%s_%s.orig.tar.' % (changelog.srcpkg, changelog.upstream_version)
    # Iterate PART_EXTS directly rather than through a set: set iteration
    # order for strings varies between interpreter runs, which made the
    # chosen tarball unpredictable when several candidates existed.
    for ext in PART_EXTS:
        path = os.path.join(os.path.pardir, prefix + ext)
        if os.path.exists(path):
            return [path]
    return None


def _symlink_into_parent_dir(path):
    """Symbolic link file into parent directory.

    @param path: string path of the target of the intended link; may be
    absolute or relative to the current working directory

    Creates a symlink with the same name as the target file in the parent
    directory. The link always stores the absolute form of path.

    Returns as a string the relative path to the new symlink, or None if
    a symlink pointing at path already exists there.

    Raises FileExistsError if the destination exists and is not a
    symlink, or is a symlink that points somewhere else.
    """
    destination_path = os.path.join(os.path.pardir, os.path.basename(path))
    # A relative symlink target is resolved against the directory holding
    # the link, not against the current working directory, so storing a
    # cwd-relative path verbatim would produce a broken link.
    wanted_target = os.path.abspath(path)
    # lexists() rather than exists(): a dangling symlink must be detected
    # here too, instead of surfacing later as a failure from os.symlink().
    if os.path.lexists(destination_path):
        if not os.path.islink(destination_path):
            raise FileExistsError('File exists %s' % destination_path)
        target = os.readlink(destination_path)
        if not os.path.isabs(target):
            target = os.path.join(os.path.dirname(destination_path), target)
        # Compare normalised absolute paths so that equivalent spellings
        # of the same location are recognised as the same link target.
        if os.path.abspath(target) == wanted_target:
            return None
        raise FileExistsError('Link at %s does not point to %s' %
            (
                destination_path,
                path,
            )
        )
    os.symlink(wanted_target, destination_path)
    return destination_path


def _populate_cache(cache, src_path, cache_file_name=None):
    """Store a copy of a file in the cache directory

    @param cache: path to cache directory
    @param src_path: path to file to cache
    @param cache_file_name: name to give the copy inside the cache. If
    None, the basename of src_path is used

    The file contents are copied with shutil.copyfile.

    returns path to cached file
    """
    name_in_cache = (
        os.path.basename(src_path)
        if cache_file_name is None
        else cache_file_name
    )
    destination = os.path.join(cache, name_in_cache)
    shutil.copyfile(src_path, destination)
    return destination


def _symlink_paths_into_parent_dir(paths):
    """Symbolic link a list of paths into parent directory.

    @param paths: list of paths to symlink into parent directory

    For each path, creates a symlink with the same name as the target
    file in the parent directory, using _symlink_into_parent_dir.

    The operation is all-or-nothing with respect to links created by
    this call: if any path fails to link, every link this call created
    so far is removed again and the original exception is re-raised.
    Links that already existed before the call are left untouched.

    Returns None.
    """
    # refuse to override a file without force
    # if a file already exists and points to where we want it, that's ok
    # if a file already exists and is either not a link or does not
    # point where we want it, raise an exception

    created_links = []
    try:
        for path in paths:
            new_link = _symlink_into_parent_dir(path)
            # None means a correct link was already in place; it is not
            # ours to remove on failure, so do not record it.
            if new_link is not None:
                created_links.append(new_link)
    except BaseException:
        # Deliberately broad: the links must be unwound whatever
        # interrupted us. The exception is always re-raised.
        for created_link in created_links:
            os.unlink(created_link)
        raise


def fetch_orig_from_cache(changelog, source, dl_cache=None):
    """Look for orig tarballs in the download cache

    @param changelog: gitubuntu.git_repository.Changelog object representing required
    version
    @param source: 'debian' or 'ubuntu': which cache to use
    @param dl_cache: string path to use as the cache directory; if
    None (or empty) the default of CACHE_PATH inside the git directory
    ($GIT_DIR, or '.git', relative to the current working directory) is
    used

    The cached dsc is expected at
    <cache dir>/<source>/<srcpkg>/<upstream version>/DSC.

    Returns a list of orig tarball paths if found and the dsc verifies;
    the tarballs are also symlinked into the parent directory.
    Returns None if no matching upstream version was found in the cache.
    Returns None, after logging a warning, if a dsc was found but
    dsc.verify() returned None.

    XXX Decide whether a dsc that is found but fails to verify should
    raise instead of returning None.
    """
    assert changelog.upstream_version and changelog.srcpkg
    cached_dsc_path = os.path.join(
        dl_cache or os.path.join(
            os.getcwd(),
            os.getenv('GIT_DIR', '.git'),
            CACHE_PATH
        ),
        source,
        changelog.srcpkg,
        changelog.upstream_version,
        'DSC',
    )
    try:
        dsc = GitUbuntuDsc(cached_dsc_path)
    except (FileNotFoundError, NotADirectoryError):
        # Nothing cached for this source/package/upstream version.
        return None
    if dsc.verify() is None:
        # logging.warn is a deprecated alias; use logging.warning like
        # the rest of this module.
        logging.warning(
            "Cache dir found, but verification of orig tarball(s) failed."
        )
        return None # XXX decide - see docstring

    orig_paths = dsc.all_tarball_paths
    _symlink_paths_into_parent_dir(orig_paths)
    return orig_paths


def fetch_orig_from_pristine_tar(changelog, source, repo):
    """Extract orig tarballs using the repository's pristine-tar data

    @changelog: gitubuntu.git_repository.Changelog object representing required version
    @source: 'debian' or 'ubuntu': which branch to use
    @repo: gitubuntu.git_repository.GitRepository object containing pristine-tar data

    repo.pristine_tar_exists() is asked which sources have data for the
    required upstream version, and repo.pristine_tar_extract() is then
    called for the requested source.

    Returns whatever repo.pristine_tar_extract() returns on success
    (the list of extracted orig tarball paths).
    Returns None, after logging a warning, if the lookup was ambiguous,
    found nothing, or found data only for a source other than the one
    requested.
    Returns None if the extraction subprocess fails.
    """
    try:
        results = repo.pristine_tar_exists(
            changelog.srcpkg,
            changelog.upstream_version,
        )
    except gitubuntu.git_repository.MultiplePristineTarFoundError as e:
        logging.warning("%s. This is often because the orig "
            "tarball compression changed and it is not possible "
            "to determine which tarball to use automatically.", e
        )
        return None

    found_count = len(results)

    if found_count == 0:
        logging.warning("No pristine-tar data found for %s",
            changelog.upstream_version
        )
        return None

    if found_count == 1 and source not in results:
        # Data exists, but only for the other source.
        only_found = next(iter(results))
        logging.warning("pristine-tar data found for %s, but expected to "
            "find %s", only_found, source
        )
        return None

    if found_count == 2:
        # Not an error: just say which one wins.
        logging.warning("pristine-tar data found for both debian and "
            "ubuntu, using %s", source
        )

    try:
        extracted = repo.pristine_tar_extract(
            changelog.srcpkg,
            changelog.upstream_version,
            source,
        )
    except CalledProcessError:
        return None
    else:
        return extracted


def fetch_orig_from_launchpad(changelog, source, pullfile, retries,
    retry_backoffs, dl_cache=None,
):
    """Download orig tarballs for the required version from Launchpad

    @changelog: gitubuntu.git_repository.Changelog object representing required version
    @source: 'debian' or 'ubuntu': which Launchpad distribution to search
    @pullfile: path handed (made absolute) to GitUbuntuSourceInformation
    as pull_overrides_filename
    @retries: handed to GitUbuntuSourceInformation unchanged
    @retry_backoffs: handed to GitUbuntuSourceInformation unchanged
    @dl_cache: string path of a cache directory, or None. It is used as
    the workdir for the publication lookup. If not None, the dsc (stored
    as 'DSC') and the orig tarballs are also copied into
    <dl_cache>/<derived source>/<srcpkg>/<upstream version>/ and the
    symlinks in the parent directory point at those copies.

    The published versions are walked in the order returned by
    launchpad_versions_published(sorted_by_version=True). The walk
    stops at the first publication whose upstream version equals the
    required one, or gives up as soon as a publication with an older
    upstream version than the required one is seen.

    Returns the dsc's list of orig tarball paths (the pulled files, not
    the cached copies) if a matching publication was found; symlinks to
    them are created in the parent directory.

    Returns None if matching orig tarballs were not found in the
    specified distribution in Launchpad.
    """
    assert changelog.upstream_version and changelog.srcpkg
    assert source in ('debian', 'ubuntu')

    dist_sinfo = GitUbuntuSourceInformation(
        dist_name=source,
        pkgname=changelog.srcpkg,
        pull_overrides_filename=os.path.abspath(pullfile),
        retries=retries,
        retry_backoffs=retry_backoffs,
    )

    try:
        versions_published = dist_sinfo.launchpad_versions_published(
            workdir=dl_cache,
            sorted_by_version=True,
        )
    except NoPublicationHistoryException:
        logging.warning(
            "No publication history found for %s in %s. ",
            changelog.srcpkg,
            source,
        )
        return None

    for spi in versions_published:
        logging.debug(
            "Checking if upstream version of publish %s matches %s",
            spi.version,
            changelog.upstream_version,
        )

        if Version(spi.upstream_version) < Version(changelog.upstream_version):
            # We are already past where the required version could be.
            logging.info(
                "New upstream version detected (%s) which is after the last "
                "published upstream version (%s).",
                changelog.upstream_version,
                spi.upstream_version,
            )
            return None

        if spi.upstream_version != changelog.upstream_version:
            continue  # newer than required; keep walking

        logging.debug(
            "Upstream version of publish %s matches",
            spi.version,
        )

        spi.pull()
        dsc = GitUbuntuDsc(spi.dsc_pathname)
        orig_paths = dsc.all_tarball_paths

        if dl_cache is None:
            # No cache: link the pulled files directly.
            _symlink_paths_into_parent_dir(orig_paths)
            return orig_paths

        # NOTE(review): the cache subdirectory is keyed by the source
        # derived from the changelog, not by the `source` argument that
        # was searched - confirm this is intended.
        srcpkg_cache_dir = os.path.join(
            dl_cache,
            derive_source_from_changelog(changelog),
            changelog.srcpkg,
            changelog.upstream_version,
        )
        if not os.path.isdir(srcpkg_cache_dir):
            os.makedirs(srcpkg_cache_dir, exist_ok=True)

        # populate cache
        logging.debug("Caching dsc file")
        _populate_cache(srcpkg_cache_dir, dsc.dsc_path, 'DSC')
        cached_paths = [
            _populate_cache(srcpkg_cache_dir, orig_path)
            for orig_path in orig_paths
        ]
        _symlink_paths_into_parent_dir(cached_paths)
        return orig_paths

    # Fell out of the for loop: didn't find anything.
    return None


def derive_orig_search_list_from_args(
    repo,
    commitish,
    for_merge,
    no_pristine_tar,
    pullfile=top_level_defaults.pullfile,
    retries=top_level_defaults.retries,
    retry_backoffs=top_level_defaults.retry_backoffs,
    dl_cache=None,
):
    """Build the ordered orig search list for a build

    @param repo: repository object; used to read the changelog at
    commitish and handed to the pristine-tar mechanism
    @param commitish: treeish whose changelog decides nativeness
    @param for_merge: if true search 'debian', otherwise 'ubuntu'
    @param no_pristine_tar: if true, leave out the pristine-tar mechanism
    @param pullfile: forwarded to fetch_orig_from_launchpad
    @param retries: forwarded to fetch_orig_from_launchpad
    @param retry_backoffs: forwarded to fetch_orig_from_launchpad
    @param dl_cache: cache directory path or None; forwarded to
    fetch_orig_from_launchpad and, when not None, also to
    fetch_orig_from_cache

    Returns a list of OrigSearchListEntry namedtuples. For a native
    package this is a single fetch_orig_noop entry. Otherwise the order
    is: parent directory, cache, pristine-tar (unless disabled),
    Launchpad.
    """
    native = is_native_package(
        repo.get_changelog_from_treeish(commitish)
    )

    if native:
        # No orig tarball required
        return [
            OrigSearchListEntry(
                mechanism=fetch_orig_noop,
                source=None,
                must_build=True,
            ),
        ]

    source = 'debian' if for_merge else 'ubuntu'

    # The Launchpad mechanism writes into dl_cache, so the cache
    # mechanism has to read from the same directory; previously it
    # always fell back to its own default location. The bare function is
    # kept when no dl_cache is given so that default entries are exactly
    # what they were before.
    if dl_cache is None:
        cache_mechanism = fetch_orig_from_cache
    else:
        cache_mechanism = functools.partial(fetch_orig_from_cache,
            dl_cache=dl_cache,
        )

    orig_search_list = [
        OrigSearchListEntry(
            mechanism=fetch_orig_from_parent_dir,
            source=None,
            must_build=True,
        ),
        OrigSearchListEntry(
            mechanism=cache_mechanism,
            source=source,
            must_build=False,
        ),
    ]
    if not no_pristine_tar:
        orig_search_list.append(
            OrigSearchListEntry(
                mechanism=functools.partial(fetch_orig_from_pristine_tar,
                    repo=repo,
                ),
                source=source,
                must_build=False,
            )
        )
    orig_search_list.append(
        OrigSearchListEntry(
            mechanism=functools.partial(fetch_orig_from_launchpad,
                pullfile=pullfile,
                retries=retries,
                retry_backoffs=retry_backoffs,
                dl_cache=dl_cache,
            ),
            source=source,
            must_build=False,
        )
    )

    return orig_search_list


def derive_source_from_changelog(changelog):
    """Derive the source from the changelog's distribution field

    @param changelog: object with a string `distribution` attribute

    The series is taken to be the part of changelog.distribution before
    the first '-'.

    Returns the result of derive_source_from_series() for that series.
    """
    series, _separator, _rest = changelog.distribution.partition('-')
    return derive_source_from_series(series)

@pytest.mark.parametrize('changelog_name, expected', [
    ('test_distribution_source_1', 'ubuntu'),
    ('test_distribution_source_2', 'ubuntu'),
    ('test_distribution_source_3', 'debian'),
    ('test_distribution_source_4', 'ubuntu'),
])
def test_derive_source_from_changelog(changelog_name, expected):
    """Each named test changelog must derive the expected source."""
    changelog = get_test_changelog(changelog_name)
    derived = derive_source_from_changelog(changelog)
    assert derived == expected

def expand_changelog_source_aliases(orig_search_list, changelog):
    """Resolve the 'changelog' source alias in an orig search list

    @param orig_search_list: list of OrigSearchListEntry namedtuples
    @param changelog: changelog used to derive the real source for any
    entry whose source is the string 'changelog'

    Entries with any other source are passed through untouched.

    Returns: a sequence with changelog sources expanded (this is a
    generator; entries are produced lazily in the original order)
    """
    for entry in orig_search_list:
        if entry.source != 'changelog':
            yield entry
            continue
        real_source = derive_source_from_changelog(changelog)
        yield entry._replace(source=real_source)


def fetch_orig(
    orig_search_list,
    changelog,
):
    """Try each search entry in turn until one yields orig tarballs

    Entries are tried in order after 'changelog' source aliases have
    been expanded and duplicate entries dropped. The first mechanism
    that returns something other than None wins.

    :param orig_search_list: list of OrigSearchListEntry namedtuples.
    :param changelog: changelog passed to every mechanism and used to
        expand 'changelog' source aliases.
    :rtype: list
    :returns: A list of successfully fetched tarballs, or an empty list
        if none were found.
    """
    # Expanding a 'changelog' source alias may produce an entry that
    # duplicates one already in the list, so filter out repeats.
    candidates = unique_everseen(
        expand_changelog_source_aliases(orig_search_list, changelog)
    )

    for entry in candidates:
        mechanism = entry.mechanism
        if hasattr(mechanism, '__name__'):
            mechanism_name = mechanism.__name__
        else:
            # e.g. a functools.partial: report the wrapped function
            mechanism_name = mechanism.func.__name__

        tarballs = mechanism(changelog, entry.source)

        if tarballs is not None:
            assert isinstance(tarballs, list)
            if tarballs:
                listing = ':\n' + '\n'.join(tarballs) + '\n'
            else:
                listing = ' '
            logging.info('Successfully fetched%susing %s(source=%s)',
                listing,
                mechanism_name,
                entry.source,
            )
            return tarballs

        # search returned negative; try next search entry
        logging.debug('%s(source=%s) failed',
            mechanism_name,
            entry.source,
        )

    return []


class NativenessMismatchError(Exception):
    """debian/changelog versioning and debian/source/format disagree
    about whether the package is native."""

def is_native_package(changelog):
    """Tell whether the package is a native package

    @changelog: gitubuntu.git_repository.Changelog object

    A version without a '-' in it is considered native.
    debian/source/format is opened relative to the current working
    directory and used as a cross-check.

    Returns True if the changelog version is native, False otherwise.

    Raises NativenessMismatchError if the version is native but
    debian/source/format does not contain 'native'.

    If debian/source/format does not exist a warning is logged and the
    answer is based on the version alone.
    """
    version_is_native = '-' not in changelog.version

    try:
        with open('debian/source/format', 'r') as f:
            source_format = f.read()
    except FileNotFoundError:
        logging.warning(
            "No debian/source/format file found. "
            "Unable to verify native packaging format."
        )
    else:
        if version_is_native and 'native' not in source_format:
            raise NativenessMismatchError(
                "Native versioning found in debian/changelog, but "
                "debian/source/format does not indicate a native "
                "source package."
            )

    return version_is_native


# Adapted from https://docs.python.org/3/library/itertools.html
def unique_everseen(iterable):
    """Yield unique elements, preserving order.

    Every element ever seen is remembered, so a repeat is dropped no
    matter how far it is from its first occurrence. Elements must be
    hashable. The input is consumed lazily.
    """
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    already_seen = set()
    for element in iterable:
        # Membership is checked before this element is recorded, one
        # element at a time, so there is no dependence on look-ahead.
        if element in already_seen:
            continue
        already_seen.add(element)
        yield element
@pytest.mark.parametrize('iterable, expected', [
    ('AAAABBBCCDAABBB', ['A', 'B', 'C', 'D']),
])
def test_unique_everseen(iterable, expected):
    """unique_everseen must keep first occurrences in their original order."""
    observed = list(unique_everseen(iterable))
    assert observed == expected
