File: build.py

package info (click to toggle)
git-ubuntu 1.1-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,688 kB
  • sloc: python: 13,378; sh: 480; makefile: 2
file content (561 lines) | stat: -rw-r--r-- 18,897 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
# git ubuntu build usage
# Equivalent of running dpkg-buildpackage from the current directory

# uses a cache in .git/
# 1) grab orig tarballs to build with
#    a) if native package, skip
#    b) look up required upstream version based upon debian/changelog in
#    working directory
#    c) does .git/build_cache/<srcpkg>/<upstream version>/<dsc file> exist
#    d) does every orig tarball file mentioned in dsc file exist in
#    .git/build_cache/<srcpkg>/<upstream version>/ or ../
#    e) if yes, success
#    f) if no, for distro in ubuntu, debian
#        i) walk lp publishing history for srcpkg in distro backwards
#        ii) if upstream versions match, pull dsc, orig tarballs into
#        .git/build_cache/<srcpkg>/<upstream version>/<dsc file>
# 2) call dpkg-buildpackage $@

import argparse
from collections import namedtuple
from contextlib import ExitStack
import functools
from itertools import filterfalse
import logging
import os
import re
import shutil
from subprocess import CalledProcessError
import sys
import tempfile
import time
import traceback
from gitubuntu.__main__ import top_level_defaults
from gitubuntu.cache import CACHE_PATH
from gitubuntu.dsc import GitUbuntuDsc
import gitubuntu.git_repository
from gitubuntu.run import (
    decode_binary,
    run,
    runq,
)
from gitubuntu.source_information import (
    GitUbuntuSourceInformation,
    NoPublicationHistoryException,
    derive_source_from_series,
    derive_codename_from_series,
)
from gitubuntu.test_util import get_test_changelog
from gitubuntu.versioning import Version
from debian.debfile import PART_EXTS
import pygit2
import pytest

from distro_info import DebianDistroInfo, UbuntuDistroInfo


# We don't really know for certain if we have the correct set of orig tarballs
# in the parent directory until we try to build the package. To try and infer
# the real truth without trying to build it would involve re-implementing some
# of dpkg-source, which we'd like to avoid. Instead, we will attempt various
# ways of fetching the orig tarballs and consider it done when a build is
# successful.

# An "orig search list" is a list of OrigSearchListEntry namedtuples that tells
# us in what order to look for an orig tarball using different sources and
# mechanisms.
# Fields of an OrigSearchListEntry, in positional order:
#   mechanism  - how we look
#   source     - where we look (eg. 'debian' or 'ubuntu')
#   must_build - whether a build failure terminates the search
OrigSearchListEntry = namedtuple(
    'OrigSearchListEntry',
    ('mechanism', 'source', 'must_build'),
)


# All of the fetch* functions have expected semantics:
# - They take a gitubuntu.git_repository.Changelog object and a source
#   string (see OrigSearchListEntry for the possible values)
# - They return:
#   - None if nothing has been done, and the parent directory does not
#     contain suitable orig tarballs
#   - A list of string paths on the filesystem that contain tarballs
#     needed to build, which might be empty.

def fetch_orig_noop(changelog, source):
    """Fetch nothing and report that no tarballs are needed

    @changelog: ignored; only to match generic fetch mechanism protocol
    @source: ignored; only to match generic fetch mechanism protocol

    This is the "do nothing, just try the build" fetch mechanism. It
    exists so that packages which have no orig tarball (a native package,
    for example) can still be driven through the generic fetch mechanism
    protocol.

    Returns a new empty list on every call.
    """
    no_tarballs = []
    return no_tarballs


def fetch_orig_from_parent_dir(changelog, source):
    """Look for an orig tarball in the parent directory

    @changelog: gitubuntu.git_repository.Changelog object representing required version
    @source: ignored; only to match generic fetch mechanism protocol

    The candidate filenames are
    <srcpkg>_<upstream version>.orig.tar.<ext> for every extension in
    debian.debfile.PART_EXTS. Candidates are probed in PART_EXTS order,
    so that the result does not depend on set iteration order when
    tarballs with more than one compression are present.

    Returns a one-element list holding the path (relative to the current
    directory) of the first candidate that exists.

    XXX Before 1.0, this should be updated to use pristine-tar, if
    possible, to see if the provided tarballs are verifiable. We can use
    derive_source_from_changelog to determine which pristine-tar
    branches to use. An exception should be raised if they fail to
    verify.
    XXX It is unclear what to do if tarballs are present in the parent
    directory, but not in pristine-tar. By default, I think we should
    use them (perhaps with a warning) and provide a mechanism to
    override that behavior (e.g., by allowing the user to specify the
    mechanism explicitly to be launchpad or pristine-tar)

    Returns None if no candidate exists in the parent directory.
    """
    assert changelog.upstream_version and changelog.srcpkg
    prefix = '%s_%s.orig.tar.' % (changelog.srcpkg, changelog.upstream_version)
    for ext in PART_EXTS:
        path = os.path.join(os.path.pardir, prefix + ext)
        if os.path.exists(path):
            return [path]
    return None


def _symlink_into_parent_dir(path):
    """Symbolic link file into parent directory.

    @param path: string path of the target of the intended link

    The link is created in the parent of the current directory and is
    named after the basename of path.

    Returns as a string the relative path to the new symlink, or None if
    a symlink already exists there whose target equals path (a relative
    link target is first joined onto the link's directory before being
    compared).

    Raises FileExistsError if the destination exists and is not a
    symlink, or is a symlink that does not point to path.
    """
    link_path = os.path.join(os.path.pardir, os.path.basename(path))

    # Common case first: nothing is in the way, so just create the link.
    if not os.path.exists(link_path):
        os.symlink(path, link_path)
        return link_path

    if not os.path.islink(link_path):
        raise FileExistsError('File exists %s' % link_path)

    existing_target = os.readlink(link_path)
    if not existing_target.startswith('/'):
        # Relative link targets are relative to the directory of the link
        existing_target = os.path.join(
            os.path.dirname(link_path),
            existing_target,
        )

    if existing_target != path:
        raise FileExistsError(
            'Link at %s does not point to %s' % (link_path, path)
        )

    # An equivalent link is already in place; nothing new was created.
    return None


def _populate_cache(cache, src_path, cache_file_name=None):
    """Store a copy of a file in the cache directory

    @param cache: path to cache directory
    @param src_path: path to file to cache
    @param cache_file_name: name to give the copy inside the cache. If
    None, the basename of src_path is used

    The file contents are copied with shutil.copyfile.

    returns path to cached file
    """
    if cache_file_name is None:
        name_in_cache = os.path.basename(src_path)
    else:
        name_in_cache = cache_file_name

    cached_path = os.path.join(cache, name_in_cache)
    shutil.copyfile(src_path, cached_path)
    return cached_path


def _symlink_paths_into_parent_dir(paths):
    """Symbolic link a list of paths into parent directory.

    @param paths: list of paths to symlink into parent directory

    Each path is handed to _symlink_into_parent_dir in turn. If any of
    those calls raises, every symlink newly created by this call is
    unlinked again and the exception is re-raised. Links for which
    _symlink_into_parent_dir returned None are not touched during that
    clean-up.

    Returns nothing.
    """
    # Only links we created ourselves are recorded here, so that unwinding
    # never removes a link that was already present before we started.
    created_links = []
    try:
        for target in paths:
            link = _symlink_into_parent_dir(target)
            if link is not None:
                created_links.append(link)
    except BaseException:
        for link in created_links:
            os.unlink(link)
        raise


def fetch_orig_from_cache(changelog, source, dl_cache=None):
    """Look for orig tarballs in the download cache

    @param changelog: gitubuntu.git_repository.Changelog object representing required
    version
    @param source: 'debian' or 'ubuntu': which cache to use
    @param dl_cache: string path to use as the cache directory; if
    None (or empty), the cache is CACHE_PATH inside the git directory
    (the GIT_DIR environment variable, defaulting to '.git') of the
    current working directory

    The cached dsc is expected at
    <cache>/<source>/<srcpkg>/<upstream version>/DSC.

    Returns a list of orig tarball paths if found and the dsc verifies;
    the tarballs are also symlinked into the parent directory.
    Returns None if no matching upstream version was found in the cache.

    XXX If a dsc is found but fails to verify, a warning is logged and
    None is returned. Whether this should raise instead is undecided.
    """
    assert changelog.upstream_version and changelog.srcpkg
    cache_root = dl_cache or os.path.join(
        os.getcwd(),
        os.getenv('GIT_DIR', '.git'),
        CACHE_PATH
    )
    cached_dsc_path = os.path.join(
        cache_root,
        source,
        changelog.srcpkg,
        changelog.upstream_version,
        'DSC',
    )
    try:
        dsc = GitUbuntuDsc(cached_dsc_path)
    except (FileNotFoundError, NotADirectoryError):
        # Nothing cached for this source/package/upstream version
        return None
    if dsc.verify() is None:
        # logging.warn is a deprecated alias; use logging.warning like the
        # rest of this module.
        logging.warning(
            "Cache dir found, but verification of orig tarball(s) failed."
        )
        return None # XXX decide - see docstring

    orig_paths = dsc.all_tarball_paths
    _symlink_paths_into_parent_dir(orig_paths)
    return orig_paths


def fetch_orig_from_pristine_tar(changelog, source, repo):
    """Fetch orig tarballs from pristine-tar branches

    @changelog: gitubuntu.git_repository.Changelog object representing required version
    @source: 'debian' or 'ubuntu': which branch to use
    @repo: gitubuntu.git_repository.GitRepository object containing pristine-tar data

    Returns whatever repo.pristine_tar_extract() returns for the
    requested source when pristine-tar data is available for it.

    Returns None, after logging a warning, if:
    - repo.pristine_tar_exists() raises MultiplePristineTarFoundError
    - no pristine-tar data was found at all
    - exactly one result was found and it is not for the requested source
    Also returns None if the extraction raises CalledProcessError.
    """
    try:
        found = repo.pristine_tar_exists(
            changelog.srcpkg,
            changelog.upstream_version,
        )
    except gitubuntu.git_repository.MultiplePristineTarFoundError as err:
        logging.warning(
            "%s. This is often because the orig "
            "tarball compression changed and it is not possible "
            "to determine which tarball to use automatically.",
            err,
        )
        return None

    found_count = len(found)

    if not found_count:
        logging.warning(
            "No pristine-tar data found for %s",
            changelog.upstream_version,
        )
        return None

    if found_count == 1 and source not in found:
        only_found = next(iter(found))
        logging.warning(
            "pristine-tar data found for %s, but expected to "
            "find %s",
            only_found,
            source,
        )
        return None

    if found_count == 2:
        # Not fatal: carry on with the requested source
        logging.warning(
            "pristine-tar data found for both debian and "
            "ubuntu, using %s",
            source,
        )

    try:
        extracted = repo.pristine_tar_extract(
            changelog.srcpkg,
            changelog.upstream_version,
            source,
        )
    except CalledProcessError:
        return None
    return extracted


def fetch_orig_from_launchpad(changelog, source, pullfile, retries,
    retry_backoffs, dl_cache=None,
):
    """Fetch orig tarballs from Launchpad

    @changelog: gitubuntu.git_repository.Changelog object representing required version
    @source: 'debian' or 'ubuntu': which Launchpad distribution to search
    @pullfile: path handed (made absolute) to GitUbuntuSourceInformation
    as pull_overrides_filename
    @retries: passed through to GitUbuntuSourceInformation
    @retry_backoffs: passed through to GitUbuntuSourceInformation
    @dl_cache: passed as workdir when listing published versions. If not
    None, the dsc (stored as 'DSC') and the orig tarballs of the matching
    publication are also copied below it, and the parent directory
    symlinks point at those copies

    Walks the published versions and stops at the first one whose
    upstream version equals the one in changelog.

    Returns the list of orig tarball paths named by the dsc of the
    matching publication, after symlinking the tarballs into the parent
    directory.

    Returns None if there is no publication history, if a publication
    with an older upstream version than required is reached first, or if
    no publication matches.
    """
    assert changelog.upstream_version and changelog.srcpkg
    assert source in ['debian', 'ubuntu']
    source_info = GitUbuntuSourceInformation(
        dist_name=source,
        pkgname=changelog.srcpkg,
        pull_overrides_filename=os.path.abspath(pullfile),
        retries=retries,
        retry_backoffs=retry_backoffs,
    )

    try:
        publications = source_info.launchpad_versions_published(
            workdir=dl_cache,
            sorted_by_version=True,
        )
    except NoPublicationHistoryException:
        logging.warning(
            "No publication history found for %s in %s. ",
            changelog.srcpkg,
            source,
        )
        return None

    for publication in publications:
        logging.debug(
            "Checking if upstream version of publish %s matches %s",
            publication.version,
            changelog.upstream_version,
        )

        published_upstream = Version(publication.upstream_version)
        if published_upstream < Version(changelog.upstream_version):
            logging.info(
                "New upstream version detected (%s) which is after the last "
                "published upstream version (%s).",
                changelog.upstream_version,
                publication.upstream_version,
            )
            return None

        if publication.upstream_version != changelog.upstream_version:
            continue  # keep walking the publishing history

        logging.debug(
            "Upstream version of publish %s matches",
            publication.version,
        )

        publication.pull()
        dsc = GitUbuntuDsc(publication.dsc_pathname)
        orig_paths = dsc.all_tarball_paths

        if dl_cache is None:
            _symlink_paths_into_parent_dir(orig_paths)
            return orig_paths

        # NOTE(review): the cache is keyed on the source derived from the
        # changelog here, whereas fetch_orig_from_cache looks under its
        # source argument - confirm that these always agree.
        cache_dir = os.path.join(
            dl_cache,
            derive_source_from_changelog(changelog),
            changelog.srcpkg,
            changelog.upstream_version,
        )
        os.makedirs(cache_dir, exist_ok=True)

        # populate cache
        logging.debug("Caching dsc file")
        _populate_cache(cache_dir, dsc.dsc_path, 'DSC')
        cached_paths = [
            _populate_cache(cache_dir, orig_path)
            for orig_path in orig_paths
        ]
        _symlink_paths_into_parent_dir(cached_paths)

        # The paths from the dsc are returned, not the cached copies
        return orig_paths

    # Fell out of the for loop: didn't find anything.
    return None


def derive_orig_search_list_from_args(
    repo,
    commitish,
    for_merge,
    no_pristine_tar,
    pullfile=top_level_defaults.pullfile,
    retries=top_level_defaults.retries,
    retry_backoffs=top_level_defaults.retry_backoffs,
    dl_cache=None,
):
    """Build the orig search list that corresponds to the given arguments

    @param repo: object providing get_changelog_from_treeish(); also
    handed to the pristine-tar mechanism
    @param commitish: treeish whose changelog decides nativeness
    @param for_merge: if true, search 'debian', otherwise 'ubuntu'
    @param no_pristine_tar: if true, leave out the pristine-tar mechanism
    @param pullfile: bound into the Launchpad mechanism
    @param retries: bound into the Launchpad mechanism
    @param retry_backoffs: bound into the Launchpad mechanism
    @param dl_cache: bound into the Launchpad mechanism

    Returns a list of OrigSearchListEntry namedtuples. For a native
    package this is a single fetch_orig_noop entry. Otherwise the order
    is: parent directory, cache, pristine-tar (unless disabled), then
    Launchpad.
    """
    changelog = repo.get_changelog_from_treeish(commitish)
    if is_native_package(changelog):
        # No orig tarball required
        noop_entry = OrigSearchListEntry(
            mechanism=fetch_orig_noop,
            source=None,
            must_build=True,
        )
        return [noop_entry]

    if for_merge:
        source = 'debian'
    else:
        source = 'ubuntu'

    parent_dir_entry = OrigSearchListEntry(
        mechanism=fetch_orig_from_parent_dir,
        source=None,
        must_build=True,
    )
    cache_entry = OrigSearchListEntry(
        mechanism=fetch_orig_from_cache,
        source=source,
        must_build=False,
    )
    search_list = [parent_dir_entry, cache_entry]

    if not no_pristine_tar:
        pristine_tar_entry = OrigSearchListEntry(
            mechanism=functools.partial(
                fetch_orig_from_pristine_tar,
                repo=repo,
            ),
            source=source,
            must_build=False,
        )
        search_list.append(pristine_tar_entry)

    launchpad_entry = OrigSearchListEntry(
        mechanism=functools.partial(
            fetch_orig_from_launchpad,
            pullfile=pullfile,
            retries=retries,
            retry_backoffs=retry_backoffs,
            dl_cache=dl_cache,
        ),
        source=source,
        must_build=False,
    )
    search_list.append(launchpad_entry)

    return search_list


def derive_source_from_changelog(changelog):
    """Derive the source from the distribution field of a changelog

    @param changelog: object with a distribution string attribute

    Everything from the first '-' onwards in changelog.distribution is
    dropped, and the remaining series name is handed to
    derive_source_from_series.

    Returns the result of derive_source_from_series.
    """
    series, _, _ = changelog.distribution.partition('-')
    return derive_source_from_series(series)

@pytest.mark.parametrize('changelog_name, expected', [
    ('test_distribution_source_1', 'ubuntu'),
    ('test_distribution_source_2', 'ubuntu'),
    ('test_distribution_source_3', 'debian'),
    ('test_distribution_source_4', 'ubuntu'),
])
def test_derive_source_from_changelog(changelog_name, expected):
    """Each named test changelog must derive to the expected source"""
    changelog = get_test_changelog(changelog_name)
    derived = derive_source_from_changelog(changelog)
    assert derived == expected

def expand_changelog_source_aliases(orig_search_list, changelog):
    """Resolve 'changelog' source aliases using debian/changelog

    @param orig_search_list: list of OrigSearchListEntry namedtuples
    @param changelog: changelog handed to derive_source_from_changelog
    for every entry whose source is the string 'changelog'

    This is a generator: entries are produced lazily and in their
    original order. Entries with any other source are yielded unchanged.

    Returns: a sequence with changelog sources expanded
    """
    for candidate in orig_search_list:
        is_alias = candidate.source == 'changelog'
        yield (
            candidate._replace(source=derive_source_from_changelog(changelog))
            if is_alias
            else candidate
        )


def fetch_orig(
    orig_search_list,
    changelog,
):
    """
    Try each fetch mechanism in turn until one of them succeeds.

    A mechanism that returns None is treated as a miss and the next
    entry is tried. Any other result must be a list and ends the search.

    :param orig_search_list: list of OrigSearchListEntry namedtuples.
    :param changelog: passed to every mechanism, and used to expand
        'changelog' source aliases.
    :rtype: list
    :returns: A list of successfully fetched tarballs, or an empty list
        if none were found.
    """
    # Expanding a 'changelog' source alias may produce an entry equal to
    # one that is already in the list, so drop repeats to avoid running
    # the same search twice.
    candidates = unique_everseen(
        expand_changelog_source_aliases(orig_search_list, changelog)
    )

    for candidate in candidates:
        fetch = candidate.mechanism
        try:
            fetch_name = fetch.__name__
        except AttributeError:
            # No __name__ on this callable; fall back to the name of the
            # function it wraps via .func
            fetch_name = fetch.func.__name__

        fetched = fetch(changelog, candidate.source)
        if fetched is None:
            logging.debug(
                '%s(source=%s) failed',
                fetch_name,
                candidate.source,
            )
            continue  # search returned negative; try next search entry

        assert isinstance(fetched, list)
        if fetched:
            listing = ':\n' + '\n'.join(fetched) + '\n'
        else:
            listing = ' '
        logging.info(
            'Successfully fetched%susing %s(source=%s)',
            listing,
            fetch_name,
            candidate.source,
        )
        return fetched

    return []


class NativenessMismatchError(Exception):
    """Version in debian/changelog and debian/source/format disagree"""

def is_native_package(changelog):
    """Determine if package is native

    @changelog: gitubuntu.git_repository.Changelog object

    A version without a '-' in it is taken to be a native version.

    debian/source/format is opened relative to the current working
    directory. If it is missing, a warning is logged and the answer is
    based on the version alone.

    Returns True if the version is native, False otherwise.

    Raises NativenessMismatchError if the version is native but
    debian/source/format does not contain 'native'.
    """
    version_is_native = '-' not in changelog.version

    try:
        with open('debian/source/format', 'r') as format_file:
            source_format = format_file.read()
    except FileNotFoundError:
        logging.warning(
            "No debian/source/format file found. "
            "Unable to verify native packaging format."
        )
        return version_is_native

    # Only this direction of disagreement is checked
    if version_is_native and 'native' not in source_format:
        raise NativenessMismatchError(
            "Native versioning found in debian/changelog, but "
            "debian/source/format does not indicate a native "
            "source package."
        )

    return version_is_native


# Adapted from https://docs.python.org/3/library/itertools.html
def unique_everseen(iterable):
    "List unique elements, preserving order. Remember all elements ever seen."
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    #
    # This is a generator: an element is yielded the first time it is met
    # and skipped on every later occurrence. Elements must be hashable
    # because they are remembered in a set.
    already_yielded = set()
    for candidate in iterable:
        if candidate in already_yielded:
            continue
        already_yielded.add(candidate)
        yield candidate
@pytest.mark.parametrize('iterable, expected', [
    ('AAAABBBCCDAABBB', ['A', 'B', 'C', 'D']),
])
def test_unique_everseen(iterable, expected):
    """Repeats are dropped and first-seen order is kept"""
    result = list(unique_everseen(iterable))
    assert result == expected