File: source_information.py

package info (click to toggle)
git-ubuntu 1.1-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,688 kB
  • sloc: python: 13,378; sh: 480; makefile: 2
file content (782 lines) | stat: -rw-r--r-- 29,658 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
import functools
import heapq
import logging
import os
import re
import sys
import time
# Cached anonymous Launchpad login; populated lazily by launchpad_login().
_LP_LOGIN = None
# Cached authenticated Launchpad login; populated lazily by
# launchpad_login_auth().
_LP_LOGIN_AUTH = None
# Launchpad service root and API version passed to both login helpers.
_lp_service = 'production'
_lp_api_version = 'devel'

import cachetools
from distro_info import DebianDistroInfo, UbuntuDistroInfo
from launchpadlib.launchpad import Launchpad as LP
from ubuntutools.archive import UbuntuSourcePackage, DebianSourcePackage, DownloadError
import ubuntutools.lp.lpapicache
from ubuntutools.lp.lpapicache import SourcePackagePublishingHistory

# Module-wide distro-info instances, consulted by the
# derive_*_from_series() helpers to validate series names.
_ddi = DebianDistroInfo()
_udi = UbuntuDistroInfo()

class SourceExtractionException(Exception):
    """Signals an error encountered while extracting a source package."""


@cachetools.cached(
    cache=cachetools.LFUCache(maxsize=128),
    key=lambda lpobj, attr: (
        lpobj._root,
        getattr(lpobj, '%s_link' % attr),
    ),
)
def _get_cached_lp_link(lpobj, attr):
    """Return <lpobj>.<attr>, going through a shared LFU cache

    Whenever <lpobj>.<attr>_link exists, <lpobj>.<attr> can be fetched via
    this function instead of directly. The same <attr> is often needed from
    many different objects (for example 'distro_series' from many
    source_package_publishing_history objects, and 'distribution' from the
    resulting series), so sharing the result avoids a very large number of
    API round trips.

    The cache key is the pair (lpobj._root, <lpobj>.<attr>_link). Including
    the root keeps results from different logins apart, since different
    logins may have differing visibilities of the same object.

    lpobj._root is technically private, but seems to be the only sensible
    option:

        <cjwatson> rbasak: I think using _root is your only option
        <cjwatson> It's technically private but in practice very stable

    :param lazr.restfulclient.resource.Entry lpobj: the object from Launchpad
    :param str attr: the name of the attribute to fetch from lpobj; a
        matching <lpobj>.<attr>_link entry must exist
    :returns: the equivalent of <lpobj>.<attr>, but cached
    :rtype: lazr.restfulclient.resource.Entry
    """
    linked_object = getattr(lpobj, attr)
    return linked_object


def launchpad_login():
    """Return the shared anonymous Launchpad login, creating it on first use.

    The login is stored in the module-level _LP_LOGIN and also registered
    with ubuntutools' lpapicache so that both use the same session.
    """
    global _LP_LOGIN
    if not _LP_LOGIN:
        _LP_LOGIN = LP.login_anonymously(
            'git-ubuntu-importer',
            _lp_service,
            version=_lp_api_version,
        )
        # This is deliberately in a long form and we do not import the
        # Launchpad object into the module namespace itself to work around
        # LP: #1733388
        ubuntutools.lp.lpapicache.Launchpad.login_existing(_LP_LOGIN)
    return _LP_LOGIN


def launchpad_login_auth():
    """Return the shared authenticated Launchpad login, creating it on
    first use.

    The login is stored in the module-level _LP_LOGIN_AUTH.
    """
    global _LP_LOGIN_AUTH
    if not _LP_LOGIN_AUTH:
        _LP_LOGIN_AUTH = LP.login_with(
            'git-ubuntu-importer',
            _lp_service,
            version=_lp_api_version,
        )
    return _LP_LOGIN_AUTH


def derive_source_from_series(series):
    """Determine which distribution a series name belongs to.

    :param str series: the series name to look up.
    :rtype: str
    :returns: 'debian' or 'ubuntu'. The name 'devel' is always treated as
        Ubuntu; otherwise Debian is checked before Ubuntu.
    :raises ValueError: if the series is valid for neither distribution.
    """
    if series != 'devel':
        candidates = (('debian', _ddi), ('ubuntu', _udi))
        for distribution, distro_info in candidates:
            if distro_info.valid(codename=series):
                return distribution
        raise ValueError("Unable to determine distribution from %s" % series)
    return 'ubuntu'

def derive_codename_from_series(series):
    """Determine the release codename from the series name.

    :param str series: The series name from the Debian changelog
    :rtype: str
    :returns: The corresponding codename for the series.
    :raises ValueError: If no valid codename could be determined.

    The first line of every Debian changelog entry carries a package,
    version, series (aka 'distribution' in Debian documentation), and
    urgency.  The series can be a Debian or Ubuntu release codename
    (e.g. 'buster', 'sid', 'bionic', or 'eoan'), or an alias
    (e.g. 'unstable', 'testing').

    The series is checked against Debian first and then Ubuntu; the first
    distribution that accepts it is asked to translate any alias into the
    appropriate codename (e.g. 'stable' to 'buster', 'unstable' to 'sid').
    If neither accepts it, a ValueError exception is thrown.
    """
    for distro_info in (_ddi, _udi):
        if distro_info.valid(codename=series):
            return distro_info.codename(release=series, default=series)
    raise ValueError("Unable to determine codename from %s" % series)

# The following two classes are trivial wrappers around their
# corresponding ubuntutools.archive classes, except that they disable
# the use of non-Launchpad masters or mirrors. Instead, all downloads
# will go through Launchpad.
class LaunchpadUbuntuSourcePackage(UbuntuSourcePackage):
    """UbuntuSourcePackage whose masters and mirrors lists are emptied."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Clear both lists once the parent constructor has filled them in.
        self.masters = []
        self.mirrors = []


class LaunchpadDebianSourcePackage(DebianSourcePackage):
    """DebianSourcePackage whose masters and mirrors lists are emptied."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Clear both lists once the parent constructor has filled them in.
        self.masters = []
        self.mirrors = []


class GitUbuntuPPASourcePackage(UbuntuSourcePackage):
    """Download / unpack a PPA source package

    :param str ppa_spec: the PPA specifier, in the form
        'ppa:<owner>/<name>'.

    All other positional and keyword arguments are passed through to the
    UbuntuSourcePackage constructor.
    """

    def __init__(self, ppa_spec, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.masters = []
        self.mirrors = []
        _, ppa = ppa_spec.split(':')
        self._ppa_owner, self._ppa_name = ppa.split('/', 1)
        # Filled in lazily by the lp_spph property.
        self._spph = None

    def _lp_url(self, filename):
        "Build a source package URL on Launchpad"
        return os.path.join(
            'https://launchpad.net',
            "~%s" % self._ppa_owner,
            '+archive',
            'ubuntu',
            self._ppa_name,
            '+files',
            filename,
        )

    @property
    def lp_spph(self):
        """Return the LP Source Package Publishing History entry

        The entry is looked up on first access and then remembered.

        :raises ValueError: if no Launchpad person has exactly the PPA
            owner's name.
        """
        if not self._spph:
            launchpad = launchpad_login()
            possible_people = launchpad.people.findPerson(text=self._ppa_owner)
            # findPerson() is a text search, so only accept an exact name
            # match; None means that no candidate qualified.
            ppa_owner = next(
                (
                    person
                    for person in possible_people
                    if person.name == self._ppa_owner
                ),
                None,
            )
            if ppa_owner is None:
                raise ValueError(
                    "Unable to find owner for PPA: ppa:%s/%s" % (
                        self._ppa_owner,
                        self._ppa_name,
                    )
                )
            spph = (ppa_owner.getPPAByName(name=self._ppa_name)
                             .getPublishedSources(
                                 source_name=self.source,
                                 version=self.version.full_version,
                                 exact_match=True,
                             ))
            # NOTE(review): indexing presumably fails with IndexError when
            # the PPA has no matching publication -- confirm before relying
            # on a specific exception type here.
            self._spph = SourcePackagePublishingHistory(spph[0])
        return self._spph


class GitUbuntuSourcePackageInformation:
    """Information about a single Launchpad source package publication

    Pairs a Launchpad source package publishing history record with the
    archive source package object that is used to download it.

    :param spphr: the Launchpad source package publishing history record.
    :param str dist_name: the distribution name, or a PPA specifier starting
        with 'ppa:'.
    :param int retries: how many times pull() retries after a failed first
        attempt.
    :param list retry_backoffs: seconds to sleep before each retry;
        retry_backoffs[i] is used after failed attempt i. Defaults to no
        backoffs.
    :param str workdir: passed through to the archive source package; the
        directory is created if it does not exist yet.
    :param dsc: passed through to the archive source package as dscfile.
    :param list files: URLs that are each handed to the archive source
        package's _download_file() during construction, using the final
        path component as the filename. Defaults to no files.
    """

    def __init__(self, spphr, dist_name, retries=0, retry_backoffs=None,
                 workdir=None, dsc=None, files=None):
        self._spphr = spphr
        self._dist_name = dist_name
        self._pkgname = self.spphr.source_package_name
        self._version = self.spphr.source_package_version
        self.retries = retries
        # None (rather than a mutable default) stands for "no backoffs".
        self.retry_backoffs = [] if retry_backoffs is None else retry_backoffs

        if self._dist_name.startswith('ppa:'):
            func = functools.partial(GitUbuntuPPASourcePackage,
                                     self._dist_name)
        elif self.distribution_name.lower() == 'ubuntu':
            func = LaunchpadUbuntuSourcePackage
        else:
            func = LaunchpadDebianSourcePackage

        # do this here, in case files is passed
        if workdir and not os.path.isdir(workdir):
            os.makedirs(workdir, exist_ok=True)

        self._archive_srcpkg = func(package=self._pkgname,
                                    version=self._version,
                                    workdir=workdir,
                                    quiet=True,
                                    dscfile=dsc)
        for f in files or ():
            self._archive_srcpkg._download_file(f, f.split('/')[-1])

    @property
    def distribution_name(self):
        # This is the equivalent of self.distribution.name, but accesses
        # distribution through a cache to prevent excessive API round trips.
        distro_series = _get_cached_lp_link(self._spphr, 'distro_series')
        distribution = _get_cached_lp_link(distro_series, 'distribution')
        return distribution.name

    @property
    def series_name(self):
        # This is the equivalent of self.series.name, but accesses
        # distro_series through a cache to prevent excessive API round trips.
        return _get_cached_lp_link(self._spphr, 'distro_series').name

    @property
    def parent_series_name(self):
        # This is the equivalent of self.parent_series.name, but accesses
        # previous_series through a cache to prevent excessive API round
        # trips.
        distro_series = _get_cached_lp_link(self._spphr, 'distro_series')
        previous_series = _get_cached_lp_link(distro_series, 'previous_series')
        return previous_series.name

    @property
    def spphr(self):
        return self._spphr

    @property
    def version(self):
        return self._version

    @property
    def upstream_version(self):
        return self._archive_srcpkg.version.upstream_version

    @property
    def name(self):
        return self._pkgname

    @property
    def archive_srcpkg(self):
        return self._archive_srcpkg

    @property
    def series(self):
        return self._spphr.distro_series

    @property
    def parent_series(self):
        return self._spphr.distro_series.previous_series

    @property
    def distribution(self):
        return self._spphr.distro_series.distribution

    @property
    def date_published(self):
        return self._spphr.date_published

    @property
    def date_created(self):
        return self._spphr.date_created

    @property
    def pocket(self):
        return self._spphr.pocket

    @property
    def pretty_head_name(self):
        """The branch head name for this publication, without any prefix

        :rtype: str
        :returns: '<distribution>/<series>' for the release pocket, or
            '<distribution>/<series>-<pocket>' otherwise, all lower case.
        """
        if self._spphr.pocket.lower() == 'release':
            return '%s/%s' % (
                self.distribution_name.lower(),
                self.series_name.lower(),
            )
        return '%s/%s-%s' % (
            self.distribution_name.lower(),
            self.series_name.lower(),
            self.pocket.lower(),
        )

    def head_name(self, prefix):
        """The branch head name for this publication under a prefix

        :param str prefix: the namespace to prepend.
        :rtype: str
        :returns: '<prefix>/' followed by pretty_head_name.
        """
        return '%s/%s' % (prefix, self.pretty_head_name)

    def applied_head_name(self, prefix):
        """The same as head_name(), but under '<prefix>/applied'"""
        return self.head_name('%s/applied' % prefix)

    def parent_head_name(self, prefix):
        """The name of the branch head that this publication descends from

        :param str prefix: the namespace to prepend.
        :rtype: str or None
        :returns: None if the series has no previous series. Otherwise
            '<prefix>/<distribution>/<series>', where <series> is the
            previous series for a release pocket publication and this
            publication's own series for any other pocket.
        """
        if self.parent_series is None:
            return None
        if self.pocket.lower() == 'release':
            # release pockets descend from prior series
            ancestor_series_name = self.parent_series_name
        else:
            # non-release pockets descend from release
            ancestor_series_name = self.series_name
        return '%s/%s/%s' % (
            prefix,
            self.distribution_name.lower(),
            ancestor_series_name.lower(),
        )

    def parent_applied_head_name(self, prefix):
        """The same as parent_head_name(), but under '<prefix>/applied'"""
        return self.parent_head_name('%s/applied' % prefix)

    @property
    def dsc(self):
        return self._archive_srcpkg.dsc

    @property
    def dsc_pathname(self):
        return self._archive_srcpkg.dsc_pathname

    def pull(self):
        """Download the source package, retrying on failure

        Up to self.retries + 1 attempts are made. After a failed attempt
        that will be retried, sleep for the matching entry of
        self.retry_backoffs. If there are fewer backoffs than retries, the
        final backoff is reused; with no backoffs at all, retry at once.

        :raises DownloadError: if every attempt failed.
        """
        attempts = self.retries + 1
        last_error = None
        for i in range(attempts):
            try:
                self._archive_srcpkg.pull()
                return
            except Exception as e:
                # Deliberately broad: any failure of an attempt is retried.
                last_error = e
                logging.error(
                    'Failed to pull down source (attempt %d/%d)',
                    i + 1,
                    attempts,
                )
                logging.debug("Source package download error: %r", e)
                if i < self.retries and self.retry_backoffs:
                    # Clamp the index so that a short backoff list cannot
                    # raise IndexError and abort the remaining retries.
                    backoff_index = min(i, len(self.retry_backoffs) - 1)
                    time.sleep(self.retry_backoffs[backoff_index])
        raise DownloadError('Failed to pull down source') from last_error

    @property
    def workdir(self):
        return self._archive_srcpkg.workdir

    def get_changes_file_url(self):
        """Retrieve the changes file URL associated with this publication

        If there is no changes file URL, then return None. This happens, for
        example, with publications that were synced from Debian.

        :rtype: str or None
        :returns: the changes file URL associated with this publication, or
            None if one isn't available.
        """
        return self._spphr.changesFileUrl()


class NoPublicationHistoryException(Exception):
    """Raised when a source package has no publication history."""


# An abstraction of an information source about source packages
class GitUbuntuSourceInformation(object):
    """Source of publication information for one package in one archive

    An instance is bound to a single Launchpad archive: the main archive of
    a distribution, or a PPA if dist_name starts with 'ppa:'.

    :param str dist_name: a Launchpad distribution name, or a PPA specifier
        in the form 'ppa:<owner>/<name>'.
    :param str pkgname: the source package name, used for publication
        queries and to filter the pull overrides file.
    :param str pull_overrides_filename: the file handed to parse_pullfile().
    :param int retries: passed on to every
        GitUbuntuSourcePackageInformation created by get_corrected_spi().
    :param list retry_backoffs: passed on likewise. Defaults to no backoffs.
    :param lp: an existing Launchpad login to use; if this is falsy then
        launchpad_login() is used instead.
    :raises ValueError: if dist_name is a PPA specifier and no Launchpad
        person has exactly the owner's name.
    """

    # Some series are listed as active in Launchpad but aren't available via
    # apt, so this is a list of those to exclude as a workaround. See
    # https://answers.launchpad.net/launchpad/+question/680444
    DENYLISTED_ACTIVE_SERIESS = {
        'debian': frozenset(['wheezy']),
        'ubuntu': frozenset(),
    }

    # Lazily computed results of the corresponding *_series properties.
    _all_series_list = None
    _active_series_list = None
    _stable_series_list = None
    _current_series = None

    def __init__(self, dist_name, pkgname=None,
                 pull_overrides_filename='/dev/null',
                 retries=0,
                 retry_backoffs=None,
                 lp=None,
                ):
        self.launchpad = lp or launchpad_login()
        self.dist_name = dist_name
        if self.dist_name.startswith('ppa:'):
            _, ppa = dist_name.split(':')
            ppa_owner, ppa_name = ppa.split('/', 1)
            possible_people = self.launchpad.people.findPerson(text=ppa_owner)
            # findPerson() is a text search, so only accept an exact name
            # match. A dedicated result variable ensures that a non-matching
            # candidate can never be mistaken for the owner.
            owner = next(
                (
                    person
                    for person in possible_people
                    if person.name == ppa_owner
                ),
                None,
            )
            if owner is None:
                raise ValueError(
                    "Unable to find owner for PPA: %s" % dist_name
                )
            self.archive = owner.getPPAByName(name=ppa_name)
        else:
            self.dist = self.launchpad.distributions[dist_name]
            self.archive = self.dist.main_archive
        self.pkgname = pkgname
        self.pull_overrides = self.parse_pullfile(pull_overrides_filename)
        self.retries = retries
        # None (rather than a mutable default) stands for "no backoffs".
        self.retry_backoffs = [] if retry_backoffs is None else retry_backoffs

    @staticmethod
    def _spi_found_in_relevant_head(head_info, namespace, spi):
        """Determine if a branch head matches the given source publication

        Consider the branch head that is expected to be updated were the source
        publication to be imported. If the source publication appears to
        already be imported there, then this method returns True.

        This is assumed if the commit timestamp at the branch head matches the
        date_created timestamp of the source publication, and the package
        version strings also match.

        :param dict(str, HeadInfoItem) head_info: the state of the relevant
            branch heads, as returned by GitUbuntuRepository.get_head_info().
        :param str namespace: the namespace prefix used in the git repository.
        :param GitUbuntuSourcePackageInformation spi: the Launchpad publication
            to consider.
        :rtype: bool
        :returns: True if the spi matches the relevant branch head
        """
        try:
            head_info_item = head_info[spi.head_name(namespace)]
        except KeyError:
            # We don't have information for the relevant branch. Probably
            # because it doesn't exist. So our answer is no: we don't appear to
            # have this source publication imported.
            return False
        return (
            head_info_item.version == spi.version
            and int(spi.date_created.timestamp()) == head_info_item.commit_time
        )

    @property
    def current_series(self):
        """The distribution's current series, or None for a PPA"""
        if self.dist_name.startswith('ppa:'):
            return None
        return self.dist.current_series

    @property
    def denylisted_active_seriess(self):
        """Names of series to leave out of active_series

        A distribution without an entry in DENYLISTED_ACTIVE_SERIESS has
        nothing denylisted.
        """
        return self.DENYLISTED_ACTIVE_SERIESS.get(self.dist_name, frozenset())

    @property
    def active_series(self):
        """Active, non-denylisted series objects, newest first

        Always empty for a PPA. The result is computed once and then reused.
        """
        if self.dist_name.startswith('ppa:'):
            return []
        # return a list of series objects sorted with newest first.
        if self._active_series_list is None:
            denylisted = self.denylisted_active_seriess
            self._active_series_list = sorted(
                [
                    r
                    for r in self.dist.series
                    if r.active and r.name not in denylisted
                ],
                key=lambda s: float(s.version),
                reverse=True,
            )
        return self._active_series_list

    @property
    def stable_series(self):
        """Active series objects with a released status, newest first

        Always empty for a PPA. The result is computed once and then reused.
        """
        if self.dist_name.startswith('ppa:'):
            return []
        # return a list of released series objects sorted with newest first.
        if self._stable_series_list is None:
            self._stable_series_list = sorted(
                [
                    r for r in self.dist.series if r.active and
                    r.status in ('Current Stable Release', 'Supported')
                ],
                key=lambda s: float(s.version),
                reverse=True,
            )
        return self._stable_series_list

    @property
    def all_series(self):
        """All series objects of the distribution, newest first

        Always empty for a PPA. The result is computed once and then reused.
        """
        if self.dist_name.startswith('ppa:'):
            return []
        # return a list of all series objects sorted with newest first
        if self._all_series_list is None:
            self._all_series_list = sorted(
                self.dist.series,
                key=lambda s: float(s.version),
                reverse=True,
            )
        return self._all_series_list

    @property
    def current_series_name(self):
        """The name of current_series, or None if there is none"""
        if not self.current_series:
            return None
        return self.current_series.name

    @property
    def active_series_name_list(self):
        return [r.name for r in self.active_series]

    @property
    def stable_series_name_list(self):
        return [r.name for r in self.stable_series]

    @property
    def all_series_name_list(self):
        return [r.name for r in self.all_series]

    def get_corrected_spi(self, srcpkg, workdir=None):
        """Wrap a publication record, applying any pull override

        :param srcpkg: the Launchpad source package publishing history
            record to wrap.
        :param workdir: passed through to the
            GitUbuntuSourcePackageInformation constructor.
        :rtype: GitUbuntuSourcePackageInformation
        :returns: the wrapped publication. If self.pull_overrides has an
            entry for the publication's version, then its 'dsc' and 'files'
            values are used; otherwise no dsc and no files are passed.
        """
        try:
            pull_override = self.pull_overrides[srcpkg.source_package_version]
            dsc = pull_override['dsc']
            files = pull_override['files']
        except KeyError:
            dsc = None
            files = list()
        return GitUbuntuSourcePackageInformation(
            srcpkg,
            self.dist_name,
            self.retries,
            self.retry_backoffs,
            workdir=workdir,
            dsc=dsc,
            files=files,
        )

    def launchpad_version_is_published(self, version, workdir=None):
        """Determine if a specific version of the package is published

        :param str version: the exact version string to look for.
        :param workdir: unused.
        :rtype: bool
        :returns: True if the archive has at least one publication of
            self.pkgname with the given version.
        """
        spph = self.archive.getPublishedSources(
            exact_match=True,
            source_name=self.pkgname,
            version=version,
            order_by_date=True,
        )
        return len(spph) != 0

    def launchpad_versions_published(self, workdir=None,
        sorted_by_version=False, series=None
    ):
        """Generate every publication of the package in the archive

        This is a generator, so the query is made and any exception is
        raised only once iteration starts.

        :param workdir: passed through to get_corrected_spi().
        :param bool sorted_by_version: if false, then Launchpad is asked to
            order the results by date.
        :param series: if set, then only publications in this distro series
            are requested.
        :rtype: sequence(GitUbuntuSourcePackageInformation)
        :raises NoPublicationHistoryException: if there are no matching
            publications.
        """
        args = {
            'exact_match': True,
            'source_name': self.pkgname,
        }
        if not sorted_by_version:
            args['order_by_date'] = True
        if series:
            args['distro_series'] = series

        spph = self.archive.getPublishedSources(**args)
        if len(spph) == 0:
            raise NoPublicationHistoryException("Is %s published in %s?" %
                                                (self.pkgname, self.dist_name))

        for srcpkg in spph:
            yield self.get_corrected_spi(srcpkg, workdir)

    def launchpad_versions_published_after(
        self,
        head_info,
        namespace,
        workdir=None,
        active_series_only=False,
    ):
        """Return a sequence of GitUbuntuSourcePackageInformation instances

        Return a sequence of GitUbuntuSourcePackageInformation instances
        representing Launchpad publications created in a Launchpad distribution
        after a particular point, as determined by the head_info parameter.

        :param dict head_info: as returned by
            GitUbuntuRepository.get_head_info(). This may be empty, in
            which case all publications are returned. Otherwise, the
            publications are walked from newest to oldest, and the first
            one whose version and date_created match its relevant branch
            head in head_info is skipped, together with every publication
            that Launchpad lists after it.
        :param str namespace: the namespace prefix used in the git repository.
        :param workdir: passed through to the GitUbuntuSourcePackageInformation
            constructor.
        :param bool active_series_only: for Ubuntu, skip any series that is not
            an active series, as determined by self.active_series_name_list.
        :rtype: sequence(GitUbuntuSourcePackageInformation)
        """

        # we have the date of the commit too, so we can double-check
        # that it matches
        # iterate from the newest to oldest publish
        # if published version is != corresponding head's version
        #     save off publish
        # once we see the first version matching exactly a head, we
        # assume that all prior versions have been imported/uploaded and
        # so we start the following loop from there
        # this favors (performance-wise) updating an existing tree, but
        # that's the expected use-case.

        # do not do coherence checks on versions less than the head
        # versions, as there are cases (clamav 0.91.2-3ubuntu2.2~feisty1
        # in feisty-backports of a 'next' version being prior in the
        # ordering)
        spph = self.archive.getPublishedSources(
            exact_match=True,
            source_name=self.pkgname,
            order_by_date=True,
        )
        # Coherence check that the passed in srcpkg name has a publication
        # history
        if len(spph) == 0:
            logging.warning("No publication history found for %s in %s.",
                self.pkgname, self.dist_name
            )
            return
        truncated_spph = list()
        for spphr in spph:
            spi = GitUbuntuSourcePackageInformation(
                spphr, self.dist_name, workdir=workdir
            )
            if self._spi_found_in_relevant_head(head_info, namespace, spi):
                break
            truncated_spph.append(spphr)

        # The publications were collected newest first; yield them oldest
        # first.
        for spphr in reversed(truncated_spph):
            spi = self.get_corrected_spi(spphr, workdir)
            if (
                active_series_only
                and spi.series.name.lower() not in self.active_series_name_list
            ):
                continue
            yield spi

    @staticmethod
    def interleave_launchpad_versions_published_after(
        gusi_head_info_tuple_list,
        namespace,
        workdir=None,
        active_series_only=False,
    ):
        """Interleave multiple calls to launchpad_versions_published_after

        Interleave the result of multiple calls to the
        launchpad_versions_published_after() method. The specification requires
        new publications to be processed across the Debian and Ubuntu
        distributions in order of date_created to establish
        hash stability. launchpad_versions_published_after() returns the result
        of one distribution at a time. This method calls any number
        simultaneously and interleaves the results so that the caller sees a
        single combined sequence in ascending order of date_created. If
        date_created is the same, results are provided in the order that the
        distributions are specified in gusi_head_info_tuple_list. In other
        words, the distribution ordering is the secondary sort key.

        :param list(tuple(GitUbuntuSourceInformation, dict))
            gusi_head_info_tuple_list: a list of the parameters to use for the
            underlying launchpad_versions_published_after() method call. The
            GitUbuntuSourceInformation instance is the object to call the
            method against. The second element of the tuple is the head_info
            parameter to pass to that method. The other parameters are passed
            through to the underlying call as-is.
        :param namespace: passed through to
            launchpad_versions_published_after().
        :param workdir: passed through to launchpad_versions_published_after().
        :param bool active_series_only: passed through to
            launchpad_versions_published_after().
        :rtype: sequence(GitUbuntuSourcePackageInformation)
        :returns: the combined return sequences of the underlying calls to
            launchpad_versions_published_after() interleaved as specified.
        """
        # Create a mapping of distribution to priority, where distribution is
        # the Launchpad distribution_link string representing the distribution,
        # and priority is an integer (low number=high priority). Then we will
        # be able to sort according to priority as a secondary key to ensure
        # that if timestamps of a publication are the same across
        # distributions, the distribution mentioned first in
        # gusi_head_info_tuple_list will appear in the results first.
        dist_priority = {
            gusi.dist.self_link: i
            for i, (gusi, _head_info)
            in enumerate(gusi_head_info_tuple_list)
        }

        # Now that we have a mapping of distribution_link to priority, the sort
        # key function is simple: sort on date created first, and if they are
        # equal then use the distribution priority. Python will do this for us
        # if we provide a tuple. The distribution priority is necessary because
        # there is no guarantee that heapq.merge(), as used below, provides a
        # stable sort. If it did, then keying on date_created only would be
        # sufficient.
        def key_func(spi):
            distro_series = _get_cached_lp_link(spi._spphr, 'distro_series')
            distribution_link = distro_series.distribution_link
            return spi._spphr.date_created, dist_priority[distribution_link]

        # Create one generator per gusi_head_info_tuple_list entry.
        spi_generators_to_interleave = [
            gusi.launchpad_versions_published_after(
                head_info=head_info,
                namespace=namespace,
                workdir=workdir,
                active_series_only=active_series_only,
            )
            for gusi, head_info in gusi_head_info_tuple_list
        ]

        # heapq.merge() will now do the interleaving. This relies on the
        # results of each of the calls to launchpad_versions_published_after()
        # already being sorted by date_created.
        return heapq.merge(
            *spi_generators_to_interleave,
            key=key_func,
        )

    def parse_pullfile(self, pullfile):
        """Extract source file overrides from a file

        The pull overrides file specifies sources to use for a source
        package instead of Launchpad, so that a publish is importable.
        Typically, this is needed when an orig tarball is republished with
        different contents than before, but with the same name, e.g., when
        an epoch bump occurs but the version is not changed. This is no
        longer typical, but can be seen historically.

        The format of the pull overrides file is:
        <pkgname> <version> <URL of dsc file> <URLs of orig files>
        with one package per line. Lines starting with '#' are ignored, as
        are lines for any package other than self.pkgname.

        <pkgname> is the name of the source package to which this override
        applies.

        <version> is the published version which needs
        this override.

        <URL of dsc file> is a URL to a valid DSC file for this publication.
        '-' indicates to use the DSC file from Launchpad.

        <URLs of orig files> is a whitespace-separated list of further URLs
        to download that are shadowed in Launchpad.

        Keyword Arguments:
        pullfile -- Path to pull overrides file, or None if not required

        Returns a dict that maps each overridden version to a dict with the
        keys 'dsc' (the DSC URL, or None to use Launchpad's) and 'files'
        (the list of further URLs). The dict is empty if pullfile is falsy
        or the file does not exist.
        """
        pull_overrides = dict()
        if not pullfile:
            return pull_overrides
        # Every field is optional in this pattern, so it matches any line;
        # lines without the expected package name are filtered out below.
        line_pattern = re.compile(
            r'(?P<pkgname>\S*)\s*(?P<version>\S*)\s*(?P<dscurl>\S*)\s*(?P<files>.*)'
        )
        try:
            with open(pullfile) as f:
                for line in f:
                    if line.startswith('#'):
                        continue
                    m = line_pattern.match(line)
                    if m is None:
                        continue
                    if m.group('pkgname') != self.pkgname:
                        continue
                    if m.group('dscurl') == '-':
                        # use DSC file from LP
                        dscurl = None
                    else:
                        dscurl = m.group('dscurl')
                    files = m.group('files').split()
                    pull_overrides[m.group('version')] = {
                        'dsc': dscurl,
                        'files': files,
                    }
        except FileNotFoundError:
            # A missing overrides file simply means there are no overrides.
            pass
        return pull_overrides