File: download-s3.py

package info:
python-confluent-kafka 1.7.0-5
  • area: main
  • in suites: forky, sid
  • size: 1,904 kB
  • sloc: python: 8,335; ansic: 6,065; sh: 1,203; makefile: 178
file content: 178 lines, 6,209 bytes, mode -rwxr-xr-x
#!/usr/bin/env python
#
#
# Collects CI artifacts from S3 storage, downloading them
# to a local directory.
#
# The artifacts' folder in the S3 bucket must have the following token
# format:
#  <token>-[<value>]__   (repeat)
#
# Recognized tokens (unrecognized tokens are ignored):
#  p       - project (e.g., "confluent-kafka-python")
#  bld     - builder (e.g., "travis")
#  plat    - platform ("osx", "linux", ..)
#  tag     - git tag
#  sha     - git sha
#  bid     - builder's build-id
#
# Example:
#   p-confluent-kafka-python__bld-travis__plat-linux__tag-__sha-112130ce297656ea1c39e7c94c99286f95133a24__bid-271588764__/confluent_kafka-0.11.0-cp35-cp35m-manylinux1_x86_64.whl
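#
# For illustration, the tokens parsed from the example folder above would be
# (the empty tag value means the build was not made from a git tag):
#  p    = confluent-kafka-python
#  bld  = travis
#  plat = linux
#  tag  =  (empty)
#  sha  = 112130ce297656ea1c39e7c94c99286f95133a24
#  bid  = 271588764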


import re
import os
import argparse

import boto3

s3_bucket = 'librdkafka-ci-packages'
dry_run = False


class Artifact (object):
    def __init__(self, arts, path, info=None):
        self.path = path
        # Remove unexpanded AppVeyor $(..) tokens from filename
        self.fname = re.sub(r'\$\([^\)]+\)', '', os.path.basename(path))
        self.lpath = os.path.join(arts.dlpath, self.fname)
        self.info = info
        self.arts = arts
        arts.artifacts.append(self)

    def __repr__(self):
        return self.path

    def download(self, dirpath):
        """ Download artifact from S3 and store in dirpath directory.
            If the artifact is already downloaded nothing is done. """
        if os.path.isfile(self.lpath) and os.path.getsize(self.lpath) > 0:
            return
        print('Downloading %s -> %s' % (self.path, self.lpath))
        if dry_run:
            return
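        # boto3 Bucket.download_file(Key, Filename) performs a managed
        # transfer of the S3 object to the local path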
        self.arts.s3_bucket.download_file(self.path, self.lpath)


class Artifacts (object):
    def __init__(self, gitref, dlpath):
        super(Artifacts, self).__init__()
        self.gitref = gitref
        self.artifacts = list()
        # Download directory
        self.dlpath = dlpath
        if not os.path.isdir(self.dlpath):
            if not dry_run:
                os.makedirs(self.dlpath, 0o755)

    def collect_single_s3(self, path, p_match=None):
        """ Collect single S3 artifact
         :param path: S3 object key
         :param p_match: optional project ('p' token) to match
        """

        # The S3 folder (confluent-kafka-python/p-...__bld-../..) contains
        # the tokens needed to perform matching of project, gitref, etc.
        folder = path.split('/')[1]

        rinfo = re.findall(r'(?P<tag>[^-]+)-(?P<val>.*?)__', folder)
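        # e.g. 'p-confluent-kafka-python__bld-travis__plat-linux__' yields
        # [('p', 'confluent-kafka-python'), ('bld', 'travis'), ('plat', 'linux')]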
        if not rinfo:
            # print('Incorrect folder/file name format for %s' % folder)
            return None

        info = dict(rinfo)

        # Match project
        if p_match is not None and info.get('p', '') != p_match:
            return None

        # Ignore AppVeyor Debug builds
        if info.get('bldtype', '').lower() == 'debug':
            print('Ignoring debug artifact %s' % folder)
            return None

        tag = info.get('tag', None)
        if tag is not None and (len(tag) == 0 or tag.startswith('$(')):
            # AppVeyor doesn't substitute $(APPVEYOR_REPO_TAG_NAME)
            # with an empty value when the tag is not set; it leaves the
            # token in the string, so treat that as no tag.
            tag = None

        sha = info.get('sha', None)

        # Match tag or sha to gitref
        if (tag is not None and tag == self.gitref) or (sha is not None and sha.startswith(self.gitref)):
            return Artifact(self, path, info)

        return None

    def collect_s3(self, s3_prefix, p_match=None):
        """ Collect and download build-artifacts from S3 based on git reference """
        print('Collecting artifacts matching %s from S3 bucket %s' % (self.gitref, s3_bucket))
        self.s3 = boto3.resource('s3')
        self.s3_bucket = self.s3.Bucket(s3_bucket)
        self.s3_client = boto3.client('s3')
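        # The resource-level Bucket above is used for downloads, while the
        # low-level client is used for paginated object listing below.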

        # note: list_objects will return at most 1000 objects per call,
        #       use continuation token to read full list.
        cont_token = None
        more = True
        while more:
            if cont_token is not None:
                res = self.s3_client.list_objects_v2(Bucket=s3_bucket,
                                                     Prefix=s3_prefix,
                                                     ContinuationToken=cont_token)
            else:
                res = self.s3_client.list_objects_v2(Bucket=s3_bucket,
                                                     Prefix=s3_prefix)

            if res.get('IsTruncated') is True:
                cont_token = res.get('NextContinuationToken')
            else:
                more = False

            # 'Contents' is absent when no objects match the prefix
            for item in res.get('Contents', []):
                self.collect_single_s3(item.get('Key'), p_match=p_match)

        for a in self.artifacts:
            a.download(self.dlpath)

    def collect_local(self, path):
        """ Collect artifacts from a local directory possibly previously
        downloaded from S3. """
        for f in os.listdir(path):
            lpath = os.path.join(path, f)
            if not os.path.isfile(lpath):
                continue
            Artifact(self, lpath)


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument("--no-s3", help="Don't collect from S3", action="store_true")
    parser.add_argument("--dry-run",
                        help="Locate artifacts but don't actually download or do anything",
                        action="store_true")
    parser.add_argument("--directory", help="Download directory (default: dl-<gitref>)", default=None)
    parser.add_argument("tag", help="Tag or git SHA to collect")

    args = parser.parse_args()
    dry_run = args.dry_run
    gitref = args.tag
    if not args.directory:
        args.directory = 'dl-%s' % gitref

    arts = Artifacts(gitref, args.directory)

    if not args.no_s3:
        arts.collect_s3('confluent-kafka-python/', 'confluent-kafka-python')
    else:
        arts.collect_local(arts.dlpath)

    if len(arts.artifacts) == 0:
        raise ValueError('No artifacts found for %s' % arts.gitref)

    print('Collected artifacts:')
    for a in arts.artifacts:
        print(' %s -> %s' % (a.path, a.lpath))
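

# Example invocations (illustrative; assumes AWS credentials for boto3 are
# configured in the environment, and the tag/sha values are placeholders):
#   $ ./download-s3.py v1.7.0                  # collect artifacts for a git tag
#   $ ./download-s3.py --dry-run 112130ce      # match by (partial) sha, no downloads
#   $ ./download-s3.py --no-s3 --directory dl-v1.7.0 v1.7.0   # re-list a local dir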