#!/usr/bin/env python
"""
Downloads using s3transfer.processpool.ProcessPoolDownloader

Usage
=====

NOTE: Make sure you run ``pip install -r requirements-dev.txt`` before running.

To download a file::

    ./processpool-download -f myfilename -b mybucket -k mykey

To download a prefix recursively to a directory::

    ./processpool-download -d mydirname -b mybucket -p myprefix/
"""
import argparse
import os

import botocore.session

from s3transfer.processpool import ProcessPoolDownloader, ProcessTransferConfig

MB = 1024 * 1024
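

# Downloads a single object. The ProcessTransferConfig caps the transfer at
# ``num_processes`` worker processes and uses ``mb_chunksize`` MB parts when
# the object is large enough to be fetched with ranged GETs;
# ``future.result()`` blocks until the download completes.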
def download(bucket, key, filename, num_processes, mb_chunksize):
    config = ProcessTransferConfig(
        multipart_chunksize=mb_chunksize * MB,
        max_request_processes=num_processes,
    )
    with ProcessPoolDownloader(config=config) as downloader:
        future = downloader.download_file(
            bucket=bucket, key=key, filename=filename
        )
        future.result()
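

# Lists every object under ``prefix`` with the list_objects paginator and
# queues a download for each one, mirroring the key hierarchy under
# ``dirname``. Exiting the ``with`` block waits for all queued downloads
# to finish.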
def recursive_download(bucket, prefix, dirname, num_processes, mb_chunksize):
    config = ProcessTransferConfig(
        multipart_chunksize=mb_chunksize * MB,
        max_request_processes=num_processes,
    )
    s3 = botocore.session.get_session().create_client('s3')
    with ProcessPoolDownloader(config=config) as downloader:
        paginator = s3.get_paginator('list_objects')
        for response in paginator.paginate(Bucket=bucket, Prefix=prefix):
            contents = response.get('Contents', [])
            for content in contents:
                key = content['Key']
                filename = os.path.join(dirname, key[len(prefix) :])
                parent_dirname = os.path.dirname(filename)
                if not os.path.exists(parent_dirname):
                    os.makedirs(parent_dirname)
                # An expected size is provided so an additional HeadObject
                # does not need to be made for each of these objects that
                # get downloaded.
                downloader.download_file(
                    bucket,
                    key,
                    filename=filename,
                    expected_size=content['Size'],
                )
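

# Parses the command line and dispatches to a single-file download
# (--key/--filename) or a recursive download (--prefix/--dirname).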
def main():
    parser = argparse.ArgumentParser(usage=__doc__)
    parser.add_argument(
        '-b', '--bucket', required=True, help='The S3 bucket to download from'
    )
    single_file_group = parser.add_argument_group('Single file downloads')
    single_file_group.add_argument(
        '-k', '--key', help='The key to download from'
    )
    single_file_group.add_argument(
        '-f', '--filename', help='The name of file to download to'
    )
    recursive_file_group = parser.add_argument_group(
        'Recursive file downloads'
    )
    recursive_file_group.add_argument(
        '-p', '--prefix', help='The prefix to download from'
    )
    recursive_file_group.add_argument(
        '-d', '--dirname', help='The directory to download to'
    )
    parser.add_argument(
        '-n',
        '--num-processes',
        type=int,
        default=10,
        help='The number of processes to run the download. 10 by default.',
    )
    parser.add_argument(
        '-c',
        '--mb-chunksize',
        type=int,
        default=8,
        help='The part size in MB to use for the download. 8 MB by default.',
    )
    args = parser.parse_args()
    if args.filename and args.key:
        download(
            args.bucket,
            args.key,
            args.filename,
            args.num_processes,
            args.mb_chunksize,
        )
    elif args.prefix and args.dirname:
        recursive_download(
            args.bucket,
            args.prefix,
            args.dirname,
            args.num_processes,
            args.mb_chunksize,
        )
    else:
        raise ValueError(
            'Either --key and --filename must be provided or '
            '--prefix and --dirname must be provided.'
        )


if __name__ == '__main__':
    main()