1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
|
#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Download all the fuzzing corpora associated with all Chromium libfuzzer
targets.
Assumes that fuzzer targets are already built and reside in the BUILD_DIR
directory.
* Example usage: download_fuzz_corpora.py --download-dir [DOWNLOAD_DIR]
--build-dir [BUILD_DIR]
"""
# Base gs:// location of the corpus backups. _download_corpus appends
# "<target>/public.zip" to this to locate the archive for one fuzz target.
CORPORA_BUCKET_BASE_URL = "gs://clusterfuzz-libfuzzer-backup/corpus/libfuzzer/"
import argparse
import logging
from multiprocessing import cpu_count, Pool
import os
import shutil
import subprocess
import sys
import zipfile

import coverage_consts
def _gsutil(cmd):
  """Runs one gsutil command line as a child process.

  Args:
    cmd: The full argument list to execute, program name included (callers in
      this file pass ['gsutil', 'cp', <source>, <destination>]).

  The child's exit status is not inspected and nothing is returned.
  """
  subprocess.run(cmd)
def _download_corpus(args):
  """Downloads the public corpus archive for a single fuzz target.

  Creates <download_dir>/<target> if it does not exist yet and copies
  <CORPORA_BUCKET_BASE_URL>/<target>/public.zip into it using gsutil.

  Args:
    args: A (target, download_dir) sequence. The two values are packed into
      one argument so the function can be handed to Pool.map.
  """
  target, download_dir = args[0], args[1]
  target_folder = os.path.join(download_dir, target)
  # Create the folder in-process instead of shelling out to `mkdir`;
  # exist_ok keeps a re-run over an existing download dir from failing.
  os.makedirs(target_folder, exist_ok=True)
  # The source is a gs:// URL, not a filesystem path, so it is joined with
  # '/' explicitly rather than with the platform-dependent os.path.join.
  target_url = '/'.join(
      [CORPORA_BUCKET_BASE_URL.rstrip('/'), target, 'public.zip'])
  _gsutil(['gsutil', 'cp', target_url, target_folder])
def _unzip_corpus(args):
target = args[0]
download_dir = args[1]
target_folder = os.path.join(download_dir, target)
target_path = os.path.join(download_dir, target, "public.zip")
subprocess.run(['unzip', "public.zip"], cwd=target_folder)
subprocess.run(['rm', 'public.zip'], cwd=target_folder)
try:
# Unzipping the corpora often also contains a "regressions" folder, which
# is a subset of the total corpus, so can be ignored/removed
subprocess.run(['rm', '-rf', 'regressions'], cwd=target_folder)
except:
pass
def unzip_corpora(download_dir, corpora_to_download):
  """Unzips the downloaded corpus archives of several targets in parallel.

  Args:
    download_dir: Directory that holds one sub-folder per target, each
      containing that target's public.zip.
    corpora_to_download: Iterable of target names to unzip.
  """
  # Use the download_dir parameter here: there is no module-level `args`, so
  # referring to args.download_dir raised NameError on every call.
  work_items = [(corpus, download_dir) for corpus in corpora_to_download]
  with Pool(cpu_count()) as p:
    p.map(_unzip_corpus, work_items)
def _ParseCommandArguments():
"""Adds and parses relevant arguments for tool comands.
Returns:
A dictionary representing the arguments.
"""
arg_parser = argparse.ArgumentParser()
arg_parser.usage = __doc__
arg_parser.add_argument('--download-dir',
type=str,
required=True,
help='Directory into which corpora are downloaded.')
arg_parser.add_argument('--build-dir',
required=True,
type=str,
help='Directory where fuzzers were built.')
args = arg_parser.parse_args()
return args
def Main():
  """Downloads and unzips the corpora of every fuzzer found in the build dir.

  Every entry of --build-dir whose name ends in "_fuzzer" is treated as a
  target. The archives are first downloaded in parallel and then unzipped in
  parallel, each target into its own sub-folder of --download-dir.

  Returns:
    0 on success, 1 if either directory argument is missing or is not an
    existing directory. The caller passes this value to sys.exit().
  """
  args = _ParseCommandArguments()

  # A bare `exit` expression does nothing, so failed checks used to fall
  # through and the script carried on. Return a non-zero status instead.
  for name, path in (('download_dir', args.download_dir),
                     ('build_dir', args.build_dir)):
    if not path:
      logging.error("No %s given", name)
      return 1
    if not os.path.isdir(path):
      logging.error("%s does not exist or is not a directory", path)
      return 1

  corpora_to_download = [
      target for target in os.listdir(args.build_dir)
      if target.endswith('_fuzzer')
  ]
  print("Corpora to download: " + str(corpora_to_download))
  if not corpora_to_download:
    # Nothing to do; avoid spinning up worker pools for an empty list.
    return 0

  work_items = [(corpus, args.download_dir) for corpus in corpora_to_download]
  # All downloads finish before any unzip starts.
  with Pool(cpu_count()) as p:
    p.map(_download_corpus, work_items)
  with Pool(cpu_count()) as p:
    p.map(_unzip_corpus, work_items)
  return 0
if __name__ == '__main__':
  # Run the tool and hand whatever Main() returns to sys.exit() as the
  # process exit status.
  exit_status = Main()
  sys.exit(exit_status)
|