File: download_fuzz_corpora.py

package info (click to toggle)
chromium 120.0.6099.224-1~deb11u1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 6,112,112 kB
  • sloc: cpp: 32,907,025; ansic: 8,148,123; javascript: 3,679,536; python: 2,031,248; asm: 959,718; java: 804,675; xml: 617,256; sh: 111,417; objc: 100,835; perl: 88,443; cs: 53,032; makefile: 29,579; fortran: 24,137; php: 21,162; tcl: 21,147; sql: 20,809; ruby: 17,735; pascal: 12,864; yacc: 8,045; lisp: 3,388; lex: 1,323; ada: 727; awk: 329; jsp: 267; csh: 117; exp: 43; sed: 37
file content (115 lines) | stat: -rwxr-xr-x 3,524 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Download all the fuzzing corpora associated with all Chromium libfuzzer
targets.

Assumes that fuzzer targets are already built and reside in the BUILD_DIR
directory.

  * Example usage: download_fuzz_corpora.py --download-dir [DOWNLOAD_DIR]
    --build-dir [BUILD_DIR]
"""

# Base gs:// location of the corpus backups. The archive for a fuzz target is
# looked up under this prefix as <target>/public.zip.
CORPORA_BUCKET_BASE_URL = "gs://clusterfuzz-libfuzzer-backup/corpus/libfuzzer/"

import argparse
import logging
from multiprocessing import cpu_count, Pool
import os
import shutil
import subprocess
import sys
import zipfile

import coverage_consts


def _gsutil(cmd):
  """Runs a gsutil command line and reports a failing exit status.

  The command is run to completion. A non-zero exit status is logged as a
  warning rather than raised, so one failed transfer does not abort the other
  transfers running alongside it.

  Args:
    cmd: The full argument list to execute, e.g. ['gsutil', 'cp', src, dst].
  """
  completed = subprocess.run(cmd)
  if completed.returncode != 0:
    logging.warning("Command %s exited with status %d", cmd,
                    completed.returncode)


def _download_corpus(args):
  """Downloads the public corpus archive of one fuzz target.

  Takes a single tuple so that it can be handed directly to Pool.map.

  Args:
    args: A (target, download_dir) pair. The archive is copied from
      <CORPORA_BUCKET_BASE_URL>/<target>/public.zip into the directory
      <download_dir>/<target>, which is created if it does not exist yet.
  """
  target, download_dir = args[0], args[1]
  target_folder = os.path.join(download_dir, target)
  try:
    # Created in-process instead of spawning `mkdir`, so that re-running into
    # an existing download directory is not an error.
    os.makedirs(target_folder, exist_ok=True)
  except OSError as error:
    logging.error("Could not create %s: %s", target_folder, error)
    return
  # The source is a URL, not a local path, so its components are always
  # joined with '/' regardless of the host platform's path separator.
  target_path = '/'.join(
      [CORPORA_BUCKET_BASE_URL.rstrip('/'), target, "public.zip"])
  _gsutil(['gsutil', 'cp', target_path, target_folder])


def _unzip_corpus(args):
  """Extracts a downloaded corpus archive in place and cleans up afterwards.

  Takes a single tuple so that it can be handed directly to Pool.map. Every
  step is best effort: a missing or unreadable archive is logged and the
  remaining clean-up still runs.

  Args:
    args: A (target, download_dir) pair. The archive is expected at
      <download_dir>/<target>/public.zip and is extracted into the directory
      that contains it.
  """
  target, download_dir = args[0], args[1]
  target_folder = os.path.join(download_dir, target)
  archive_path = os.path.join(target_folder, "public.zip")

  try:
    with zipfile.ZipFile(archive_path) as archive:
      archive.extractall(target_folder)
  except (OSError, zipfile.BadZipFile) as error:
    logging.warning("Could not unzip corpus for %s: %s", target, error)

  # The archive is not needed once extraction has been attempted.
  try:
    os.remove(archive_path)
  except FileNotFoundError:
    # Nothing was downloaded; already reported by the extraction step above.
    pass
  except OSError as error:
    logging.warning("Could not remove %s: %s", archive_path, error)

  # Unzipping the corpora often also contains a "regressions" folder, which
  # is a subset of the total corpus, so can be ignored/removed. It is fine
  # for the folder not to exist.
  shutil.rmtree(os.path.join(target_folder, 'regressions'), ignore_errors=True)


def unzip_corpora(download_dir, corpora_to_download):
  """Unzips, in parallel, the downloaded archive of each listed corpus.

  Args:
    download_dir: Directory that holds one subdirectory per corpus.
    corpora_to_download: Names of the corpora (fuzz targets) to unzip.
  """
  # Pair every corpus with the download_dir parameter; no `args` object is
  # defined in this scope.
  jobs = [(corpus, download_dir) for corpus in corpora_to_download]
  if not jobs:
    # Nothing to do, so do not start any worker processes.
    return
  with Pool(cpu_count()) as p:
    p.map(_unzip_corpus, jobs)


def _ParseCommandArguments():
  """Builds the command-line parser for this tool and parses the arguments.

  Both --download-dir and --build-dir are mandatory string options. The
  module docstring is shown as the usage text.

  Returns:
    The argparse.Namespace produced by parsing, carrying the `download_dir`
    and `build_dir` attributes.
  """
  parser = argparse.ArgumentParser(usage=__doc__)
  for flag, description in (
      ('--download-dir', 'Directory into which corpora are downloaded.'),
      ('--build-dir', 'Directory where fuzzers were built.'),
  ):
    parser.add_argument(flag, type=str, required=True, help=description)
  return parser.parse_args()


def Main():
  """Downloads and unpacks the corpus of every fuzzer found in the build dir.

  Fuzz targets are discovered as the entries of --build-dir whose names end in
  '_fuzzer'. The archive of each one is downloaded into its own subdirectory
  of --download-dir and then unzipped there.

  Returns:
    1 if either directory argument is empty or is not an existing directory,
    otherwise 0 once all download and unzip jobs have been run. The value is
    meant to be passed to sys.exit().
  """
  args = _ParseCommandArguments()

  # Validate up front and stop with a failing status. A bare `exit` expression
  # only names the builtin without calling it, so it cannot stop execution.
  if not args.download_dir:
    logging.error("No download_dir given")
    return 1
  if not os.path.isdir(args.download_dir):
    logging.error("%s does not exist or is not a directory", args.download_dir)
    return 1
  if not args.build_dir:
    logging.error("No build_dir given")
    return 1
  if not os.path.isdir(args.build_dir):
    logging.error("%s does not exist or is not a directory", args.build_dir)
    return 1

  corpora_to_download = [
      target for target in os.listdir(args.build_dir)
      if target.endswith('_fuzzer')
  ]

  print("Corpora to download: " + str(corpora_to_download))

  if not corpora_to_download:
    # No fuzzers were found, so there is no work to hand to a process pool.
    return 0

  # Each worker call receives one (target, download_dir) tuple.
  jobs = [(corpus, args.download_dir) for corpus in corpora_to_download]
  with Pool(cpu_count()) as p:
    # map() blocks, so every download has finished before unzipping starts.
    p.map(_download_corpus, jobs)
    p.map(_unzip_corpus, jobs)
  return 0


# Run the tool only when executed as a script; whatever Main() returns is
# handed to sys.exit() as the process exit status.
if __name__ == '__main__':
  sys.exit(Main())