File: gen_fuzzer_owners.py

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (259 lines) | stat: -rwxr-xr-x 8,483 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#!/usr/bin/env python3
#
# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Generates a `foo.owners` file for a `fuzzer_test("foo", ...)` GN target.

By default, the closest `OWNERS` file is located and copied, except for
`//OWNERS` and `//third_party/OWNERS` for fear of spamming top-level owners with
fuzzer bugs they know nothing about.

If no such file can be located, then we attempt to use `git blame` to identify
the author of the main fuzzer `.cc` file. Note that this does not work for code
in git submodules (e.g. most code in `third_party/`), in which case we generate
an empty file.

Invoked by GN from `fuzzer_test.gni`.
"""

import argparse
import os
import re
import subprocess
import sys

from typing import Optional

# Matches an `author-mail <...>` line (as requested from `git blame
# --porcelain` below) and captures the text between the angle brackets.
AUTHOR_REGEX = re.compile('author-mail <(.+)>')
# Absolute path of the directory two levels above the directory that contains
# this script; used throughout as the Chromium source root.
# NOTE(review): this assumes the script sits two directories below the
# checkout root -- confirm if the file is ever moved.
CHROMIUM_SRC_DIR = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Basename of the ownership files looked up in a source file's ancestors.
OWNERS_FILENAME = 'OWNERS'
# Directory name at which the upward OWNERS search gives up.
THIRD_PARTY = 'third_party'
# `third_party` followed by the platform path separator. Not referenced by any
# of the code visible in this file.
THIRD_PARTY_SEARCH_STRING = THIRD_PARTY + os.path.sep


def GetAuthorFromGitBlame(blame_output):
  """Extracts the author e-mail from raw `git blame` output.

  Args:
    blame_output: UTF-8 encoded bytes, as produced by `git blame --porcelain`.

  Returns:
    The address captured from the first line matching `AUTHOR_REGEX`, or None
    when no line matches.
  """
  decoded_lines = blame_output.decode('utf-8').splitlines()
  # Lazily try each line; `match` anchors at the start of the line.
  attempts = (AUTHOR_REGEX.match(entry) for entry in decoded_lines)
  first_hit = next((hit for hit in attempts if hit), None)
  if first_hit is None:
    return None
  return first_hit.group(1)


def GetGitCommand():
  """Picks the git executable name for the current platform.

  The returned name can be run without `shell=True`: `git.bat` on Windows
  (`sys.platform == 'win32'`), plain `git` everywhere else.
  """
  if sys.platform == 'win32':
    return 'git.bat'
  return 'git'


def GetOwnersFromOwnersFile(source: str) -> Optional[str]:
  """Finds the owners of `source` from the closest OWNERS file.

  Both //OWNERS or */third_party/OWNERS are ignored so as not to spam top-level
  owners with unowned fuzzer bugs.

  Args:
    source: Relative path from the chromium src directory to the target source
      file.

  Returns:
    The entire contents of the closest OWNERS file. That is, the first OWNERS
    file encountered while walking up through the ancestor directories of the
    target source file. None if the walk reaches the source root or a
    `third_party` directory without finding one.
  """
  # TODO(crbug.com/41486296): Use `pathlib` instead of `os.path` for
  # better ergonomics and robustness.
  dirs = source.split(os.path.sep)[:-1]

  # Note: We never test for //OWNERS, i.e. when `dirs` is empty.
  while dirs:
    # Never return the contents of */third_party/OWNERS, and stop searching.
    if dirs[-1] == THIRD_PARTY:
      break

    owners_file_path = os.path.join(CHROMIUM_SRC_DIR, *dirs, OWNERS_FILENAME)
    # Only regular files qualify: a directory that happens to be called
    # `OWNERS` (or `owners` on a case-insensitive filesystem) must not be
    # opened, so it is skipped and the walk continues upwards.
    if os.path.isfile(owners_file_path):
      # TODO(crbug.com/41486296): OWNERS files can reference others,
      # have per-file directives, etc. We should be cleverer than this.
      # The context manager closes the handle deterministically.
      with open(owners_file_path) as owners_file:
        return owners_file.read()

    dirs.pop()

  return None


def GetOwnersForFuzzer(sources):
  """Determines the owners of a fuzzer from its list of source files.

  The first existing source that contains a fuzzer entry point marker decides
  the result; later sources are not consulted.

  Args:
    sources: Paths relative to the chromium src directory; may be None or
      empty.

  Returns:
    The contents of the closest OWNERS file if one is found, otherwise the
    author e-mail taken from `git blame`, otherwise None.
  """
  if not sources:
    return None

  entry_point_markers = [
      'FuzzOneInput', 'LLVMFuzzerTestOneInput', 'PROTO_FUZZER'
  ]

  for candidate in sources:
    candidate_path = os.path.join(CHROMIUM_SRC_DIR, candidate)
    if not os.path.exists(candidate_path):
      continue

    with open(candidate_path, 'r') as candidate_handle:
      candidate_text = candidate_handle.read()

    # Skip dependencies of the fuzzer; only the file that defines the entry
    # point counts as the fuzzer source.
    if not SubStringExistsIn(entry_point_markers, candidate_text):
      continue

    # Prefer the closest OWNERS file.
    owners = GetOwnersFromOwnersFile(candidate)
    if owners:
      return owners

    git_prefix = [
        GetGitCommand(), '--git-dir',
        os.path.join(CHROMIUM_SRC_DIR, '.git')
    ]

    tracked = subprocess.check_output(git_prefix + ['ls-files', candidate],
                                      cwd=CHROMIUM_SRC_DIR)
    if not tracked:
      # `git ls-files` printed nothing, so the file is not in the working
      # tree. Without an OWNERS file there is no way to tell whose it is.
      return None

    # Combining `git log --follow` with `--reverse` does not work, and
    # `--follow` alone is too slow. Instead, assume the original author wrote
    # the copyright block, which generally survives renames and moves. Blame
    # line 3 (the end of that block) rather than line 1: a late-2022 sweep had
    # one person rewrite the first copyright line of every file in the repo,
    # and that person should not inherit every older fuzzer issue.
    blame = subprocess.check_output(
        git_prefix + ['blame', '--porcelain', '-L3,3', candidate],
        cwd=CHROMIUM_SRC_DIR)
    return GetAuthorFromGitBlame(blame)

  return None


def FindGroupsAndDepsInDeps(deps_list, build_dir):
  """Identifies which deps are GN groups and lists each group's direct deps.

  Runs `gn desc` once per entry of `deps_list` and scans its output.

  Args:
    deps_list: Iterable of GN target labels.
    build_dir: Build directory passed through to `gn desc`.

  Returns:
    A `(groups, deps_for_groups)` tuple: the labels whose first `Type: ` line
    reads exactly `Type: group`, and a dict mapping each such label to the
    stripped lines found under its `Direct dependencies` heading.
  """
  group_targets = []
  children_by_group = {}
  for target in deps_list:
    description = subprocess.check_output(
        [GNPath(), 'desc', '--fail-on-unused-args', build_dir,
         target]).decode('utf8')
    # One shared iterator, so each scan below resumes where the last stopped.
    remaining = iter(description.splitlines())

    # The first `Type: ` line decides whether this target is a group at all.
    type_line = next(
        (entry for entry in remaining if entry.startswith('Type: ')), None)
    if type_line != 'Type: group':
      continue

    group_targets.append(target)
    assert target not in children_by_group
    children = children_by_group[target] = []

    # Skip ahead to (and past) the `Direct dependencies` heading ...
    for entry in remaining:
      if entry.startswith('Direct dependencies'):
        break

    # ... then collect every following line up to the first empty one.
    for entry in remaining:
      if not entry:
        break
      children.append(entry.strip())

  return group_targets, children_by_group


def TraverseGroups(deps_list, build_dir):
  """Replaces every group in a deps list with that group's direct deps.

  Groups found among the newly added deps are expanded in turn, until a pass
  discovers no group that has not been seen before.

  Args:
    deps_list: Iterable of GN target labels.
    build_dir: Build directory forwarded to `FindGroupsAndDepsInDeps`.

  Returns:
    A list (in set order) of the resulting labels with seen groups removed.
  """
  resolved = set(deps_list)
  pending = set(resolved)

  # Remember expanded groups so that circular dependencies, if any, cannot
  # make the loop run forever.
  visited_groups = set()

  while pending:
    found, children_by_group = FindGroupsAndDepsInDeps(pending, build_dir)
    fresh_groups = set(found) - visited_groups
    if not fresh_groups:
      break

    # Groups themselves never belong in the final result.
    resolved -= fresh_groups
    visited_groups |= fresh_groups

    # The next pass inspects only the direct deps of the groups just expanded,
    # minus any group that has already been handled.
    pending = set()
    for group in fresh_groups:
      pending.update(children_by_group[group])
    pending -= visited_groups
    resolved |= pending

  return list(resolved)


def GetSourcesFromDeps(deps_list, build_dir):
  """Collects the source paths of all deps, after expanding groups.

  Args:
    deps_list: GN target labels; may be None or empty.
    build_dir: Build directory passed to `gn desc`.

  Returns:
    None when `deps_list` is empty. Otherwise a list with one entry per line
    printed by `gn desc <build_dir> <dep> sources`, with a leading `//`
    removed where present.
  """
  if not deps_list:
    return None

  collected = []
  for target in TraverseGroups(deps_list, build_dir):
    raw = subprocess.check_output(
        [GNPath(), 'desc', '--fail-on-unused-args', build_dir, target,
         'sources'])
    collected.extend(
        path[2:] if path.startswith('//') else path
        for path in raw.decode('utf8').splitlines())

  return collected


def GNPath():
  """Returns the path of the `gn` binary under `buildtools` for this platform.

  Linux maps to `linux64/gn`, macOS (`darwin`) to `mac/gn`, and every other
  platform falls through to `win/gn.exe`.
  """
  platform_name = sys.platform
  if platform_name.startswith('linux'):
    location = ('linux64', 'gn')
  elif platform_name == 'darwin':
    location = ('mac', 'gn')
  else:
    location = ('win', 'gn.exe')

  return os.path.join(CHROMIUM_SRC_DIR, 'buildtools', *location)


def SubStringExistsIn(substring_list, string):
  """Reports whether |string| contains at least one of the given substrings.

  Returns False for an empty |substring_list|.
  """
  for candidate in substring_list:
    if candidate in string:
      return True
  return False


def main():
  """Parses the command line and writes the fuzzer's owners file.

  The file named by `--owners` is always created. It stays empty when no owner
  can be determined from either `--sources` or `--deps`.
  """
  arg_parser = argparse.ArgumentParser(
      description='Generate fuzzer owners file.')
  arg_parser.add_argument('--owners', required=True)
  arg_parser.add_argument('--build-dir')
  arg_parser.add_argument('--deps', nargs='+')
  arg_parser.add_argument('--sources', nargs='+')
  options = arg_parser.parse_args()

  # Open the output before looking anything up: an empty file is still
  # produced when no owner is found, which keeps ninja happy.
  with open(options.owners, 'w') as output:
    owners = GetOwnersForFuzzer(options.sources)
    if not owners:
      # |options.sources| gave no answer, so fall back to the sources of the
      # targets listed in |options.deps|.
      owners = GetOwnersForFuzzer(
          GetSourcesFromDeps(options.deps, options.build_dir))

    if owners:
      output.write(owners)

# Run `main()` only when executed as a script, not when imported as a module.
if __name__ == '__main__':
  main()