1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
|
#!/usr/bin/env python3
# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Bisects a failure in the captured sites framework to find culprit CL.
First, this script will retrieve test results from the given build number and
the prior build number.
It will then find sites that changed from passing to failing across those
builds, and pick a site to use for bisection.
Finally, it will launch a local git bisect, using the 'Release' version
(to match how the bots operate) and running tests in the background.
In order to run the bisect, no pending git changes can exist in the local
checkout, and checkout_chromium_autofill_test_dependencies should be set to
true in your .gclient file.
This script requires Read permissions of ResultDB and Buildbucket RPCs of
internal builders, so it is intended only for Googlers at this time.
Common tasks:
Bisect an autofill bot failure:
1) First, find the first failing build you are interested in for the
linux-autofill-captured-sites-rel bot.
2) Note the build number {build_num}. The script will use the first site
which failed in that run and not the previous to bisect with.
3) Run:
`tools/captured_sites/bisect_failures.py autofill {build_num}`
Bisect an autofill bot failure using a specific site. Note: script does not
verify that given site overrides are valid recorded site names.
1) First, find the first failing build you are interested in for the
linux-autofill-captured-sites-rel bot.
2) Note the build number {build_num} and site {site_name}.
3) Run:
`tools/captured_sites/bisect_failures.py autofill {build_num}
--site_name {site_name}`
Bisect a password bot failure:
1) First, find the first failing build you are interested in for the
linux-password-manager-captured-sites-rel bot.
2) Note the build number {build_num}.
3) Run:
`tools/captured_sites/bisect_failures.py password {build_num}`
"""
import os
import sys
import argparse
import captured_sites_commands
# Parent of the directory containing this script. The path is left
# un-normalized (it ends in '..'); _JoinPath resolves it when the import
# directories are built from it.
_TOOLS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')
def _JoinPath(*path_parts):
return os.path.abspath(os.path.join(*path_parts))
def _InsertPath(path):
assert os.path.isdir(path), 'Not a valid path: %s' % path
if path not in sys.path:
# Some call sites that use Telemetry assume that sys.path[0] is the
# directory containing the script, so we add these extra paths to right
# after sys.path[0].
sys.path.insert(1, path)
def _AddDirToPythonPath(*path_parts):
  """Builds an absolute directory path from the parts and adds it to sys.path.

  Args:
    *path_parts: Path components identifying the directory to add.
  """
  _InsertPath(_JoinPath(*path_parts))
# Each sys.path addition below must run before the import that follows it, so
# these imports cannot be hoisted into the import block at the top of the file.
# NOTE(review): bisect_gtests is presumably located in the 'bisect' directory
# under _TOOLS_DIR — confirm.
_AddDirToPythonPath(_TOOLS_DIR, 'bisect')
import bisect_gtests
# NOTE(review): the core.services package is presumably rooted in the 'perf'
# directory under _TOOLS_DIR — confirm.
_AddDirToPythonPath(_TOOLS_DIR, 'perf')
from core.services import buildbucket_service
from core.services import resultdb_service
def _FindSiteRegressions(bad_buildbucket_id, good_buildbucket_id):
  """Retrieves failed sites from 2 builds, and returns the new failures.

  Args:
    bad_buildbucket_id: A build with new failures.
    good_buildbucket_id: A baseline build.

  Returns:
    A list, sorted alphabetically, of the sites which fail in the bad build
    but not in the good build.
  """
  bad_failed_sites = set(_GetTerminalSiteFailures(bad_buildbucket_id))
  good_failed_sites = set(_GetTerminalSiteFailures(good_buildbucket_id))
  # Sets of strings iterate in an order that can change between interpreter
  # runs (hash randomization). Callers pick the first entry as the site to
  # bisect, so sort to make that choice reproducible.
  return sorted(bad_failed_sites - good_failed_sites)
# Convenience shorthand names accepted on the command line, mapped to the full
# builder names used when retrieving builds. The keys double as the allowed
# choices for the `bot_name` positional argument.
_BOT_SHORT_LONG_MAPPING = {
    'autofill': 'linux-autofill-captured-sites-rel',
    'password': 'linux-password-manager-captured-sites-rel',
}
def _GetBotName(short_name):
  """Resolves a convenient short bot name to the full bot name.

  Args:
    short_name: Either "autofill" or "password".

  Returns:
    The full bot name to be used in build retrievals.

  Raises:
    ValueError: If short_name is not a key of _BOT_SHORT_LONG_MAPPING.
  """
  full_name = _BOT_SHORT_LONG_MAPPING.get(short_name)
  if full_name is None:
    raise ValueError(f'Unrecognized short bot name: "{short_name}".'
                     ' Only "autofill" or "password" is known.')
  return full_name
def _GetTerminalSiteFailures(buildbucket_id):
  """Retrieves the sites whose final recorded status in a build is not PASS.

  Only results whose variant test suite name ends with
  '_captured_sites_interactive_tests' are considered; results from any other
  suite are filtered out, and results without a suite are reported and
  skipped. When a site appears more than once in the results, the status of
  its last occurrence is the one that counts.

  Args:
    buildbucket_id: A build with which to interrogate failures.

  Returns:
    A set of site names whose last recorded status is anything other than
    'PASS'. Empty if the query response contains no 'testResults'.
  """
  all_results = resultdb_service.GetQueryTestResult(buildbucket_id)
  if 'testResults' not in all_results:
    return set()

  site_statuses = {}
  for result in all_results['testResults']:
    test_id = result['testId']
    # Check to ensure that the failure is from a captured sites test suite.
    variant_test_suite = (result.get('variant', {}).get('def',
                                                        {}).get('test_suite'))
    if not variant_test_suite:
      print(f'Ill-formed results for {test_id}, will skip unexpectedly.')
      continue
    if not variant_test_suite.endswith('_captured_sites_interactive_tests'):
      continue
    # The site name is whatever follows the last 'All.' in the test id.
    # rpartition yields the whole id when the marker is missing, whereas
    # slicing at rfind(...) + 4 would silently drop the id's first three
    # characters (rfind returns -1 on a miss).
    site_name = test_id.rpartition('All.')[2]
    # Later results overwrite earlier ones for the same site.
    site_statuses[site_name] = result['status']

  return {site for site, status in site_statuses.items() if status != 'PASS'}
def _ParseCommandLine(args):
  """Parses command line options from the given args.

  The module docstring is used verbatim as the parser's usage text.

  Args:
    args: The list of argument strings to parse.

  Returns:
    An object containing the parsed argument values: bot_name, build_number,
    site_name and print_only.

  Raises:
    SystemExit: If invalid or unrecognized arguments were given.
  """
  parser = argparse.ArgumentParser(
      usage=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument(
      'bot_name',
      choices=list(_BOT_SHORT_LONG_MAPPING),
      help='Choose which linux captured sites builder flavor to bisect.')
  parser.add_argument(
      'build_number', type=int, help='The failing build number to bisect')
  parser.add_argument(
      '-s',
      '--site_name',
      type=str,
      help=('An explicit site to use for bisecting (versus picking a failing one'
            ' from build result).'))
  parser.add_argument(
      '-p',
      '--print_only',
      action='store_true',
      help='Only print the commands that would be used to bisect.')
  return parser.parse_args(args)
def _RetrieveBuildbucketInfo(builder_name, number):
  """Looks up a build and returns its commit hash and buildbucket id.

  The build is requested from the 'chrome' project's 'ci' bucket.

  Args:
    builder_name: The full builder name to query.
    number: The build number of that builder to gather info from.

  Returns:
    A tuple of the build's (hash, id), where hash is the id of the build's
    input gitiles commit.

  Raises:
    BuildRequestError: if improper permissions or query format is given.
      NOTE(review): raised inside buildbucket_service according to the
      original documentation; not verifiable from this file — confirm.
  """
  build = buildbucket_service.GetBuild('chrome', 'ci', builder_name, number)
  return build['input']['gitilesCommit']['id'], build['id']
def _TranslateSiteName(site_name):
"""Translates complex site_name into array of necessary parts depending on
if it is a password or autofill type name.
Args:
site_name: For autofill, basic site_name, but for Password, a site_name
with password scenario prefix.
Returns:
A list containing the parsed pieces describing the scenario & site_name.
"""
password_prefixes = [
'sign_up_fill', 'sign_up_pass', 'sign_in_pass', 'capture_update_pass'
]
for password_prefix in password_prefixes:
if site_name.startswith(password_prefix + '_'):
return [password_prefix, site_name[len(password_prefix + '_'):]]
return [site_name]
def DoBisect(bad_hash, good_hash, site_name, print_only=False):
  """Runs (or just describes) a local bisect for one site's test failure.

  Builds the 'build' and 'run' command texts for the site, prints them along
  with the hash range, and then hands them to bisect_gtests.StartBisect
  unless print_only is set.

  Args:
    bad_hash: The hash of a failing build.
    good_hash: The hash of the last known prior good build.
    site_name: The site name as a single string. It is split here via
      _TranslateSiteName, so a password name keeps its scenario prefix
      (e.g. 'sign_in_pass_<site>') and an autofill name is passed as is.
    print_only: A boolean that if True, only prints the pieces that would go
      into a potential bisect process, instead of actually initiating one.
  """
  site_parts = _TranslateSiteName(site_name)
  print('Translated to:', site_parts)

  builder = captured_sites_commands.initiate_command('build')
  builder.build(['-r'])
  build_text = builder.print()

  runner = captured_sites_commands.initiate_command('run')
  # -r: use Release version, -b: run in background, -u: use bot timeout
  # (3 min). The translated site parts follow the flags.
  runner.build(['-r', '-b', '-u', *site_parts])
  run_text = runner.print()

  print(f'Will bisect from {good_hash} to {bad_hash}.')
  print(f'Will build using:\n{build_text}')
  print(f'Will run using:\n{run_text}')

  if print_only:
    print('print_only is set, exiting before bisect begins.')
  else:
    bisect_gtests.StartBisect(good_hash, bad_hash, build_text, run_text)
def GetBuildInfo(bot_name, build_number, site_name=None):
  """Gathers what is needed to bisect new failures in the given build.

  Compares the given build against the one numbered immediately before it and
  reports the sites that newly fail.

  Args:
    bot_name: An acceptable short bot_name. See _GetBotName for more detail.
    build_number: The 'bad' build number from which to find novel failures.
    site_name: An optional site_name to force usage of. This likely will want
      to be one of the new site failures in the given build, but that is not
      enforced; a warning is printed when it is not among them.

  Returns:
    A tuple (bad_hash, good_hash, site_name):
      bad_hash: Buildbucket hash from the given 'bad' build number.
      good_hash: Buildbucket hash from the prior 'good' build.
      site_name: Either the given override, or the first novel failure found
        between the two builds. None if no override was given and no
        regressions were found.
    (None, None, None) is returned if the build info or the site failures
    could not be retrieved.
  """
  builder = _GetBotName(bot_name)
  bad_number = build_number
  good_number = build_number - 1
  nothing_found = (None, None, None)

  try:
    bad_hash, bad_id = _RetrieveBuildbucketInfo(builder, bad_number)
    good_hash, good_id = _RetrieveBuildbucketInfo(builder, good_number)
  except Exception as e:
    print('Unable to retrieve build bucket info for builds to compare:', e)
    return nothing_found

  try:
    regressions = _FindSiteRegressions(bad_id, good_id)
  except Exception as e:
    print('Unable to retrieve site failures from given buildbucket ids:', e)
    return nothing_found

  if regressions:
    print('All Site Regressions (%d):%s' %
          (len(regressions), ' '.join(regressions)))
  else:
    print(f'Compared build numbers {bad_number} and {good_number}, but'
          ' found no clear site regressions. Make sure to add the first builder'
          ' that failed as the input. Also note this script cannot handle'
          ' bisecting infra failures.')

  if site_name:
    # An explicit override always wins; only warn if it looks unrelated.
    if site_name not in regressions:
      print(f'WARNING: given site "{site_name}" did not show as possbile site'
            ' regression but was given as override choice.')
  elif regressions:
    site_name = regressions[0]
    print(f'Choosing Site:"{site_name}".')

  return bad_hash, good_hash, site_name
def main():
  """Parses the command line, gathers build info and starts the bisect.

  Returns:
    0 once DoBisect has returned, or 1 if a good hash, a bad hash or a site
    name could not be determined.
  """
  options = _ParseCommandLine(sys.argv[1:])
  bad_hash, good_hash, site_name = GetBuildInfo(options.bot_name,
                                                options.build_number,
                                                options.site_name)
  if not bad_hash or not good_hash or not site_name:
    # The original message ended with an unbalanced doubled quote after the
    # site name; each value is now wrapped in exactly one pair of quotes.
    print('Unable to gather enough info for a bisect, as do not have a'
          f' good hash:"{good_hash}",'
          f' bad hash:"{bad_hash}",'
          f' and/or site name:"{site_name}".')
    return 1
  DoBisect(bad_hash, good_hash, site_name, options.print_only)
  return 0
# Run only when executed as a script (not on import); main()'s return value
# becomes the process exit status.
if __name__ == '__main__':
  sys.exit(main())
|