File: pull_logs.py

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (413 lines) | stat: -rwxr-xr-x 14,549 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
#!/usr/bin/env python3

# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# NOTE: This docstring must precede the imports. As the first statement in
# the module it becomes __doc__, which parse_args() passes to argparse as
# the --help description; placed after the imports it would be a no-op
# string statement and __doc__ would be None.
"""
This script connects to Buildbucket to pull the logs from all tryjobs for a
gerrit cl, and writes them to local files.
Since logs tend to be very large, it can also filter them, only writing lines
of interest.

See README.md in this directory for more details.
"""

import argparse
import glob
import json
import os
import subprocess
import sys
import tempfile

# Name of the Buildbucket CLI tool; Windows needs the .bat shim.
bb = "bb.bat" if os.name == 'nt' else "bb"

# Types of builder which don't compile, and which we therefore ignore
ignored_recipes = [
    # Calls another builder to do the compilation
    "chromium/orchestrator",
    # No compilation at all
    "presubmit",
]

# List of all ToT builders. Generated by running
# bb builders chromium/ci | grep /ToT
# then adding a few to the end by looking at the chromium.clang dashboard
ToT_builders = [
    "chromium/ci/ToTAndroid",
    "chromium/ci/ToTAndroid (dbg)",
    "chromium/ci/ToTAndroid x64",
    "chromium/ci/ToTAndroid x86",
    "chromium/ci/ToTAndroid64",
    "chromium/ci/ToTAndroidASan",
    "chromium/ci/ToTAndroidCoverage x86",
    "chromium/ci/ToTAndroidOfficial",
    "chromium/ci/ToTChromeOS",
    "chromium/ci/ToTChromeOS (dbg)",
    "chromium/ci/ToTFuchsia x64",
    "chromium/ci/ToTFuchsiaOfficial arm64",
    "chromium/ci/ToTLinux",
    "chromium/ci/ToTLinux (dbg)",
    "chromium/ci/ToTLinuxASan",
    "chromium/ci/ToTLinuxASanLibfuzzer",
    "chromium/ci/ToTLinuxCoverage",
    "chromium/ci/ToTLinuxMSan",
    "chromium/ci/ToTLinuxPGO",
    "chromium/ci/ToTLinuxTSan",
    "chromium/ci/ToTLinuxUBSanVptr",
    "chromium/ci/ToTMac",
    "chromium/ci/ToTMac (dbg)",
    "chromium/ci/ToTMacASan",
    "chromium/ci/ToTMacArm64",
    "chromium/ci/ToTMacArm64PGO",
    "chromium/ci/ToTMacCoverage",
    "chromium/ci/ToTMacPGO",
    "chromium/ci/ToTWin",
    "chromium/ci/ToTWin(dbg)",
    "chromium/ci/ToTWin(dll)",
    "chromium/ci/ToTWin64",
    "chromium/ci/ToTWin64(dbg)",
    "chromium/ci/ToTWin64(dll)",
    "chromium/ci/ToTWin64PGO",
    "chromium/ci/ToTWinASanLibfuzzer",
    "chromium/ci/ToTWinArm64PGO",
    "chromium/ci/ToTWindowsCoverage",
    "chromium/ci/ToTiOS",
    "chromium/ci/ToTiOSDevice",
    "chromium/ci/CFI Linux CF",
    "chromium/ci/CFI Linux ToT",
    "chromium/ci/linux-win-cross-clang-tot-rel",
    "chromium/ci/CrWinAsan",
    "chromium/ci/CrWinAsan(dll)",
]

# Global verbosity flag: set from the --verbose option in parse_args() and
# read by log().
verbose = False


def log(msg):
    """
    Print a string for monitoring or debugging purposes, only if
    we're in verbose mode.
    """
    if not verbose:
        return
    print(msg)


def parse_args(args):
    """
    Parse the user's command-line options. Possible flags:

    log-dir: Where to store the downloaded log files.
    cl: The number of the cl to look up.
    patchset: The number of the patchset to download logs for.
    step-names: A list of possible build step names to download logs for.
                If multiple, logs will be pulled for the first one that exists.
    filter: A predicate on lines in the log. Lines that return false are removed
            before saving the log.

    Returns the parsed options as a dict; "filter" is always a callable and
    "log_dir" is always set (a fresh temp directory if none was given).
    Raises ValueError if --cl or --patchset is missing/invalid when required.
    """
    # Note: For local usage, it's often more convenient to edit these defaults
    # than to use the cli arguments, especially if you want a custom filter.
    default_config = {
        "log_dir": None,
        "cl": 0,
        "patchset": 0,
        "step_names": [
            "compile (with patch)", "compile", "compile (without patch)",
            "run coverage script"
        ],
        "filter": lambda s: not s.startswith("["),
    }

    parser = argparse.ArgumentParser(description=__doc__,)
    parser.add_argument("-c",
                        "--cl",
                        type=int,
                        default=default_config["cl"],
                        help="CL number whose logs should be pulled.")
    parser.add_argument("-p",
                        "--patchset",
                        type=int,
                        default=default_config["patchset"],
                        help="Patchset number whose logs should be pulled.")
    parser.add_argument(
        "-t",
        "--tot",
        action="store_true",
        help="If passed, pull scripts from all the ToT bots (as defined at "
        "the top of the script) instead of from a specific CL. Useful for "
        "debugging new warnings when gardening clang. "
        "Overrides --cl and --patchset.")
    parser.add_argument(
        "-l",
        "-o",
        "--log-dir",
        "--out-dir",
        type=str,
        default=default_config["log_dir"],
        help="Absolute path to a directory to store the downloaded logs. "
        "Will be created if it doesn't exist. "
        "Include a trailing slash.")
    # The default must be None here: with action="append", argparse would
    # append user-supplied steps to a non-None default instead of replacing
    # it, so the default steps would always be tried first. We substitute
    # default_config["step_names"] after parsing instead.
    parser.add_argument("-s",
                        "--step",
                        type=str,
                        action="append",
                        default=None,
                        help="Name of the build step to pull logs for. "
                        "May be specified multiple times; logs are pulled "
                        "for each step in order until one succeeds.")
    parser.add_argument(
        "-f",
        "--filter",
        action="store_true",
        help="If true, strip uninteresting build lines (those which begin "
        "with '[').")
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="If passed, print additional logging information for monitoring "
        "or debugging purposes.")

    handle_existing = parser.add_mutually_exclusive_group()
    handle_existing.add_argument(
        "-d",
        "--delete-logs",
        action="store_true",
        help="If passed, delete existing txt files from the log directory. "
        "Mutually exclusive with --resume.")
    handle_existing.add_argument(
        "-r",
        "--resume",
        action="store_true",
        help="If passed, don't download logs that are already present in the "
        "output directory. Useful if the previous download got interrupted. "
        "Mutually exclusive with --delete-logs.")

    parsed_args = vars(parser.parse_args(args))

    # Validate and normalize the parsed args before returning.
    if (not parsed_args["tot"] and parsed_args["cl"] <= 0):
        raise ValueError("You must enter a real CL number")
    if (not parsed_args["tot"] and parsed_args["patchset"] <= 0):
        raise ValueError("You must enter a real patchset number")

    # No --step given: fall back to the default list of step names.
    if parsed_args["step"] is None:
        parsed_args["step"] = default_config["step_names"]

    # Normalize the --filter flag into a line predicate.
    if parsed_args["filter"]:
        parsed_args["filter"] = default_config["filter"]
    else:
        parsed_args["filter"] = lambda _: True

    if not parsed_args["log_dir"]:
        parsed_args["log_dir"] = tempfile.mkdtemp(prefix="pulled_logs_")

    global verbose
    verbose = parsed_args["verbose"]

    return parsed_args


def identify_builds(cl_id, patchset):
    """
    Use the bb tool to retrieve list of builds associated with this cl and
    patchset. Only return builds associated with the most recent run.

    Returns a list of (builder name, build id) tuples, excluding builders
    whose recipe is in ignored_recipes.
    Raises RuntimeError if no builds (or no CQ-triggered builds) are found.
    """
    cl_str = ("https://chromium-review.googlesource.com/"
              "c/chromium/src/+/{}/{}".format(cl_id, patchset))

    # Make sure we're only getting the most recent set of builds by grabbing the
    # cq_attempt_key tag from the first build returned. If the tag isn't present
    # it means that build was triggered manually, so keep trying until we find
    # one that has the tag.
    # This strategy relies on the fact that builds are returned in reverse
    # chronological order.
    num_builds_to_check = 10
    most_recent_builds = subprocess.run(
        [bb, "ls", "-cl", cl_str, "-" + str(num_builds_to_check), "-json"],
        check=True,
        stdout=subprocess.PIPE,
        text=True)

    if (len(most_recent_builds.stdout) == 0):
        raise RuntimeError("Couldn't find any builds. Did you use a valid "
                           "cl_id AND patchset number?")

    # bb emits one JSON object per line.
    output = [
        json.loads(build) for build in most_recent_builds.stdout.splitlines()
    ]
    # Take the key from the *first* (most recent) build that has one. Iterate
    # over the builds actually returned (there may be fewer than requested),
    # and stop at the first match so keys from older builds can't overwrite
    # the most recent one.
    cq_attempt_key = None
    for build in output:
        for tag in build["tags"]:
            if tag["key"] == "cq_attempt_key":
                cq_attempt_key = tag["value"]
                break
        if cq_attempt_key:
            break
    if not cq_attempt_key:
        raise RuntimeError(
            "None of the {} most recent builds were associated with a CQ run. "
            "Did you launch a bunch of manual builds after hitting the button?".
            format(num_builds_to_check))

    # Grab the info for all builds in the most recent set
    build_list = subprocess.run([
        bb, "ls", "-cl", cl_str, "-json", "-fields", "input", "-t",
        "cq_attempt_key:" + cq_attempt_key
    ],
                                check=True,
                                stdout=subprocess.PIPE,
                                text=True)
    if (len(build_list.stdout) == 0):
        raise RuntimeError("Somehow couldn't find any builds the second time.")

    # Retrieve the name and id of each build
    parsed_builds = [
        json.loads(build) for build in build_list.stdout.splitlines()
    ]

    target_builds = [
        (build["builder"]["builder"], build["id"])
        for build in parsed_builds
        if build["input"]["properties"]["recipe"] not in ignored_recipes
    ]

    log("Found {} target builds".format(len(target_builds)))
    return target_builds


def identify_tot_builds():
    """
    Use the bb tool to retrieve the information for the most recent builds of
    each tot bot.
    """
    target_builds = []
    for bot in ToT_builders:
        # Ask bb for the single most recent completed build of this bot.
        cmd = [
            bb, "ls", "-json", "-fields", "input", "-1", "-status", "ended", bot
        ]
        build_info = subprocess.run(cmd,
                                    check=True,
                                    stdout=subprocess.PIPE,
                                    text=True)
        if not build_info.stdout:
            raise RuntimeError("Couldn't find any builds for " + bot)

        build = json.loads(build_info.stdout)

        # Skip builders whose recipe never compiles anything.
        recipe = build["input"]["properties"]["recipe"]
        if recipe in ignored_recipes:
            continue
        target_builds.append((build["builder"]["builder"], build["id"]))

    log("Found {} ToT builds".format(len(target_builds)))
    return target_builds


def try_pull_step(build_id, step_names):
    """
    Try to pull each possible step name until one works or we've tried them all.
    If one is successfully pulled, return a (process, first_line) tuple, where
    the incoming data can be streamed from process.stdout. Returns None if no
    step name matched.
    """
    for step_name in step_names:
        output = subprocess.Popen([bb, "log", build_id, step_name],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT,
                                  text=True)
        first_line = output.stdout.readline()
        if first_line.startswith("step \"{}\" not found".format(step_name)):
            # Clean up the failed attempt: close the pipe and reap the
            # process so we don't accumulate open fds and zombie processes,
            # one per missing step per builder.
            output.stdout.close()
            output.wait()
            continue

        return output, first_line

    return None


def write_line(filter_fun, file, line):
    """
    Write a line to a file if it passes the filter.
    """
    if filter_fun(line):
        file.write(line + "\n")


# Pull the compilation logs, and filter them, only writing lines of interest.
def pull_and_filter_logs(parsed_args, target_builds):
    """
    Pull the compilation logs for each identified builder. Strip uninteresting
    lines before saving to disk.

    Note that this will create the output directory (including any missing
    parent directories) if it doesn't exist.

    Returns (failures, partial_logs): lists of "name (build_id)" strings for
    builders whose logs couldn't be pulled at all, or only partially.
    """
    # Keep track of any builders which we unexpectedly failed to pull logs for.
    failures = []  # Completely failed (e.g. step didn't exist)
    partial_logs = []  # Partial failure (e.g. builder died mid-compilation)

    log_dir = parsed_args["log_dir"]
    # makedirs(exist_ok=True) also handles nested paths, which a bare
    # os.mkdir would reject with FileNotFoundError.
    os.makedirs(log_dir, exist_ok=True)

    print("Storing logs in " + os.path.abspath(log_dir))

    if parsed_args["delete_logs"]:
        for f in glob.glob(os.path.join(log_dir, "*.txt")):
            os.remove(f)

    for name, build_id in target_builds:
        output_file = os.path.join(log_dir, name + ".txt")

        if parsed_args["resume"] and os.path.isfile(output_file):
            log("Log for {} already exists, skipping".format(name))
            continue

        log("Pulling logs for " + name)

        pulled_result = try_pull_step(build_id, parsed_args["step"])
        if not pulled_result:
            log("  Failed to pull logs for " + name)
            failures.append(name + " ({})".format(build_id) + "\n")
            continue

        output, first_line = pulled_result

        with open(output_file, "w") as file:
            write_line(parsed_args["filter"], file, first_line)
            for line in output.stdout:
                # If the builder died mid-compilation, bb may stop returning
                # data partway through, and just start printing an error message
                # every 5 seconds instead.
                if "No logs returned" in line:
                    log("  Only pulled partial log for " + name)
                    partial_logs.append(name + " ({})".format(build_id) + "\n")
                    output.kill()
                    write_line(
                        lambda _: True, file,
                        "Failed to pull entire log for {} ({})".format(
                            name, build_id))
                    break
                write_line(parsed_args["filter"], file, line)
        # Reap the bb process (whether it finished or was killed above) so
        # it doesn't linger as a zombie.
        output.wait()
    return failures, partial_logs


def main(args):
    """Entry point: parse flags, find the builds, then pull their logs."""
    parsed_args = parse_args(args)
    builds = (identify_tot_builds() if parsed_args["tot"] else
              identify_builds(parsed_args["cl"], parsed_args["patchset"]))
    failures, partial_logs = pull_and_filter_logs(parsed_args, builds)

    # Report any builders we couldn't pull logs for at all.
    if failures:
        sys.stderr.write(
            "Unexpectedly failed to pull logs for the following builders.\n"
            "They likely failed before the compile step (often this means an "
            "infra failure).\n"
            "You might want to download logs from the last valid build "
            "manually:\n")
        for failure in sorted(failures):
            sys.stderr.write(failure)

    # Report any builders whose logs were cut off partway through.
    if partial_logs:
        sys.stderr.write(
            "Only pulled partial logs for the following builders. You might "
            "want to download them manually and/or re-run the builders:\n")
        for failure in sorted(partial_logs):
            sys.stderr.write(failure)


if __name__ == '__main__':
    # Strip the program name from argv; main() parses the remaining flags.
    sys.exit(main(sys.argv[1:]))