File: collect_warnings.py

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (266 lines) | stat: -rwxr-xr-x 9,676 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#!/usr/bin/env python3

# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# NOTE: the docstring must be the module's first statement; previously it was
# placed after the imports, which left __doc__ as None and so the argparse
# description in parse_args() was empty.
"""
This script parses all the log files in a directory, looking for instances
of a particular warning. It collects all the ones it finds, and writes the
results to an output file, recording which files had warnings, and the
location(s) in each file. It also counts the total number of files/warnings.

It can be configured to either print a (somewhat) human-readable list of files
and locations, or a more structured json for automatic processing.

See README.md in this directory for more details.
"""

import argparse
import collections
import json
import os
import re
import sys


def parse_args(args):
    """
    Parse commandline flags. Possible options:

    Configuration options:
    log_dir :     The directory containing the log files to scrape, or just
                  a single build log.
    output :      Where the collected warning information should go. Either the
                  string "stdout" (case-insensitive) or a path to a file.
    warning_text: The text in the log indicating a warning was raised.
    summarize:    If present, we output a human-readable summary.
                  Otherwise, we output a json with more information.
    print-links:  If present, try to provide a direct link to the first warning
                  in each file on chromium codesearch.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument(
        "-l", "--log-dir", required=True, type=str,
        help="Path to the directory containing the build logs, "
             "or to a single build log.")
    parser.add_argument(
        "-o", "--output", required=True, type=str,
        help="Where the collected warning information should go. "
             "This should be either the string 'stdout', a dash "
             "(also meaning stdout), or a path to a file.\n"
             "ex. -o out.txt, -o stdout, -o -")
    parser.add_argument(
        "-w", "--warning", required=True, type=str,
        help="Text indicating the warning of interest. "
             "Should appear at the end of a line containing the "
             "filename and warning location.\n"
             "ex. -w [-Wthread-safety-reference-return]")
    parser.add_argument(
        "-s", "--summarize", action="store_true",
        help="If present, output a (somewhat) human-readable text file "
             "cataloguing the warnings. Otherwise, output a json file "
             "with more detailed information about each instance.")
    parser.add_argument(
        "-k", "--print_links", action="store_true",
        help="If present, attempt to provide direct links to codesearch for "
             "the first warning in each file. Files which don't directly "
             "correspond to anything, such as generated files, print the "
             "filename instead.")

    # Return the parsed namespace as a plain dict, keyed by option name.
    return vars(parser.parse_args(args))


# Matches the file/line/column prefix of a compiler warning line, e.g.
# |path/file.cc:123:45:| (clang-style) or |path/file.cc(123, 45):| (MSVC).
# A raw string is used so the "\)" is a regex escape, not an invalid Python
# string escape (which raises a SyntaxWarning on modern Python).
_TARGET_RE = re.compile(r'([^:(]+)(?:[:(])([0-9]+)(?::|, ?)([0-9]+)\)?:')


def make_codesearch_link(file, line):
    """
    Construct a codesearch link to the specified position in the file, to
    easily inspect the site of the warning.

    Returns `file` unchanged when it does not start with "../../" (i.e. it
    does not look like a path into the source tree).
    """
    if not file.startswith("../../"):
        # Probably a generated file, can't construct a good link automatically
        return file

    return "https://crsrc.org/{};l={}".format(file.removeprefix('../../'), line)


def extract_warning_location(line):
    """
    Given a line of the build log indicating that a warning has occurred,
    extract the file name and position of the warning (line # + col #).

    Returns a (normalized path, line number, column number) tuple, or None
    if the line does not begin with a recognizable location.
    """
    # Matches:
    # |/path/to/file(123, 45):...|, for Windows
    # |/path/to/file:123:45:...|, elsewhere
    # Captures path, line number, and column number.
    match = _TARGET_RE.match(line)
    if not match:
        return None
    path, line, col = match.groups()
    return os.path.normpath(path), int(line), int(col)


def collect_warning(summarize, print_links, log_name, log_file, collection,
                    warning_info):
    """
    Add information about a warning into our collection, avoiding
    duplicates and merging as necessary.

    `collection` is expected to be a dictionary mapping log file names to the
    warning info generated in the file (the empty list, by default).
    If we're summarizing, we just collect the line and column number of each
    warning.

    If we're not summarizing, we also store the name of the log file (so we know
    which systems the warning occurs on), and the next line of the log file
    (which contains the text of the line, in case line numbers change later.)
    """
    path, line_num, col_num = warning_info

    # If we're collecting a summary, we just need the line and column numbers
    if summarize:
        logged_info = line_num, col_num
        if logged_info not in collection[path]:
            # Haven't seen this particular warning before
            collection[path].append(logged_info)
        return

    # If we're not summarizing, we store extra info:
    # 1. The next (nonempty) line, and
    # 2. the name of the log that the warning occurred in
    # Use next()'s default so a warning at the very end of the log doesn't
    # raise StopIteration (the previous version crashed in that case); an
    # empty string is falsy and terminates the scan.
    next_line = next(log_file, "")
    while next_line and "|" not in next_line:
        next_line = next(log_file, "")
    snippet = next_line.split("|")[1].strip() if "|" in next_line else ""

    log_name = os.path.basename(log_name)
    if print_links:
        logged_info = (line_num, col_num, make_codesearch_link(path, line_num),
                       snippet, [log_name])
    else:
        logged_info = (line_num, col_num, snippet, [log_name])

    # Entries matching this line/column. Should be either a singleton or empty.
    existing_info = [
        x for x in collection[path]
        if x[0] == logged_info[0] and x[1] == logged_info[1]
    ]

    if len(existing_info) == 0:
        # Haven't seen this particular warning before
        collection[path].append(logged_info)
        return

    # If the info's already in the list, then just note the name of the log file
    # It's possible for the same warning to appear multiple times in a file
    if log_name not in existing_info[0][-1]:
        existing_info[0][-1].append(log_name)
    return


def read_file(filename, warning_text, summarize, print_links, collection,
              failures):
    """
    Scan one build log, recording every warning found in `collection`.

    Lines ending with `warning_text` whose location cannot be parsed are
    appended to `failures` instead (this shouldn't happen).
    """
    # Builder name = log filename without its extension; used to label
    # any unparseable lines.
    builder_name = os.path.splitext(os.path.basename(filename))[0]
    with open(filename) as log:
        for line in log:
            if not line.rstrip().endswith(warning_text):
                continue

            location = extract_warning_location(line)
            if location:
                collect_warning(summarize, print_links, filename, log,
                                collection, location)
            else:
                failures.append("{}: {}".format(builder_name, line))


def log_output(summarize, print_links, collection, output):
    """
    Write the results of the collection to the output.
    If a summary was requested, output a text summary.
    Otherwise, dump to json.

    `output` may be "-" or "stdout" (case-insensitive) to write to stdout,
    or a path to a file.
    """

    output_to_stdout = (output == "-" or output.lower() == "stdout")

    if output_to_stdout:
        output_file = sys.stdout
    else:
        output_file = open(output, "w")
        print("Writing output to " + os.path.abspath(output))

    # try/finally guarantees the file handle is closed even on the early
    # json.dump return path (previously that path leaked the open file).
    try:
        if not summarize:
            json.dump(collection, output_file, indent=2, sort_keys=True)
            return

        keys = list(collection.keys())
        hits = 0
        for key in sorted(keys):
            values = collection[key]
            hits += len(values)
            padding = " "
            if print_links:
                # Link to the first warning in the file.
                key = make_codesearch_link(key, values[0][0])
                padding = "\n    "
            output_file.write("{}{}({} hits): {}\n".format(key, padding,
                                                           str(len(values)),
                                                           str(values)))

        output_file.write("\nTotal Files: {}, Total Hits: {}".format(
            len(keys), hits))
    finally:
        if not output_to_stdout:
            output_file.close()


def main(args):
    """Entry point: gather warnings from the logs and report the results."""
    parsed_args = parse_args(args)

    log_dir = parsed_args["log_dir"]
    try:
        log_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
    except NotADirectoryError:
        # The argument named a single log file rather than a directory.
        log_files = [log_dir]

    collection = collections.defaultdict(list)
    failures = []
    for log_path in log_files:
        read_file(log_path, parsed_args["warning"], parsed_args["summarize"],
                  parsed_args["print_links"], collection, failures)

    # Sort each file's warnings by position for stable output.
    for path in list(collection):
        collection[path] = sorted(collection[path])

    log_output(parsed_args["summarize"], parsed_args["print_links"],
               collection, parsed_args["output"])

    if failures:
        sys.stderr.write(
            "\nFound lines with an unexpected format but the right ending:")
        for failure in failures:
            sys.stderr.write("\n" + failure)


if __name__ == "__main__":
    # main() returns None, so the process exits with status 0 on success.
    sys.exit(main(sys.argv[1:]))