#!/usr/bin/env python3
# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# NOTE: this text must stay the module's first statement: parse_args() passes
# __doc__ to argparse as the --help description, and a docstring placed after
# the imports would leave __doc__ == None.
"""
This script parses all the log files in a directory, looking for instances
of a particular warning. It collects all the ones it finds, and writes the
results to an output file, recording which files had warnings, and the
location(s) in each file. It also counts the total number of files/warnings.
It can be configured to either print a (somewhat) human-readable list of files
and locations, or a more structured json for automatic processing.
See README.md in this directory for more details.
"""

import argparse
import collections
import json
import os
import re
import sys
def parse_args(args):
  """
  Parse commandline flags into a dict.

  Returns a dict with keys:
    log_dir: Directory containing the log files to scrape, or a single log.
    output: Destination for the collected warnings — 'stdout', '-', or a path.
    warning: Text in the log indicating a warning was raised.
    summarize: If True, output a human-readable summary instead of json.
    print_links: If True, provide a direct chromium codesearch link to the
        first warning in each file.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument(
      "-l",
      "--log-dir",
      required=True,
      type=str,
      help="Path to the directory containing the build logs, "
      "or to a single build log.")
  parser.add_argument(
      "-o",
      "--output",
      required=True,
      type=str,
      help="Where the collected warning information should "
      "go. This should be either the string 'stdout', a dash "
      "(also meaning stdout), or a path to a file.\n"
      "ex. -o out.txt, -o stdout, -o -")
  parser.add_argument(
      "-w",
      "--warning",
      required=True,
      type=str,
      help="Text indicating the warning of interest. "
      "Should appear at the end of a line containing the "
      "filename and warning location.\n"
      "ex. -w [-Wthread-safety-reference-return]")
  parser.add_argument(
      "-s",
      "--summarize",
      action="store_true",
      help="If present, output a (somewhat) human-readable text file "
      "cataloguing the warnings. Otherwise, output a json file "
      "with more detailed information about each instance.")
  parser.add_argument(
      "-k",
      "--print_links",
      action="store_true",
      help="If present, attempt to provide direct links to codesearch for "
      "the first warning in each file. Files which don't directly correspond "
      "to anything, such as generated files, print the filename instead.")
  return vars(parser.parse_args(args))
_TARGET_RE = re.compile('([^:(]+)(?:[:(])([0-9]+)(?::|, ?)([0-9]+)\)?:')
def make_codesearch_link(file, line):
  """
  Build a chromium codesearch URL pointing at `line` of `file`, so the site
  of a warning can be inspected easily.

  Files that don't start with "../../" (e.g. generated files) have no
  counterpart in codesearch, so the bare filename is returned instead.
  """
  if file.startswith("../../"):
    return "https://crsrc.org/{};l={}".format(file.removeprefix('../../'),
                                              line)
  # Probably a generated file; can't construct a good link automatically.
  return file
def extract_warning_location(line):
  """
  Pull the warning site out of a build-log line that raised a warning.

  Recognizes both |/path/to/file(123, 45):...| (Windows) and
  |/path/to/file:123:45:...| (elsewhere).

  Returns a (normalized path, line number, column number) triple, or None
  when the line doesn't match either format.
  """
  match = _TARGET_RE.match(line)
  if match is None:
    return None
  path, line_no, col_no = match.groups()
  return os.path.normpath(path), int(line_no), int(col_no)
def collect_warning(summarize, print_links, log_name, log_file, collection,
                    warning_info):
  """
  Record one warning occurrence in `collection`, merging duplicates.

  `collection` maps source-file paths to lists of warning records
  (a defaultdict(list)). `warning_info` is the (path, line, col) triple
  produced by extract_warning_location().

  If we're summarizing, each record is just (line, col), deduplicated.
  Otherwise a record also carries an optional codesearch link, the text of
  the offending source line (scraped from the next |...| line of the log,
  in case line numbers change later), and the list of log files the warning
  occurred in (so we know which systems it occurs on).
  """
  path, line_num, col_num = warning_info
  # If we're collecting a summary, we just need the line and column numbers.
  if summarize:
    logged_info = line_num, col_num
    if logged_info not in collection[path]:
      # Haven't seen this particular warning before.
      collection[path].append(logged_info)
    return
  # If we're not summarizing, scrape the quoted source text from the next
  # nonempty |...| line. Use next()'s default so a truncated log yields an
  # empty snippet instead of leaking StopIteration out of this function.
  next_line = next(log_file, None)
  while next_line is not None and "|" not in next_line:
    next_line = next(log_file, None)
  source_text = "" if next_line is None else next_line.split("|")[1].strip()
  log_name = os.path.basename(log_name)
  if print_links:
    logged_info = (line_num, col_num, make_codesearch_link(path, line_num),
                   source_text, [log_name])
  else:
    logged_info = (line_num, col_num, source_text, [log_name])
  # Records with the same (line, col) are merged, so this list should be
  # either a singleton or empty.
  existing_info = [
      x for x in collection[path]
      if x[0] == logged_info[0] and x[1] == logged_info[1]
  ]
  if not existing_info:
    # Haven't seen this particular warning before.
    collection[path].append(logged_info)
    return
  # If the info's already in the list, then just note the name of the log
  # file. It's possible for the same warning to appear multiple times in a
  # file.
  if log_name not in existing_info[0][-1]:
    existing_info[0][-1].append(log_name)
def read_file(filename, warning_text, summarize, print_links, collection,
              failures):
  """
  Scan a single build log, feeding every warning found into `collection`.

  Lines that end with `warning_text` but yield no parsable location are
  appended to `failures` (this shouldn't happen).
  """
  with open(filename) as log:
    for line in log:
      if not line.rstrip().endswith(warning_text):
        continue
      warning_info = extract_warning_location(line)
      if warning_info:
        collect_warning(summarize, print_links, filename, log, collection,
                        warning_info)
      else:
        builder_name, _ = os.path.splitext(os.path.basename(filename))
        failures.append("{}: {}".format(builder_name, line))
def log_output(summarize, print_links, collection, output):
  """
  Write the results of the collection to `output`.

  `output` may be "-" or "stdout" (case-insensitive) for stdout, or a file
  path. If a summary was requested, write a human-readable listing plus
  file/hit totals; otherwise dump `collection` as json.
  """
  output_to_stdout = (output == "-" or output.lower() == "stdout")
  if output_to_stdout:
    output_file = sys.stdout
  else:
    output_file = open(output, "w")
    print("Writing output to " + os.path.abspath(output))
  # try/finally so the file is closed on every path — the original returned
  # early after json.dump() without ever reaching its close() call.
  try:
    if not summarize:
      json.dump(collection, output_file, indent=2, sort_keys=True)
      return
    keys = list(collection.keys())
    hits = 0
    for key in sorted(keys):
      values = collection[key]
      hits += len(values)
      padding = " "
      if print_links:
        # Link to the first warning in the file; indent the hits below it.
        key = make_codesearch_link(key, values[0][0])
        padding = "\n "
      output_file.write("{}{}({} hits): {}\n".format(key, padding,
                                                     str(len(values)),
                                                     str(values)))
    output_file.write("\nTotal Files: {}, Total Hits: {}".format(
        len(keys), hits))
  finally:
    if not output_to_stdout:
      output_file.close()
def main(args):
  """Scrape warnings from the configured logs and emit the report."""
  parsed_args = parse_args(args)
  log_dir = parsed_args["log_dir"]
  try:
    log_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
  except NotADirectoryError:
    # Assume the argument was the (one) file to read.
    log_files = [log_dir]
  collection = collections.defaultdict(list)
  failures = []
  for log_file in log_files:
    read_file(log_file, parsed_args["warning"], parsed_args["summarize"],
              parsed_args["print_links"], collection, failures)
  # Sort each file's warnings by position for stable output.
  for path in list(collection):
    collection[path] = sorted(collection[path])
  log_output(parsed_args["summarize"], parsed_args["print_links"], collection,
             parsed_args["output"])
  if failures:
    sys.stderr.write(
        "\nFound lines with an unexpected format but the right ending:")
    for line in failures:
      sys.stderr.write("\n" + line)


if __name__ == "__main__":
  sys.exit(main(sys.argv[1:]))