#!/usr/bin/env python3
"""
Compare GNU test results between current run and reference to identify
regressions and fixes.
Arguments:
CURRENT_JSON Path to the current run's aggregated results JSON file
REFERENCE_JSON Path to the reference (main branch) aggregated
results JSON file
--ignore-file Path to file containing list of tests to ignore
(for intermittent issues)
--output Path to output file for GitHub comment content
"""
import argparse
import json
import os
import sys


def flatten_test_results(results):
    """Convert nested JSON test results to a flat dictionary of test paths to statuses."""
    flattened = {}
    for util, tests in results.items():
        for test_name, status in tests.items():
            # Build the full test path
            test_path = f"tests/{util}/{test_name}"
            # Strip the .log extension (suffix only, so a ".log" appearing
            # elsewhere in the name is left intact)
            test_path = test_path.removesuffix(".log")
            flattened[test_path] = status
    return flattened


def load_ignore_list(ignore_file):
    """Load the list of tests to ignore from a file."""
    if not os.path.exists(ignore_file):
        return set()
    with open(ignore_file, "r") as f:
        return {
            line.strip()
            for line in f
            if line.strip() and not line.lstrip().startswith("#")
        }


def identify_test_changes(current_flat, reference_flat):
    """
    Identify the different categories of test changes between the current and
    reference results.

    Args:
        current_flat (dict): Flattened dictionary of current test results
        reference_flat (dict): Flattened dictionary of reference test results

    Returns:
        tuple: Five lists containing the regressions, fixes, newly_skipped,
        newly_passing, and newly_failing tests
    """
    # Find regressions (tests that were passing but are now failing)
    regressions = []
    for test_path, status in current_flat.items():
        if (
            status in ("FAIL", "ERROR")
            and test_path in reference_flat
            and reference_flat[test_path] == "PASS"
        ):
            regressions.append(test_path)

    # Find fixes (tests that were failing but are now passing)
    fixes = []
    for test_path, status in reference_flat.items():
        if (
            status in ("FAIL", "ERROR")
            and test_path in current_flat
            and current_flat[test_path] == "PASS"
        ):
            fixes.append(test_path)

    # Find newly skipped tests (were passing, now skipped)
    newly_skipped = []
    for test_path, status in current_flat.items():
        if (
            status == "SKIP"
            and test_path in reference_flat
            and reference_flat[test_path] == "PASS"
        ):
            newly_skipped.append(test_path)

    # Find newly passing tests (were skipped, now passing)
    newly_passing = []
    for test_path, status in current_flat.items():
        if (
            status == "PASS"
            and test_path in reference_flat
            and reference_flat[test_path] == "SKIP"
        ):
            newly_passing.append(test_path)

    # Find newly failing tests (were skipped, now failing)
    newly_failing = []
    for test_path, status in current_flat.items():
        if (
            status in ("FAIL", "ERROR")
            and test_path in reference_flat
            and reference_flat[test_path] == "SKIP"
        ):
            newly_failing.append(test_path)

    return regressions, fixes, newly_skipped, newly_passing, newly_failing


def main():
    parser = argparse.ArgumentParser(
        description="Compare GNU test results and identify regressions and fixes"
    )
    parser.add_argument("current_json", help="Path to current run JSON results")
    parser.add_argument("reference_json", help="Path to reference JSON results")
    parser.add_argument(
        "--ignore-file",
        required=True,
        help="Path to file with tests to ignore (for intermittent issues)",
    )
    parser.add_argument("--output", help="Path to output file for GitHub comment")
    args = parser.parse_args()
    # Load test results
    try:
        with open(args.current_json, "r") as f:
            current_results = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        sys.stderr.write(f"Error loading current results: {e}\n")
        return 1

    try:
        with open(args.reference_json, "r") as f:
            reference_results = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        sys.stderr.write(f"Error loading reference results: {e}\n")
        sys.stderr.write("Skipping comparison as reference is not available.\n")
        return 0

    # Load ignore list (required)
    if not os.path.exists(args.ignore_file):
        sys.stderr.write(f"Error: Ignore file {args.ignore_file} does not exist\n")
        return 1
    ignore_list = load_ignore_list(args.ignore_file)
    print(f"Loaded {len(ignore_list)} tests to ignore from {args.ignore_file}")

    # Flatten result structures for easier comparison
    current_flat = flatten_test_results(current_results)
    reference_flat = flatten_test_results(reference_results)

    # Identify the different categories of test changes
    regressions, fixes, newly_skipped, newly_passing, newly_failing = (
        identify_test_changes(current_flat, reference_flat)
    )

    # Filter intermittent issues out of the regressions, fixes, and newly
    # failing lists; ignored tests are reported separately below
    real_regressions = [r for r in regressions if r not in ignore_list]
    intermittent_regressions = [r for r in regressions if r in ignore_list]
    real_fixes = [f for f in fixes if f not in ignore_list]
    intermittent_fixes = [f for f in fixes if f in ignore_list]
    real_newly_failing = [n for n in newly_failing if n not in ignore_list]
    intermittent_newly_failing = [n for n in newly_failing if n in ignore_list]

    # Print summary stats
    print(f"Total tests in current run: {len(current_flat)}")
    print(f"Total tests in reference: {len(reference_flat)}")
    print(f"New regressions: {len(real_regressions)}")
    print(f"Intermittent regressions: {len(intermittent_regressions)}")
    print(f"Fixed tests: {len(real_fixes)}")
    print(f"Intermittent fixes: {len(intermittent_fixes)}")
    print(f"Newly skipped tests: {len(newly_skipped)}")
    print(f"Newly passing tests (previously skipped): {len(newly_passing)}")
    print(f"Newly failing tests (previously skipped): {len(real_newly_failing)}")
    print(f"Intermittent newly failing: {len(intermittent_newly_failing)}")

    output_lines = []
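
    # The "::error ::", "::warning ::", and "::notice ::" prefixes below are
    # GitHub Actions workflow commands: when a step prints them, the messages
    # surface as annotations in the workflow run UI.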
    # Report regressions
    if real_regressions:
        print("\nREGRESSIONS (non-intermittent failures):", file=sys.stderr)
        for test in sorted(real_regressions):
            msg = f"GNU test failed: {test}. {test} passes on 'main'; you may need to rebase."
            print(f"::error ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    # Report intermittent issues (regressions)
    if intermittent_regressions:
        print("\nINTERMITTENT ISSUES (ignored regressions):", file=sys.stderr)
        for test in sorted(intermittent_regressions):
            msg = f"Skipping intermittent issue {test} (fails in this run but passes on the 'main' branch)"
            print(f"::notice ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    # Report intermittent issues (fixes)
    if intermittent_fixes:
        print("\nINTERMITTENT ISSUES (ignored fixes):", file=sys.stderr)
        for test in sorted(intermittent_fixes):
            msg = f"Skipping intermittent issue {test} (passes in this run but fails on the 'main' branch)"
            print(f"::notice ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    # Report fixes
    if real_fixes:
        print("\nFIXED TESTS:", file=sys.stderr)
        for test in sorted(real_fixes):
            msg = f"Congrats! The GNU test {test} is no longer failing!"
            print(f"::notice ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    # Report newly skipped and newly passing tests
    if newly_skipped:
        print("\nNEWLY SKIPPED TESTS:", file=sys.stderr)
        for test in sorted(newly_skipped):
            msg = f"Note: The GNU test {test} is now being skipped but was previously passing."
            print(f"::warning ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    if newly_passing:
        print("\nNEWLY PASSING TESTS (previously skipped):", file=sys.stderr)
        for test in sorted(newly_passing):
            msg = f"Congrats! The GNU test {test} is now passing!"
            print(f"::notice ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    # Report newly failing tests (were skipped, now failing)
    if real_newly_failing:
        print("\nNEWLY FAILING TESTS (previously skipped):", file=sys.stderr)
        for test in sorted(real_newly_failing):
            msg = f"Note: The GNU test {test} was skipped on 'main' but is now failing."
            print(f"::warning ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    if intermittent_newly_failing:
        print("\nINTERMITTENT NEWLY FAILING (ignored):", file=sys.stderr)
        for test in sorted(intermittent_newly_failing):
            msg = f"Skipping intermittent issue {test} (was skipped on 'main', now failing)"
            print(f"::notice ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    # Write the collected messages for the GitHub comment; note the file is
    # only created when there is something to report
    if args.output and output_lines:
        with open(args.output, "w") as f:
            for line in output_lines:
                f.write(f"{line}\n")

    # Exit non-zero only for real (non-intermittent) regressions
    return 1 if real_regressions else 0


if __name__ == "__main__":
    sys.exit(main())