File: compare_test_results.py

package info (click to toggle)
rust-coreutils 0.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 485,976 kB
  • sloc: ansic: 103,608; asm: 28,570; sh: 8,672; python: 5,662; makefile: 474; cpp: 97; javascript: 72
file content (250 lines) | stat: -rw-r--r-- 9,804 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#!/usr/bin/env python3
"""
Compare GNU test results between current run and reference to identify
regressions and fixes.


Arguments:
    CURRENT_JSON       Path to the current run's aggregated results JSON file
    REFERENCE_JSON     Path to the reference (main branch) aggregated
                        results JSON file
    --ignore-file      Path to file containing list of tests to ignore
                        (for intermittent issues)
    --output           Path to output file for GitHub comment content
"""

import argparse
import json
import os
import sys


def flatten_test_results(results):
    """Convert nested JSON test results to a flat dictionary of test paths to statuses."""
    flattened = {}
    for util, tests in results.items():
        for test_name, status in tests.items():
            # Build the full test path
            test_path = f"tests/{util}/{test_name}"
            # Remove the .log extension
            test_path = test_path.replace(".log", "")
            flattened[test_path] = status
    return flattened


def load_ignore_list(ignore_file):
    """Load the set of test paths to ignore from *ignore_file*.

    Blank lines and comment lines (a ``#`` possibly preceded by whitespace)
    are skipped; remaining lines are whitespace-stripped.

    Args:
        ignore_file (str): Path to the ignore-list file.

    Returns:
        set: Stripped test paths; empty set when the file does not exist.
    """
    if not os.path.exists(ignore_file):
        return set()

    with open(ignore_file, "r") as f:
        # Strip BEFORE the comment check so indented comments are also
        # filtered out (previously startswith("#") ran on the raw line,
        # letting lines like "   # note" slip into the ignore set).
        stripped = (line.strip() for line in f)
        return {line for line in stripped if line and not line.startswith("#")}


def identify_test_changes(current_flat, reference_flat):
    """
    Identify different categories of test changes between current and reference results.

    Args:
        current_flat (dict): Flattened dictionary of current test results
        reference_flat (dict): Flattened dictionary of reference test results

    Returns:
        tuple: Five lists containing regressions, fixes, newly_skipped,
            newly_passing, and newly_failing tests
    """
    failing = ("FAIL", "ERROR")

    # Regressions: passing on the reference run, failing/erroring now.
    regressions = [
        path
        for path, status in current_flat.items()
        if status in failing and reference_flat.get(path) == "PASS"
    ]

    # Fixes: failing/erroring on the reference run, passing now.
    fixes = [
        path
        for path, status in reference_flat.items()
        if status in failing and current_flat.get(path) == "PASS"
    ]

    # Newly skipped: passing on the reference run, skipped now.
    newly_skipped = [
        path
        for path, status in current_flat.items()
        if status == "SKIP" and reference_flat.get(path) == "PASS"
    ]

    # Newly passing: skipped on the reference run, passing now.
    newly_passing = [
        path
        for path, status in current_flat.items()
        if status == "PASS" and reference_flat.get(path) == "SKIP"
    ]

    # Newly failing: skipped on the reference run, failing/erroring now.
    newly_failing = [
        path
        for path, status in current_flat.items()
        if status in failing and reference_flat.get(path) == "SKIP"
    ]

    return regressions, fixes, newly_skipped, newly_passing, newly_failing


def _split_by_ignore(tests, ignore_list):
    """Partition *tests* into (real, intermittent) by *ignore_list* membership,
    preserving the input order within each partition."""
    real = [t for t in tests if t not in ignore_list]
    intermittent = [t for t in tests if t in ignore_list]
    return real, intermittent


def _report_category(header, tests, level, make_msg, output_lines):
    """Print one report section when *tests* is non-empty.

    Writes *header* to stderr, then one GitHub workflow annotation
    (``::<level> ::<message>``) per test in sorted order, and appends each
    message to *output_lines* for the comment file.
    """
    if not tests:
        return
    print(f"\n{header}", file=sys.stderr)
    for test in sorted(tests):
        msg = make_msg(test)
        print(f"::{level} ::{msg}", file=sys.stderr)
        output_lines.append(msg)


def main():
    """Compare current vs. reference GNU test results and report changes.

    Returns:
        int: 1 when real (non-intermittent) regressions were found or the
            current results / ignore file are unusable; 0 otherwise
            (including when the reference file is missing).
    """
    parser = argparse.ArgumentParser(
        description="Compare GNU test results and identify regressions and fixes"
    )
    parser.add_argument("current_json", help="Path to current run JSON results")
    parser.add_argument("reference_json", help="Path to reference JSON results")
    parser.add_argument(
        "--ignore-file",
        required=True,
        help="Path to file with tests to ignore (for intermittent issues)",
    )
    parser.add_argument("--output", help="Path to output file for GitHub comment")

    args = parser.parse_args()

    # A missing/corrupt current-results file is fatal: there is nothing to check.
    try:
        with open(args.current_json, "r") as f:
            current_results = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        sys.stderr.write(f"Error loading current results: {e}\n")
        return 1

    # A missing/corrupt reference is non-fatal: just skip the comparison.
    try:
        with open(args.reference_json, "r") as f:
            reference_results = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        sys.stderr.write(f"Error loading reference results: {e}\n")
        sys.stderr.write("Skipping comparison as reference is not available.\n")
        return 0

    # Load ignore list (required)
    if not os.path.exists(args.ignore_file):
        sys.stderr.write(f"Error: Ignore file {args.ignore_file} does not exist\n")
        return 1

    ignore_list = load_ignore_list(args.ignore_file)
    print(f"Loaded {len(ignore_list)} tests to ignore from {args.ignore_file}")

    # Flatten result structures for easier comparison
    current_flat = flatten_test_results(current_results)
    reference_flat = flatten_test_results(reference_results)

    # Identify different categories of test changes
    regressions, fixes, newly_skipped, newly_passing, newly_failing = (
        identify_test_changes(current_flat, reference_flat)
    )

    # Separate real changes from known intermittent (ignored) tests.
    real_regressions, intermittent_regressions = _split_by_ignore(
        regressions, ignore_list
    )
    real_fixes, intermittent_fixes = _split_by_ignore(fixes, ignore_list)
    real_newly_failing, intermittent_newly_failing = _split_by_ignore(
        newly_failing, ignore_list
    )

    # Print summary stats
    print(f"Total tests in current run: {len(current_flat)}")
    print(f"Total tests in reference: {len(reference_flat)}")
    print(f"New regressions: {len(real_regressions)}")
    print(f"Intermittent regressions: {len(intermittent_regressions)}")
    print(f"Fixed tests: {len(real_fixes)}")
    print(f"Intermittent fixes: {len(intermittent_fixes)}")
    print(f"Newly skipped tests: {len(newly_skipped)}")
    print(f"Newly passing tests (previously skipped): {len(newly_passing)}")
    print(f"Newly failing tests (previously skipped): {len(real_newly_failing)}")
    print(f"Intermittent newly failing: {len(intermittent_newly_failing)}")

    output_lines = []

    # Report each category in the established order; headers, annotation
    # levels, and message wording are part of the script's CI-facing output.
    _report_category(
        "REGRESSIONS (non-intermittent failures):",
        real_regressions,
        "error",
        lambda test: f"GNU test failed: {test}. {test} is passing on 'main'. Maybe you have to rebase?",
        output_lines,
    )
    _report_category(
        "INTERMITTENT ISSUES (ignored regressions):",
        intermittent_regressions,
        "notice",
        lambda test: f"Skip an intermittent issue {test} (fails in this run but passes in the 'main' branch)",
        output_lines,
    )
    _report_category(
        "INTERMITTENT ISSUES (ignored fixes):",
        intermittent_fixes,
        "notice",
        lambda test: f"Skipping an intermittent issue {test} (passes in this run but fails in the 'main' branch)",
        output_lines,
    )
    _report_category(
        "FIXED TESTS:",
        real_fixes,
        "notice",
        lambda test: f"Congrats! The gnu test {test} is no longer failing!",
        output_lines,
    )
    _report_category(
        "NEWLY SKIPPED TESTS:",
        newly_skipped,
        "warning",
        lambda test: f"Note: The gnu test {test} is now being skipped but was previously passing.",
        output_lines,
    )
    _report_category(
        "NEWLY PASSING TESTS (previously skipped):",
        newly_passing,
        "notice",
        lambda test: f"Congrats! The gnu test {test} is now passing!",
        output_lines,
    )
    _report_category(
        "NEWLY FAILING TESTS (previously skipped):",
        real_newly_failing,
        "warning",
        lambda test: f"Note: The gnu test {test} was skipped on 'main' but is now failing.",
        output_lines,
    )
    _report_category(
        "INTERMITTENT NEWLY FAILING (ignored):",
        intermittent_newly_failing,
        "notice",
        lambda test: f"Skip an intermittent issue {test} (was skipped on 'main', now failing)",
        output_lines,
    )

    # Only create the comment file when there is something to say.
    if args.output and output_lines:
        with open(args.output, "w") as f:
            for line in output_lines:
                f.write(f"{line}\n")

    # Return exit code based on whether we found regressions
    return 1 if real_regressions else 0


# Propagate main()'s return value (1 on real regressions, else 0) as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())