File: summarize_perf.py

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (144 lines) | stat: -rw-r--r-- 4,463 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import logging
import os
import re
from collections import defaultdict

import click
import pandas as pd
from tabulate import tabulate


def gmean(s):
    """
    Return the geometric mean of the values in ``s``.

    The mean is computed as ``exp(mean(log(s)))`` rather than as the n-th root
    of the raw product, so long inputs or inputs with very large/small values
    do not overflow to ``inf`` or underflow to ``0``.

    Args:
        s: a pandas Series (or any sized array-like) of numbers.

    Returns:
        The geometric mean as a float. ``nan`` is returned for an empty input
        and whenever the input contains a NaN or a negative value; ``0.0`` is
        returned if any value is zero (and none is negative or NaN).
    """
    # Local import: numpy is not imported at the top of this file.
    import numpy as np

    values = np.asarray(s, dtype=float)
    if values.size == 0:
        # The original formula divided by len(s) and raised ZeroDivisionError.
        return float("nan")
    # log(0) == -inf and log(negative) == nan are the results we want here, so
    # silence the floating-point warnings numpy would otherwise emit for them.
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.exp(np.log(values).mean())


def find_csv_files(path, perf_compare):
    """
    Recursively collect the benchmark CSV files found under ``path``.

    Args:
        path: directory to walk, including all of its subdirectories.
        perf_compare: when true, select files named exactly
            ``training_<suite>.csv`` where ``<suite>`` is ``torchbench``,
            ``huggingface`` or ``timm_models``; otherwise select every file
            whose name ends with ``_performance.csv``.

    Returns:
        A list of paths (each one ``path`` joined with the relative location of
        the file), in the order ``os.walk`` yields them. The list is empty when
        nothing matches or ``path`` does not exist.
    """
    perf_compare_regex = r"training_(torchbench|huggingface|timm_models)\.csv"

    def is_csv(f):
        if perf_compare:
            # fullmatch, not match: match only anchors the start of the name
            # and would also accept e.g. "training_torchbench.csv.bak".
            return re.fullmatch(perf_compare_regex, f) is not None
        return f.endswith("_performance.csv")

    return [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(path)
        for file in files
        if is_csv(file)
    ]


def _parse_csv_name(name, perf_compare):
    """
    Split a benchmark CSV file name into its run attributes.

    Returns a ``(compiler, benchmark, dtype, mode, device)`` tuple of strings,
    or ``None`` when ``name`` does not follow the expected naming convention.
    """
    if perf_compare:
        m = re.fullmatch(r"training_(torchbench|huggingface|timm_models)\.csv", name)
        if m is None:
            return None
        # These file names only carry the benchmark suite; every other
        # attribute is fixed for this kind of run.
        return "inductor", m.group(1), "float32", "training", "cuda"

    regex = (
        "(.+)_"
        "(torchbench|huggingface|timm_models)_"
        "(float32|amp)_"
        "(inference|training)_"
        "(cpu|cuda)_"
        r"performance\.csv"
    )
    m = re.fullmatch(regex, name)
    if m is None:
        return None
    return m.groups()


def _build_table(data):
    """
    Turn a ``benchmark -> compiler -> value`` mapping into tabulate input.

    Returns ``(headers, rows)``: one header per benchmark and one row per
    compiler, each row being the compiler name followed by its value for every
    benchmark rounded to two decimals. A compiler with no value for some
    benchmark gets ``None`` in that cell instead of raising ``KeyError``.
    """
    benchmarks = list(data)
    # Union of the compilers of all benchmarks, in first-seen order.
    compilers = list(
        dict.fromkeys(
            compiler for per_compiler in data.values() for compiler in per_compiler
        )
    )
    rows = []
    for compiler in compilers:
        row = [compiler]
        for benchmark in benchmarks:
            value = data[benchmark].get(compiler)
            row.append(None if value is None else round(value, 2))
        rows.append(row)
    return benchmarks, rows


@click.command()
@click.argument("directory", default="artifacts")
@click.option("--amp", is_flag=True)
@click.option("--float32", is_flag=True)
@click.option(
    "--perf-compare",
    is_flag=True,
    help="Set if the CSVs were generated by running manually the action rather than picking them from the nightly job",
)
def main(directory, amp, float32, perf_compare):
    """
    Given a directory containing multiple CSVs from --performance benchmark
    runs, aggregates and generates summary statistics similar to the web UI at
    https://torchci-git-fork-huydhn-add-compilers-bench-74abf8-fbopensource.vercel.app/benchmark/compilers

    This is most useful if you've downloaded CSVs from CI and need to quickly
    look at aggregate stats.  The CSVs are expected to follow exactly the same
    naming convention that is used in CI.

    You may also be interested in
    https://docs.google.com/document/d/1DQQxIgmKa3eF0HByDTLlcJdvefC4GwtsklJUgLs09fQ/edit#
    which explains how to interpret the raw csv data.
    """
    # Passing both flags, or neither, selects both dtypes.
    dtypes = ["amp", "float32"]
    if amp and not float32:
        dtypes = ["amp"]
    if float32 and not amp:
        dtypes = ["float32"]

    # Group the parsed CSVs by base name so that same-named files found in
    # different subdirectories are aggregated together.
    dfs = defaultdict(list)
    for f in find_csv_files(directory, perf_compare):
        try:
            dfs[os.path.basename(f)].append(pd.read_csv(f))
        except Exception:
            # Name the offending file before letting the error propagate.
            logging.warning("failed parsing %s", f)
            raise

    # dtype -> statistic -> benchmark -> compiler -> value
    results = defaultdict(  # dtype
        lambda: defaultdict(  # statistic
            lambda: defaultdict(dict)  # benchmark  # compiler -> value
        )
    )

    for name in sorted(dfs):
        parsed = _parse_csv_name(name, perf_compare)
        if parsed is None:
            # Previously an unexpected name crashed on ``m.group``.
            logging.warning("skipping %s: unexpected file name", name)
            continue
        compiler, benchmark, dtype, mode, device = parsed

        # Filter first so that files of an unselected dtype are never
        # aggregated (and cannot fail the run).
        if dtype not in dtypes:
            continue

        df = pd.concat(dfs[name])
        # Rows with missing values or a zero speedup are excluded from the
        # statistics.
        df = df.dropna().query("speedup != 0")
        if df.empty:
            logging.warning("skipping %s: no usable rows", name)
            continue

        statistics = {
            "speedup": gmean(df["speedup"]),
            "comptime": df["compilation_latency"].mean(),
            "memory": gmean(df["compression_ratio"]),
        }

        for statistic, value in statistics.items():
            results[f"{device} {dtype} {mode}"][statistic][benchmark][compiler] = value

    descriptions = {
        "speedup": "Geometric mean speedup",
        "comptime": "Mean compilation time",
        "memory": "Peak memory compression ratio",
    }

    for dtype_mode, r in results.items():
        print(f"# {dtype_mode} performance results")
        for statistic, data in r.items():
            print(f"## {descriptions[statistic]}")

            headers, table = _build_table(data)
            print(tabulate(table, headers=headers))
            print()


# Run the command only when this file is executed as a script, so that
# importing the module does not trigger it.
if __name__ == "__main__":
    main()