import logging
import os
import re
from collections import defaultdict

import click
import pandas as pd
from tabulate import tabulate
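

# Geometric mean of a pandas Series; used to aggregate per-model speedups and
# memory compression ratios, which combine multiplicatively (the gmean of
# [1.0, 4.0] is 2.0).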
def gmean(s):
    return s.product() ** (1 / len(s))


def find_csv_files(path, perf_compare):
    """
    Recursively search a directory and its subdirectories for benchmark CSV
    files whose names match the expected CI naming convention.
    """
    def is_csv(f):
        if perf_compare:
            regex = r"training_(torchbench|huggingface|timm_models)\.csv"
            return re.match(regex, f) is not None
        else:
            return f.endswith("_performance.csv")

    csv_files = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if is_csv(file):
                csv_files.append(os.path.join(root, file))

    return csv_files


@click.command()
@click.argument("directory", default="artifacts")
@click.option("--amp", is_flag=True)
@click.option("--float32", is_flag=True)
@click.option(
    "--perf-compare",
    is_flag=True,
    help="Set if the CSVs were generated by running the action manually rather than taken from the nightly job",
)
def main(directory, amp, float32, perf_compare):
    """
    Given a directory containing multiple CSVs from --performance benchmark
    runs, aggregates and generates summary statistics similar to the web UI at
    https://torchci-git-fork-huydhn-add-compilers-bench-74abf8-fbopensource.vercel.app/benchmark/compilers

    This is most useful if you've downloaded CSVs from CI and need to quickly
    look at aggregate stats. The CSVs are expected to follow exactly the same
    naming convention that is used in CI.

    You may also be interested in
    https://docs.google.com/document/d/1DQQxIgmKa3eF0HByDTLlcJdvefC4GwtsklJUgLs09fQ/edit#
    which explains how to interpret the raw csv data.
    """
    dtypes = ["amp", "float32"]
    if amp and not float32:
        dtypes = ["amp"]
    if float32 and not amp:
        dtypes = ["float32"]
    dfs = defaultdict(list)
    for f in find_csv_files(directory, perf_compare):
        try:
            dfs[os.path.basename(f)].append(pd.read_csv(f))
        except Exception:
            logging.warning("failed parsing %s", f)
            raise

    # dtype -> statistic -> benchmark -> compiler -> value
    results = defaultdict(  # dtype
        lambda: defaultdict(  # statistic
            lambda: defaultdict(dict)  # benchmark -> {compiler: value}
        )
    )

    for k, v in sorted(dfs.items()):
        if perf_compare:
            regex = r"training_(torchbench|huggingface|timm_models)\.csv"
            m = re.match(regex, k)
            assert m is not None, k
            compiler = "inductor"
            benchmark = m.group(1)
            dtype = "float32"
            mode = "training"
            device = "cuda"
        else:
            regex = (
                "(.+)_"
                "(torchbench|huggingface|timm_models)_"
                "(float32|amp)_"
                "(inference|training)_"
                "(cpu|cuda)_"
                r"performance\.csv"
            )
            m = re.match(regex, k)
            assert m is not None, k
            compiler = m.group(1)
            benchmark = m.group(2)
            dtype = m.group(3)
            mode = m.group(4)
            device = m.group(5)
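        # Rows with missing values or a speedup of 0 (typically failed runs)
        # are dropped so they don't skew the aggregates.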
        df = pd.concat(v)
        df = df.dropna().query("speedup != 0")
        statistics = {
            "speedup": gmean(df["speedup"]),
            "comptime": df["compilation_latency"].mean(),
            "memory": gmean(df["compression_ratio"]),
        }
        if dtype not in dtypes:
            continue
        for statistic, value in statistics.items():
            results[f"{device} {dtype} {mode}"][statistic][benchmark][compiler] = value

    descriptions = {
        "speedup": "Geometric mean speedup",
        "comptime": "Mean compilation time",
        "memory": "Peak memory compression ratio",
    }
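
    # One Markdown-style section per (device, dtype, mode) combination and one
    # table per statistic: rows are compilers, columns are benchmark suites.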
    for dtype_mode, r in results.items():
        print(f"# {dtype_mode} performance results")
        for statistic, data in r.items():
            print(f"## {descriptions[statistic]}")
            table = []
            for row_name in data[next(iter(data.keys()))]:
                row = [row_name]
                for col_name in data:
                    row.append(round(data[col_name][row_name], 2))
                table.append(row)
            headers = list(data.keys())
            print(tabulate(table, headers=headers))
            print()


if __name__ == "__main__":
    main()