import subprocess
import statistics
from io import StringIO
import csv
from dataclasses import dataclass
import argparse
from typing import Optional, Union, Tuple, List
import functools
print = functools.partial(print, flush=True)
STDERR_HEADER = '''====================================================
============== STDERR =============
====================================================
'''
STDOUT_HEADER = '''====================================================
============== STDOUT =============
====================================================
'''
# timeouts in seconds
MAX_TIMEOUT = 3600
DEFAULT_TIMEOUT = 600
@dataclass
class BenchmarkRunnerConfig:
    """Configuration for a BenchmarkRunner.

    Fields mirror the command-line flags accepted by ``from_args``.
    """

    benchmark_runner: str  # path to the benchmark_runner executable
    benchmark_file: str  # path to a file listing benchmarks, one per line
    verbose: bool = False  # echo runner stderr on success
    threads: Optional[int] = None  # forwarded as --threads=<n> when set
    memory_limit: Optional[str] = None  # forwarded as --memory_limit=<v> when set
    disable_timeout: bool = False  # when set, use max_timeout instead of the default
    max_timeout: int = MAX_TIMEOUT
    root_dir: str = ""  # forwarded as --root-dir <dir> when non-empty
    no_summary: bool = False  # suppress the failures summary

    @classmethod
    def from_params(cls, benchmark_runner, benchmark_file, **kwargs) -> "BenchmarkRunnerConfig":
        """Build a config programmatically; unspecified options fall back to defaults."""
        return cls(
            benchmark_runner=benchmark_runner,
            benchmark_file=benchmark_file,
            verbose=kwargs.get("verbose", False),
            threads=kwargs.get("threads", None),
            memory_limit=kwargs.get("memory_limit", None),
            disable_timeout=kwargs.get("disable_timeout", False),
            max_timeout=kwargs.get("max_timeout", MAX_TIMEOUT),
            root_dir=kwargs.get("root_dir", ""),
            no_summary=kwargs.get("no_summary", False),
        )

    @classmethod
    def from_args(cls) -> "BenchmarkRunnerConfig":
        """Build a config from command-line arguments (parses sys.argv)."""
        parser = argparse.ArgumentParser(description="Benchmark script with old and new runners.")
        # Define the arguments
        parser.add_argument("--path", type=str, help="Path to the benchmark_runner executable", required=True)
        parser.add_argument("--benchmarks", type=str, help="Path to the benchmark file.", required=True)
        parser.add_argument("--verbose", action="store_true", help="Enable verbose output.")
        parser.add_argument("--threads", type=int, help="Number of threads to use.")
        parser.add_argument("--memory_limit", type=str, help="Memory limit to use.")
        parser.add_argument("--disable-timeout", action="store_true", help="Disable timeout.")
        # Default from the module constant rather than a duplicated literal.
        parser.add_argument(
            "--max-timeout",
            type=int,
            default=MAX_TIMEOUT,
            help=f"Set maximum timeout in seconds (default: {MAX_TIMEOUT}).",
        )
        parser.add_argument("--root-dir", type=str, default="", help="Root directory.")
        # BUG FIX: this was `type=str, default=False`, which forced users to pass a
        # value after the flag and stored a truthy *string*; it is a boolean flag.
        parser.add_argument(
            "--no-summary", action="store_true", help="No failures summary is output when passing this flag."
        )
        # Parse arguments
        parsed_args = parser.parse_args()
        # Create an instance of BenchmarkRunnerConfig using parsed arguments
        return cls(
            benchmark_runner=parsed_args.path,
            benchmark_file=parsed_args.benchmarks,
            verbose=parsed_args.verbose,
            threads=parsed_args.threads,
            memory_limit=parsed_args.memory_limit,
            disable_timeout=parsed_args.disable_timeout,
            max_timeout=parsed_args.max_timeout,
            root_dir=parsed_args.root_dir,
            no_summary=parsed_args.no_summary,
        )
class BenchmarkRunner:
    """Runs a list of benchmarks through an external benchmark_runner executable."""

    def __init__(self, config: BenchmarkRunnerConfig):
        self.config = config
        # Every individual timing collected across all benchmark runs.
        self.complete_timings: List[float] = []
        # One benchmark path per non-empty line of the benchmark file.
        with open(self.config.benchmark_file, 'r') as f:
            self.benchmark_list: List[str] = [x.strip() for x in f.read().split('\n') if len(x) > 0]

    def construct_args(self, benchmark_path):
        """Build the argv list for one benchmark invocation from the config."""
        benchmark_args = [self.config.benchmark_runner, benchmark_path]
        if self.config.root_dir:
            benchmark_args.extend(['--root-dir', self.config.root_dir])
        if self.config.threads:
            benchmark_args.append(f"--threads={self.config.threads}")
        if self.config.memory_limit:
            benchmark_args.append(f"--memory_limit={self.config.memory_limit}")
        if self.config.disable_timeout:
            benchmark_args.append("--disable-timeout")
        if self.config.no_summary:
            benchmark_args.append("--no-summary")
        return benchmark_args

    def run_benchmark(self, benchmark) -> Tuple[Union[float, str], Optional[str]]:
        """Run a single benchmark and parse its timings.

        Returns:
            (median timing, None) on success, or
            (error message, error detail) on failure.
        """
        benchmark_args = self.construct_args(benchmark)
        timeout_seconds = DEFAULT_TIMEOUT
        if self.config.disable_timeout:
            timeout_seconds = self.config.max_timeout
        try:
            proc = subprocess.run(
                benchmark_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout_seconds
            )
            out = proc.stdout.decode('utf8')
            err = proc.stderr.decode('utf8')
            returncode = proc.returncode
        except subprocess.TimeoutExpired:
            print("Failed to run benchmark " + benchmark)
            print(f"Aborted due to exceeding the limit of {timeout_seconds} seconds")
            return (
                'Failed to run benchmark ' + benchmark,
                f"Aborted due to exceeding the limit of {timeout_seconds} seconds",
            )
        if returncode != 0:
            print("Failed to run benchmark " + benchmark)
            print(STDERR_HEADER)
            print(err)
            print(STDOUT_HEADER)
            print(out)
            if 'HTTP' in err:
                # HTTP failures are treated as environmental, not regressions; stop the run.
                print("Ignoring HTTP error and terminating the running of the regression tests")
                exit(0)
            return 'Failed to run benchmark ' + benchmark, err
        if self.config.verbose:
            print(err)
        # The runner emits tab-separated rows on stderr: a header row, then one
        # row per repetition with the timing in the third column.
        csv_reader = csv.reader(StringIO(err), delimiter='\t')
        header = True
        timings: List[float] = []
        try:
            for row in csv_reader:
                if len(row) == 0:
                    continue
                if header:
                    header = False
                    continue
                # BUG FIX: convert to float *before* taking the median. The
                # original collected strings, so statistics.median compared
                # them lexicographically (e.g. "10.0" < "9.0") and could
                # return the wrong median.
                timing = float(row[2])
                timings.append(timing)
                self.complete_timings.append(timing)
            return statistics.median(timings), None
        except (ValueError, IndexError, statistics.StatisticsError):
            # Malformed timing row, or no timing rows at all.
            # (Was a bare `except:`, which also swallowed SystemExit et al.)
            print("Failed to run benchmark " + benchmark)
            print(err)
            return 'Failed to run benchmark ' + benchmark, err

    def run_benchmarks(self, benchmark_list: Optional[List[str]] = None):
        """Run each benchmark and collect results.

        Args:
            benchmark_list: benchmarks to run; defaults to the list loaded
                from the configured benchmark file. (BUG FIX: main() calls
                this with no argument, which previously raised TypeError.)

        Returns:
            (results, failures) dicts keyed by benchmark name; failures maps
            to None when the benchmark succeeded.
        """
        if benchmark_list is None:
            benchmark_list = self.benchmark_list
        results = {}
        failures = {}
        for benchmark in benchmark_list:
            result, failure_message = self.run_benchmark(benchmark)
            results[benchmark] = result
            # Normalize empty failure messages to None, as the original did.
            failures[benchmark] = failure_message if failure_message else None
        return results, failures
def main():
    """Entry point: parse CLI arguments and run all configured benchmarks."""
    config = BenchmarkRunnerConfig.from_args()
    runner = BenchmarkRunner(config)
    # BUG FIX: run_benchmarks requires the list of benchmarks to run; the
    # original called it with no arguments, raising TypeError on every run.
    runner.run_benchmarks(runner.benchmark_list)


if __name__ == "__main__":
    main()