1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
|
#!/usr/bin/env python3
from pathlib import Path
import sys
import argparse
import json
import subprocess
import re
def main():
    """Run each registered simdutf benchmark and store its output as a report.

    The list of benchmarks is read from ``<build-dir>/benchmarks/benchmark
    --json``; only entries whose ``simdutf`` field is truthy are used. A
    benchmark is skipped when its name does not match ``--filter``, when it
    lists more than one input encoding, or when ``<output-dir>/<name>.txt``
    already exists. Otherwise the benchmark is run on the input files for its
    encoding and its raw stdout is written to that report file.

    Raises:
        RuntimeError: if the benchmark executable is missing from the build
            directory.
    """
    args = parse_args()

    benchmark = args.build_dir / 'benchmarks' / 'benchmark'
    if not benchmark.is_file():
        raise RuntimeError(f"{benchmark} not found, was simdutf compiled with `-DSIMDUTF_BENCHMARKS=ON`?")

    # load registered tests
    simdutf_tests = [entry for entry in json.loads(execute([benchmark, "--json"])) if entry["simdutf"]]

    # default=0 keeps an empty benchmark list from raising ValueError in max().
    maxlen = max((len(entry['name']) for entry in simdutf_tests), default=0)
    count = len(simdutf_tests)

    # The filter does not change between iterations, so compile it once.
    name_filter = re.compile(args.filter) if args.filter else None

    for index, test in enumerate(simdutf_tests, 1):
        name = test["name"]
        encodings = test["encodings"]

        # Emit the progress prefix right away; the verdict is appended to the
        # same line once it is known.
        print(f"[{index}/{count}] {name:{maxlen}}... ", end='', flush=True)

        if name_filter is not None and not name_filter.search(name):
            print(f"skipping, does not match filter '{args.filter}'")
            continue

        if len(encodings) > 1:
            print(f"skipping, requires multiple input encodings ({encodings})")
            continue

        report_name = (name + '.txt')
        report_path = args.output_dir / report_name
        if report_path.exists():
            # An existing report is never overwritten, so reruns resume.
            print("already benchmarked")
            continue

        enc = encodings[0]
        cmd = [benchmark, "-P", name, "-F"] + input_files(args.lipsum_dir, enc)
        result = execute(cmd)

        print(f"creating {report_path}")
        report_path.write_bytes(result)
def input_files(lipsum_dir, encoding):
    """Return the input files for ``encoding`` found in the lipsum checkout.

    Args:
        lipsum_dir: path object pointing at the lipsum directory; files are
            looked up in its ``wikipedia_mars`` subdirectory.
        encoding: key into the module-level ``patterns`` mapping.

    Returns:
        A list of matching paths, sorted so that the order handed to the
        benchmark does not depend on filesystem enumeration order.

    Raises:
        KeyError: if ``encoding`` has no entry in ``patterns``.
    """
    corpus_dir = lipsum_dir / 'wikipedia_mars'
    return sorted(corpus_dir.glob(patterns[encoding]))
# Glob pattern, per input encoding name, used to pick the input files
# in unicode_ipsum/wikipedia_mars. Note that the little-endian UTF-16 and
# UTF-32 files carry no "le" suffix in their names.
patterns = {
    'latin1': '*.latin1.txt',
    'utf8': '*.utf8.txt',
    'utf16le': '*.utf16.txt',
    'utf16be': '*.utf16be.txt',
    'utf32le': '*.utf32.txt',
}
def parse_args():
    """Parse the command line and return the resulting namespace.

    Returns:
        A namespace with ``build_dir``, ``lipsum_dir`` and ``output_dir``
        (each converted by ``existing_dir``) and ``filter`` (a regex string,
        or ``None`` when the option is not given).
    """
    # The banner must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would replace the
    # program name in the usage line.
    p = argparse.ArgumentParser(description="SIMDUTF tool for benchmarking")
    p.add_argument("--build-dir",
                   required=True,
                   type=existing_dir,
                   metavar="DIR",
                   help="simdutf build directory")
    p.add_argument("--lipsum-dir",
                   required=True,
                   type=existing_dir,
                   metavar="DIR",
                   help="unicode lorem lipsum directory")
    p.add_argument("--output-dir",
                   required=True,
                   type=existing_dir,
                   metavar="DIR",
                   help="output directory")
    p.add_argument("--filter",
                   type=str,
                   metavar="PATTERN",
                   help="only run benchmarks whose name matches this regex pattern")

    return p.parse_args()
def existing_dir(s):
    """Convert a command-line string into an absolute path of an existing directory.

    Intended as an argparse ``type=`` callable.

    Args:
        s: directory path as given on the command line; a leading ``~`` is
            expanded and a relative path is made absolute.

    Returns:
        The absolute ``Path`` of the directory.

    Raises:
        argparse.ArgumentTypeError: if the path is not an existing directory.
    """
    path = Path(s).expanduser().absolute()
    if not path.is_dir():
        # ArgumentTypeError is the exception argparse turns into a usage
        # error carrying this message.
        raise argparse.ArgumentTypeError(f"'{path}' is not a directory")

    return path
def execute(params):
    """Run a command and return its captured standard output.

    Args:
        params: iterable of command-line items; each one is converted with
            ``str()`` so path objects can be passed directly.

    Returns:
        The raw bytes the command wrote to stdout.

    Raises:
        ValueError: if the command exits with a non-zero return code. Before
            raising, the command line, its captured stderr/stdout and the
            return code are printed.
    """
    cmd = [str(param) for param in params]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        print(' '.join(cmd))
        for label, captured in (("stderr", result.stderr), ("stdout", result.stdout)):
            if captured:
                print(f"{label}:")
                # Decode so multi-line output is shown as text rather than as
                # a bytes repr; undecodable bytes are replaced, not fatal.
                print(captured.decode(errors="replace"))

        print(f"return code: {result.returncode}")
        raise ValueError("command failed")

    return result.stdout
# Entry point: run the benchmark driver only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|