1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
|
import subprocess
import sys
import tempfile
import requests
# Script skeleton that is formatted and run in a fresh interpreter for every
# benchmarked library.
#
# Placeholders:
#   {path}  - path of the JSON file to decode (inserted with ``!r``, so it
#             appears as a quoted Python string literal).
#   {setup} - a snippet that must bind the name ``decode`` to a callable
#             accepting the raw file bytes.
#
# The script prints ``[mem_mib, time_ms]``:
#   mem_mib - growth of ``ru_maxrss`` from just after the input file is read
#             (before the setup snippet runs) to after the decode loop,
#             divided by 1024.
#   time_ms - mean wall-clock time of the five ``decode(data)`` calls, in ms.
#
# NOTE(review): the ``/ 1024`` conversion assumes ``ru_maxrss`` is reported in
# KiB (as on Linux) - confirm before trusting numbers from other platforms.
#
# The bodies of the ``with`` and ``for`` statements must stay indented: this
# text is executed verbatim as Python source.
TEMPLATE = """
import resource
import time
with open({path!r}, "rb") as f:
    data = f.read()
initial_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
{setup}
start = time.perf_counter()
for _ in range(5):
    decode(data)
stop = time.perf_counter()
max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
# mem_mib = (max_rss * 1024 - len(data)) / (1024 * 1024)
mem_mib = (max_rss - initial_rss) / 1024
time_ms = ((stop - start) / 5) * 1000
print([mem_mib, time_ms])
"""
# Setup snippets substituted for ``{setup}`` in TEMPLATE. Each one imports a
# single JSON library and binds that library's decoding function to the name
# ``decode``, which the template then calls on the raw file bytes.

# Standard-library ``json`` module.
JSON = """
import json
decode = json.loads
"""
# ``ujson`` package.
UJSON = """
import ujson
decode = ujson.loads
"""
# ``orjson`` package.
ORJSON = """
import orjson
decode = orjson.loads
"""
# ``rapidjson`` module (looked up as the ``python-rapidjson`` distribution
# when reporting versions in ``main``).
RAPIDJSON = """
import rapidjson
decode = rapidjson.loads
"""
# ``simdjson`` module (looked up as the ``pysimdjson`` distribution when
# reporting versions in ``main``).
SIMDJSON = """
import simdjson
decode = simdjson.loads
"""
# ``msgspec`` decoding with ``msgspec.json.decode`` and no target type.
MSGSPEC = """
import msgspec
decode = msgspec.json.decode
"""
# Setup snippet for the typed msgspec benchmark. It declares two
# ``msgspec.Struct`` classes (both created with ``gc=False``) describing the
# repodata document, and binds ``decode`` to the ``decode`` method of a
# ``msgspec.json.Decoder`` constructed for ``RepoData``.
#
# The class bodies must stay indented: this text is executed verbatim as
# Python source in the benchmark subprocess.
MSGSPEC_STRUCTS = """
import msgspec
from typing import Union
class Package(msgspec.Struct, gc=False):
    build: str
    build_number: int
    depends: tuple[str, ...]
    md5: str
    name: str
    sha256: str
    subdir: str
    version: str
    license: str = ""
    noarch: Union[str, bool, None] = None
    size: int = 0
    timestamp: int = 0
class RepoData(msgspec.Struct, gc=False):
    repodata_version: int
    info: dict
    packages: dict[str, Package]
    removed: tuple[str, ...]
decode = msgspec.json.Decoder(RepoData).decode
"""
def _format_table(results, baseline):
    """Render benchmark results as a text table.

    Args:
        results: Mapping of benchmark name to a ``(mem_mib, time_ms)`` pair.
        baseline: Key in ``results`` whose values the "vs." columns are
            computed against.

    Returns:
        The table as a single newline-joined string. Rows are ordered by the
        memory value as displayed (one decimal place), ascending; the header
        is underlined with ``=`` and every row is followed by a ``-`` rule.

    Raises:
        KeyError: If ``baseline`` is not present in ``results``.
    """
    best_mem, best_time = results[baseline]
    # Avoid division by zero if the baseline reported no memory growth.
    if not best_mem:
        best_mem = 1.0

    columns = ("", "memory (MiB)", "vs.", "time (ms)", "vs.")
    rows = [
        (
            f"**{name}**",
            f"{mem:.1f}",
            f"{mem / best_mem:.1f}x",
            f"{elapsed:.1f}",
            f"{elapsed / best_time:.1f}x",
        )
        for name, (mem, elapsed) in results.items()
    ]
    rows.sort(key=lambda row: float(row[1]))

    # Each column is as wide as its longest cell or its header.
    widths = tuple(
        max(len(header), max(map(len, cells)))
        for cells, header in zip(zip(*rows), columns)
    )

    def render(cells):
        return "|" + "".join(
            f" {cell:<{width}} |" for cell, width in zip(cells, widths)
        )

    bar = "+%s+" % "+".join("-" * (w + 2) for w in widths)
    bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths)

    parts = [bar, render(columns), bar_underline]
    for row in rows:
        parts.append(render(row))
        parts.append(bar)
    return "\n".join(parts)


def main():
    """Benchmark decoding a large JSON document with several libraries.

    Command-line options:
        -b / --bench-name: zero or more benchmark names to run (defaults to
            all). The "msgspec structs" benchmark always runs because it is
            the baseline for the comparison columns.
        --versions: print the installed version of each benchmarked
            distribution and exit with status 0.

    The conda-forge ``noarch`` ``repodata.json`` is downloaded into a
    temporary file, each selected benchmark script is run in its own
    subprocess, and a comparison table is printed to stdout.

    Raises:
        requests.HTTPError: If the download returns an error status.
        subprocess.CalledProcessError: If a benchmark subprocess fails.
    """
    import argparse
    import ast

    # (benchmark name, distribution name for --versions or None, setup snippet)
    benchmarks = [
        ("json", None, JSON),
        ("ujson", "ujson", UJSON),
        ("orjson", "orjson", ORJSON),
        ("rapidjson", "python-rapidjson", RAPIDJSON),
        ("simdjson", "pysimdjson", SIMDJSON),
        ("msgspec", "msgspec", MSGSPEC),
        ("msgspec structs", None, MSGSPEC_STRUCTS),
    ]
    benchmark_names = [name for name, _, _ in benchmarks]

    parser = argparse.ArgumentParser(
        description="Benchmark decoding a large JSON message using various JSON libraries"
    )
    parser.add_argument(
        "-b",
        "--bench-name",
        dest="bench_names",
        nargs="*",
        choices=benchmark_names,
        default=benchmark_names,
        help="A list of benchmark names to run. Defaults to all.",
    )
    parser.add_argument(
        "--versions",
        action="store_true",
        help="Output library version info, and exit immediately",
    )
    args = parser.parse_args()

    if args.versions:
        import importlib.metadata

        for _, lib, _ in benchmarks:
            if lib is not None:
                version = importlib.metadata.version(lib)
                print(f"- {lib}: {version}")
        sys.exit(0)

    bench_names = set(args.bench_names)
    # Used as the baseline for comparisons
    bench_names.add("msgspec structs")

    with tempfile.NamedTemporaryFile() as f:
        # Download the repodata.json. The timeout keeps a stalled connection
        # from hanging the benchmark indefinitely.
        resp = requests.get(
            "https://conda.anaconda.org/conda-forge/noarch/repodata.json",
            timeout=60,
        )
        resp.raise_for_status()
        f.write(resp.content)
        # The benchmark subprocesses reopen the file by name, so any bytes
        # still held in this process's write buffer must reach the file first.
        f.flush()

        # Run the benchmark for each library
        results = {}
        for name, _, setup in benchmarks:
            if name not in bench_names:
                continue
            script = TEMPLATE.format(path=f.name, setup=setup)
            # We execute each script in a subprocess to isolate their memory usage
            output = subprocess.check_output([sys.executable, "-c", script])
            results[name] = ast.literal_eval(output.decode())

    # Compose and print the results table
    print(_format_table(results, "msgspec structs"))


if __name__ == "__main__":
    main()
|