File: bench_encodings.py

package info (click to toggle)
python-msgspec 0.19.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,356 kB
  • sloc: javascript: 23,944; ansic: 20,540; python: 20,465; makefile: 29; sh: 19
file content (198 lines) | stat: -rw-r--r-- 5,661 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
from __future__ import annotations

import sys
import dataclasses
import json
import timeit
import importlib.metadata
from typing import Any, Literal, Callable

from .generate_data import make_filesystem_data

import msgspec


class File(msgspec.Struct, kw_only=True, omit_defaults=True, tag="file"):
    """Typed schema for one file entry in the benchmark's filesystem data.

    Reached through ``Directory.contents``. The ``tag="file"`` option gives
    msgspec a discriminator for telling a ``File`` apart from a ``Directory``
    inside the ``File | Directory`` union.
    """

    name: str
    created_by: str
    created_at: str
    # Both ``updated_*`` fields default to None; with ``omit_defaults=True``
    # a field still holding its default is left out of the encoded message.
    updated_by: str | None = None
    updated_at: str | None = None
    # Required fields may follow defaulted ones because of ``kw_only=True``.
    nbytes: int
    permissions: Literal["READ", "WRITE", "READ_WRITE"]


class Directory(msgspec.Struct, kw_only=True, omit_defaults=True, tag="directory"):
    """Typed schema for a directory entry; the root type of the benchmark data.

    This is the type handed to the typed msgspec decoders and used as the
    ``schema`` of the "msgspec structs" benchmarks. The ``tag="directory"``
    option distinguishes it from ``File`` inside ``contents``.
    """

    name: str
    created_by: str
    created_at: str
    # Both ``updated_*`` fields default to None; with ``omit_defaults=True``
    # a field still holding its default is left out of the encoded message.
    updated_by: str | None = None
    updated_at: str | None = None
    # Recursive: a directory holds files and further directories. The
    # self-reference resolves lazily thanks to the module's
    # ``from __future__ import annotations``. Required after defaulted fields
    # is allowed because of ``kw_only=True``.
    contents: list[File | Directory]


@dataclasses.dataclass
class Benchmark:
    label: str
    version: str
    encode: Callable
    decode: Callable
    schema: Any = None

    def run(self, data: bytes) -> dict:
        if self.schema is not None:
            data = msgspec.convert(data, self.schema)
        timer = timeit.Timer("func(data)", globals={"func": self.encode, "data": data})
        n, t = timer.autorange()
        encode_time = t / n

        data = self.encode(data)

        timer = timeit.Timer("func(data)", globals={"func": self.decode, "data": data})
        n, t = timer.autorange()
        decode_time = t / n

        return {
            "label": self.label,
            "encode": encode_time,
            "decode": decode_time,
        }


def json_benchmarks():
    """Return the ``Benchmark`` entries for the JSON protocol.

    The third-party JSON libraries are imported here rather than at module
    level, so they are only needed when this function is called.

    The typed "msgspec structs" entry carries no version; the untyped
    "msgspec" entry reports ``msgspec.__version__``.
    """
    import orjson
    import ujson
    import rapidjson
    import simdjson

    # NOTE(review): ujson.dumps is wrapped in a plain Python function instead
    # of being passed directly; the reason is not visible in this file.
    def ujson_encode(obj):
        return ujson.dumps(obj)

    # One shared msgspec encoder, plus a typed and an untyped decoder.
    msgspec_encoder = msgspec.json.Encoder()
    typed_decoder = msgspec.json.Decoder(Directory)
    untyped_decoder = msgspec.json.Decoder()

    # The rapidjson Encoder/Decoder instances are used directly as callables.
    rapidjson_encode = rapidjson.Encoder()
    rapidjson_decode = rapidjson.Decoder()

    # The version is looked up under the distribution name "pysimdjson".
    simdjson_version = importlib.metadata.version("pysimdjson")

    return [
        Benchmark(
            label="msgspec structs",
            version=None,
            encode=msgspec_encoder.encode,
            decode=typed_decoder.decode,
            schema=Directory,
        ),
        Benchmark(
            label="msgspec",
            version=msgspec.__version__,
            encode=msgspec_encoder.encode,
            decode=untyped_decoder.decode,
        ),
        Benchmark(label="json", version=None, encode=json.dumps, decode=json.loads),
        Benchmark(
            label="orjson",
            version=orjson.__version__,
            encode=orjson.dumps,
            decode=orjson.loads,
        ),
        Benchmark(
            label="ujson",
            version=ujson.__version__,
            encode=ujson_encode,
            decode=ujson.loads,
        ),
        Benchmark(
            label="rapidjson",
            version=rapidjson.__version__,
            encode=rapidjson_encode,
            decode=rapidjson_decode,
        ),
        Benchmark(
            label="simdjson",
            version=simdjson_version,
            encode=simdjson.dumps,
            decode=simdjson.loads,
        ),
    ]


def msgpack_benchmarks():
    """Return the ``Benchmark`` entries for the MessagePack protocol.

    ``msgpack`` and ``ormsgpack`` are imported here rather than at module
    level, so they are only needed when this function is called.

    The typed "msgspec structs" entry carries no version; the untyped
    "msgspec" entry reports ``msgspec.__version__``.
    """
    import msgpack
    import ormsgpack

    # One shared msgspec encoder, plus a typed and an untyped decoder.
    msgspec_encoder = msgspec.msgpack.Encoder()
    typed_decoder = msgspec.msgpack.Decoder(Directory)
    untyped_decoder = msgspec.msgpack.Decoder()

    return [
        Benchmark(
            label="msgspec structs",
            version=None,
            encode=msgspec_encoder.encode,
            decode=typed_decoder.decode,
            schema=Directory,
        ),
        Benchmark(
            label="msgspec",
            version=msgspec.__version__,
            encode=msgspec_encoder.encode,
            decode=untyped_decoder.decode,
        ),
        Benchmark(
            label="msgpack",
            version=msgpack.__version__,
            encode=msgpack.dumps,
            decode=msgpack.loads,
        ),
        Benchmark(
            label="ormsgpack",
            version=ormsgpack.__version__,
            encode=ormsgpack.packb,
            decode=ormsgpack.unpackb,
        ),
    ]


def _build_arg_parser():
    """Create the command-line parser for the benchmark script."""
    import argparse

    parser = argparse.ArgumentParser(
        description="Benchmark different python serialization libraries"
    )
    parser.add_argument(
        "--versions",
        action="store_true",
        help="Output library version info, and exit immediately",
    )
    parser.add_argument(
        "-n",
        type=int,
        help="The number of objects in the generated data, defaults to 1000",
        default=1000,
    )
    parser.add_argument(
        "-p",
        "--protocol",
        choices=["json", "msgpack"],
        default="json",
        help="The protocol to benchmark, defaults to JSON",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="whether to output the results as json",
    )
    return parser


def _format_results_table(results):
    """Render benchmark results as an ASCII grid table.

    Args:
        results: Non-empty list of dicts as returned by ``Benchmark.run``
            (keys ``"label"``, ``"encode"``, ``"decode"``; times in seconds).

    Returns:
        The table as one string. Rows are ordered by combined encode+decode
        time, fastest first, and times are shown in microseconds.
    """
    ordered = sorted(results, key=lambda row: row["encode"] + row["decode"])

    # Every "vs." ratio is relative to the row with the lowest *combined*
    # time, not to the per-column minimum. A ratio below 1.0 is therefore
    # possible when another library wins a single column.
    baseline = ordered[0]
    best_et = baseline["encode"]
    best_dt = baseline["decode"]
    best_tt = best_et + best_dt

    columns = (
        "",
        "encode (μs)",
        "vs.",
        "decode (μs)",
        "vs.",
        "total (μs)",
        "vs.",
    )
    rows = [
        (
            r["label"],
            f"{1_000_000 * r['encode']:.1f}",
            f"{r['encode'] / best_et:.1f}",
            f"{1_000_000 * r['decode']:.1f}",
            f"{r['decode'] / best_dt:.1f}",
            f"{1_000_000 * (r['encode'] + r['decode']):.1f}",
            f"{(r['encode'] + r['decode']) / best_tt:.1f}",
        )
        for r in ordered
    ]

    # Each column is as wide as its longest cell or its header.
    widths = tuple(
        max(max(map(len, cells)), len(title))
        for cells, title in zip(zip(*rows), columns)
    )
    # Two-stage %-formatting: the widths are substituted first, leaving a
    # template such as "| %-9s | %-11s | ..." for the actual cell values.
    row_template = ("|" + (" %%-%ds |" * len(columns))) % widths
    header = row_template % tuple(columns)
    bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths)
    bar = "+%s+" % "+".join("-" * (w + 2) for w in widths)

    parts = [bar, header, bar_underline]
    for row in rows:
        parts.append(row_template % row)
        parts.append(bar)
    return "\n".join(parts)


def main():
    """Parse the command line, run the selected benchmarks, print the results.

    With ``--versions`` only the library versions are printed and the process
    exits with status 0. Otherwise every benchmark of the chosen protocol is
    run on generated data, and the results are printed either as one JSON
    object per line (``--json``, in benchmark order) or as a table sorted by
    combined time.
    """
    args = _build_arg_parser().parse_args()

    benchmarks = json_benchmarks() if args.protocol == "json" else msgpack_benchmarks()

    if args.versions:
        for bench in benchmarks:
            # Entries without a version (e.g. the stdlib json module) are skipped.
            if bench.version is not None:
                print(f"- {bench.label}: {bench.version}")
        sys.exit(0)

    data = make_filesystem_data(args.n)

    results = [benchmark.run(data) for benchmark in benchmarks]

    if args.json:
        for line in results:
            print(json.dumps(line))
    else:
        print(_format_results_table(results))


# Script entry point. This module uses a relative import
# (``from .generate_data import ...``), so it has to be executed as part of
# its package (``python -m <package>.bench_encodings``), not as a bare file.
if __name__ == "__main__":
    main()