File: graph

package info (click to toggle)
python-orjson 3.10.7-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,180 kB
  • sloc: ansic: 11,270; python: 6,658; sh: 135; makefile: 9
file content (152 lines) | stat: -rwxr-xr-x 4,574 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env python3
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import collections
import io
import math
import os

import pandas as pd
import seaborn
from matplotlib import pyplot
from tabulate import tabulate

import orjson

# Libraries benchmarked, in display order. orjson must come first: the
# relative-latency column in tab() uses table[0] (orjson) as its baseline.
LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json")


def aggregate():
    """Collect pytest-benchmark JSON output into ``{group: {lib: stats}}``.

    Reads every results file in the first directory found under
    ``.benchmarks`` and converts second-based timings to milliseconds.
    """
    results_dir = os.path.join(".benchmarks", os.listdir(".benchmarks")[0])
    aggregated = collections.defaultdict(dict)
    for name in os.listdir(results_dir):
        path = os.path.join(results_dir, name)
        with open(path, "r") as handle:
            report = orjson.loads(handle.read())

        for bench in report["benchmarks"]:
            stats = bench["stats"]
            extra = bench["extra_info"]
            aggregated[bench["group"]][extra["lib"]] = {
                "data": [sample * 1000 for sample in stats["data"]],
                "median": stats["median"] * 1000,
                "ops": stats["ops"],
                "correct": extra["correct"],
            }
    return aggregated


def tab(obj):
    """Render aggregated benchmark results as markdown tables and bar charts.

    ``obj`` maps benchmark group name -> library name -> stats dict, as
    produced by ``aggregate()``.  One github-format table per group is
    printed to stdout; one bar chart per operation (relative ops/sec vs
    stdlib json) is written to ``doc/<operation>.png``.
    """
    buf = io.StringIO()
    headers = (
        "Library",
        "Median latency (milliseconds)",
        "Operations per second",
        "Relative (latency)",
    )

    seaborn.set(rc={"figure.facecolor": (0, 0, 0, 0)})
    seaborn.set_style("darkgrid")

    barplot_data = []
    for group, val in sorted(obj.items(), reverse=True):
        buf.write("\n" + "#### " + group + "\n\n")
        table = []
        # Group names look like "<document> <operation>", e.g.
        # "twitter.json serialization"; split them for the plot data.
        # BUGFIX: the previous code used str.strip("serialization"), which
        # strips a *character set* rather than a substring and mangled
        # "twitter.json" into "witter.json" (then patched it back later).
        # str.replace removes the literal word; replace the longer
        # "deserialization" first because it contains "serialization".
        operation = (
            "deserialization" if "deserialization" in group else "serialization"
        )
        document = (
            group.replace("deserialization", "").replace("serialization", "").strip()
        )
        for lib in LIBRARIES:
            correct = val[lib]["correct"]
            table.append(
                [
                    lib,
                    val[lib]["median"] if correct else None,
                    int(val[lib]["ops"]) if correct else None,
                    0,  # placeholder; relative latency is filled in below
                ]
            )
            barplot_data.append(
                {
                    "operation": operation,
                    "group": document,
                    "library": lib,
                    "latency": val[lib]["median"],
                    "operations": int(val[lib]["ops"]) if correct else None,
                }
            )

        # orjson is first in LIBRARIES, so table[0] is the latency baseline.
        orjson_baseline = table[0][1]
        for each in table:
            each[3] = (
                "%.1f" % (each[1] / orjson_baseline)
                if isinstance(each[1], float)
                else None
            )
            # github.json latencies are small; show an extra decimal place.
            if group.startswith("github"):
                each[1] = "%.2f" % each[1] if isinstance(each[1], float) else None
            else:
                each[1] = "%.1f" % each[1] if isinstance(each[1], float) else None

        buf.write(tabulate(table, headers, tablefmt="github") + "\n")

    for operation in ("deserialization", "serialization"):
        per_op_data = [
            each for each in barplot_data if each["operation"] == operation
        ]
        if not per_op_data:
            continue

        max_y = 0

        # stdlib json is the 1.0x reference for each document.
        json_baseline = {}
        for each in per_op_data:
            if each["library"] == "json":
                json_baseline[each["group"]] = each["operations"]

        for each in per_op_data:
            relative = each["operations"] / json_baseline[each["group"]]
            each["relative"] = relative
            max_y = max(max_y, relative)

        p = pd.DataFrame.from_dict(per_op_data)

        graph = seaborn.barplot(
            p,
            x="group",
            y="relative",
            orient="x",
            hue="library",
            errorbar="sd",
            legend="brief",
        )
        graph.set_xlabel("Document")
        graph.set_ylabel("Operations/second relative to stdlib json")

        pyplot.title(operation)

        # Ensure the Y axis always includes the 1x reference and the rounded
        # maximum; round the maximum up to an even integer when above 10.
        max_y = int(math.ceil(max_y))
        if max_y > 10 and max_y % 2 > 0:
            max_y = max_y + 1
        pyplot.gca().set_yticks(
            list(
                {1, max_y}.union(
                    set(int(y) for y in pyplot.gca().get_yticks() if int(y) <= max_y)
                )
            )
        )

        # Label ticks as multipliers ("2x") rather than raw numbers.
        pyplot.gca().set_yticklabels([f"{x}x" for x in pyplot.gca().get_yticks()])

        # Dashed horizontal reference line for stdlib json (1x).
        pyplot.axhline(y=1, color="#999", linestyle="dashed")

        pyplot.savefig(fname=f"doc/{operation}", dpi=300)
        pyplot.close()

    print(buf.getvalue())


if __name__ == "__main__":
    # Guard the entry point so importing this module for reuse does not
    # trigger filesystem reads, plotting, and file writes as side effects.
    tab(aggregate())