File: bm_parse.py

package info (click to toggle)
rapidyaml 0.10.0+ds-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 53,676 kB
  • sloc: cpp: 73,851; python: 3,678; javascript: 414; xml: 253; makefile: 96; sh: 44
file content (237 lines) | stat: -rw-r--r-- 7,038 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import ryml
import ruamel.yaml
import yaml
import timeit
import time
import copy
import prettytable
import os.path
from collections import OrderedDict as odict


def _nodbg(*args, **kwargs):
    """Accept any arguments and do nothing; silent stand-in for ``_dbg``."""
    return None


def _dbg(*args, **kwargs):
    """Print a debug/progress message to stderr and flush immediately.

    Positional and keyword arguments are forwarded to ``print``; ``file``
    and ``flush`` are fixed here and must not be passed by the caller.
    """
    # Imported locally: the module only imports ``sys`` inside its
    # ``__main__`` guard, so without this the call raises NameError whenever
    # the file is imported instead of being run as a script.
    import sys
    print(*args, **kwargs, file=sys.stderr, flush=True)


# Debug hook called by the benchmark driver. Bound to the stderr printer
# here; rebind to _nodbg to silence the progress messages.
dbg = _dbg


class RunResults:
    """Timing summary for one benchmark run.

    Holds the run label (``name``), total elapsed milliseconds (``time_ms``),
    iteration count (``count``), mean milliseconds per iteration (``avg``)
    and throughput in megabytes (10**6 bytes) per second (``MBps``).
    """

    __slots__ = ('name', 'time_ms', 'count', 'avg', 'MBps')

    def __init__(self, name, time_ms, count, num_bytes):
        """Store the raw measurements and derive ``avg`` and ``MBps``.

        ``num_bytes`` is the amount of input processed by one iteration.
        """
        self.name, self.time_ms, self.count = name, time_ms, count
        self.avg = time_ms / count
        # total data pushed through all iterations, and the time it took
        megabytes_total = count * num_bytes / 1.0e6
        elapsed_seconds = time_ms / 1000.0
        self.MBps = megabytes_total / elapsed_seconds

    def __str__(self):
        """Return a one-line human-readable rendering of the results."""
        template = "{}: count={} time={:.3f}ms avg={:.3f}ms MB/s={:.3f}"
        return template.format(
            self.name, self.count, self.time_ms, self.avg, self.MBps)


class BmCase:
    """One benchmark input file plus the buffers the approaches operate on.

    The file contents are kept as ``str``, ``bytes`` and ``bytearray`` so
    that every approach can pick the representation it needs.
    """

    def __init__(self, filename):
        """Load ``filename`` and prepare the parse and emit fixtures."""
        # Decode explicitly as UTF-8: the text is re-encoded as UTF-8 right
        # below, so relying on the platform default encoding could corrupt
        # (or fail to read) non-ASCII input.
        with open(filename, "r", encoding="utf8") as f:
            src = f.read()
        self.filename = filename
        self.src_as_str = src
        self.src_as_bytes = bytes(src, "utf8")
        self.src_as_bytearray = bytearray(src, "utf8")
        # pristine copy: the in-place approaches' reset hooks restore
        # src_as_bytearray from it
        self.src_as_bytearray_orig = copy.copy(self.src_as_bytearray)
        # tree parsed once up front; it is the input of the emit benchmarks
        self.emittree = ryml.parse_in_arena(self.src_as_bytearray)
        # sized from the encoded length, since the buffer holds bytes
        self.emitbuf = bytearray(4 * len(self.src_as_bytes))  # should be enough

    def run(self, bm_method_name, cls):
        """Benchmark the method ``bm_method_name`` of approach ``cls``.

        ``cls`` is instantiated with this case. When the instance has no
        attribute called ``bm_method_name`` the approach does not implement
        this benchmark and ``None`` is returned. Otherwise the method is
        timed and a ``RunResults`` is returned.

        If the instance also provides ``reset_<bm_method_name>``, it is
        called after every iteration; its cost is then measured separately
        and subtracted from the reported time.
        """
        def run_bm(obj, subject):
            """Time ``subject`` via timeit's autorange.

            Returns ``(elapsed_ms, calls)`` where ``calls`` is read from
            ``obj.count``, which ``subject`` is expected to increment.
            """
            obj.count = 0
            timer = timeit.Timer(subject)
            # perf_counter: monotonic, high-resolution clock meant for
            # measuring intervals (time.time can jump on clock adjustments)
            start = time.perf_counter()
            timer.autorange()
            elapsed_ms = 1000. * (time.perf_counter() - start)
            return elapsed_ms, obj.count
        obj = cls(self)
        if not hasattr(obj, bm_method_name):
            return None
        name = bm_method_name + ":" + cls.__name__
        dbg(name, "...")
        method = getattr(obj, bm_method_name)
        reset_name = 'reset_' + bm_method_name
        reset_fn = getattr(obj, reset_name, None)
        def bm_fn():
            method(self)
            obj.count += 1
            if reset_fn is not None:
                reset_fn(self)
        delta, count = run_bm(obj, bm_fn)
        # correct the benchmark to account for the time spent
        # resetting
        if reset_fn is not None:
            # find out how much it takes to reset the bytearray
            if not hasattr(obj, 'bm_reset_done'):
                def bm_reset():
                    reset_fn(self)
                    obj.count += 1
                rdelta, rcount = run_bm(obj, bm_reset)
                obj.bm_reset_time_per_iteration = rdelta / rcount
                dbg(name, "reset_time_per_iteration={:.3f}us".format(obj.bm_reset_time_per_iteration * 1000.0))
                obj.bm_reset_done = True
            reset_correction = count * obj.bm_reset_time_per_iteration
            dbg(name, "delta={:.3f}ms".format(delta), "reset_correction={:.3f}ms({:.2f}%)".format(reset_correction, 100.0 * reset_correction / delta))
            delta -= reset_correction
        # throughput is per byte, so pass the encoded size rather than the
        # number of characters (they differ for non-ASCII input)
        ret = RunResults(name, delta, count, len(self.src_as_bytes))
        dbg(name, "ok:", ret)
        return ret


def run(case, benchmarks, approaches):
    """Run every benchmark over all approaches and print one table each.

    For each name in ``benchmarks``, every class in ``approaches`` is passed
    to ``case.run``; approaches for which it returns ``None`` are left out
    of the table. The table's first column is headed by the base name of
    the case's file.
    """
    as_int = "{:5d}".format
    as_float = "{:8.3f}".format
    for bm in benchmarks:
        # all approaches are executed before the table is assembled
        outcomes = (case.run(bm, cls) for cls in approaches)
        results = odict((r.name, r) for r in outcomes if r is not None)
        table = prettytable.PrettyTable()
        title = os.path.basename(case.filename)
        table.field_names = [title, "count", "time(ms)", "avg(ms)", "avg(MB/s)"]
        table.align[title] = "l"
        for res in results.values():
            table.add_row([
                res.name,
                as_int(res.count),
                as_float(res.time_ms),
                as_float(res.avg),
                as_float(res.MBps),
            ])
        print(table)


class BmCaseRun:
    """Common base class of the benchmark approaches."""

    def __init__(self, case):
        """Initialise the ``reset_bytearray`` flag; ``case`` is not used here."""
        self.reset_bytearray = False


class RymlParseInArena(BmCaseRun):
    """Parse approach: ``ryml.parse_in_arena`` without supplying a tree."""

    def parse(self, case):
        """Parse the case's bytearray source; the result is discarded."""
        ryml.parse_in_arena(case.src_as_bytearray)


class RymlParseInArenaReuse(BmCaseRun):
    """Parse approach: ``ryml.parse_in_arena`` into one tree kept across iterations."""

    def __init__(self, case):
        """Create the tree that every ``parse`` call parses into."""
        # Run the base initialiser as well, so this approach carries the
        # same attributes as the ones that do not override __init__.
        super().__init__(case)
        self.tree = ryml.Tree()

    def parse(self, case):
        """Parse the case's bytearray source into the reused tree."""
        ryml.parse_in_arena(case.src_as_bytearray, tree=self.tree)

    def reset_parse(self, case):
        """Clear the reused tree and its arena; run by the driver after each parse."""
        self.tree.clear()
        self.tree.clear_arena()


class RymlParseInPlace(BmCaseRun):
    """Parse approach: ``ryml.parse_in_place`` without supplying a tree."""

    def parse(self, case):
        """Parse the case's bytearray source; the result is discarded."""
        ryml.parse_in_place(case.src_as_bytearray)

    def reset_parse(self, case):
        """Replace the working bytearray with a fresh copy of the original."""
        # presumably needed because in-place parsing modifies the buffer
        pristine = case.src_as_bytearray_orig
        case.src_as_bytearray = copy.copy(pristine)


class RymlParseInPlaceReuse(BmCaseRun):
    """Parse approach: ``ryml.parse_in_place`` into one tree kept across iterations."""

    def __init__(self, case):
        """Create the tree that every ``parse`` call parses into."""
        # Run the base initialiser as well, so this approach carries the
        # same attributes as the ones that do not override __init__.
        super().__init__(case)
        self.tree = ryml.Tree()

    def parse(self, case):
        """Parse the case's bytearray source into the reused tree."""
        ryml.parse_in_place(case.src_as_bytearray, tree=self.tree)

    def reset_parse(self, case):
        """Clear the reused tree and restore the working bytearray.

        Run by the driver after each parse.
        """
        self.tree.clear()
        self.tree.clear_arena()
        # presumably needed because in-place parsing modifies the buffer
        case.src_as_bytearray = copy.copy(case.src_as_bytearray_orig)


class RuamelYamlParse(BmCaseRun):
    """Parse approach: ``ruamel.yaml.load`` with ``ruamel.yaml.Loader``."""

    def parse(self, case):
        """Load the case's source string; the result is discarded."""
        # NOTE(review): the module-level ruamel.yaml.load() is a legacy API
        # that newer ruamel.yaml releases are reported to deprecate in
        # favour of the YAML() object - confirm against the installed version.
        ruamel.yaml.load(case.src_as_str, Loader=ruamel.yaml.Loader)


class PyYamlParse(BmCaseRun):
    """Parse approach: PyYAML's ``yaml.safe_load``."""

    def parse(self, case):
        """Load the case's source string; the result is discarded."""
        yaml.safe_load(case.src_as_str)


class RymlEmitToNewBuffer(BmCaseRun):
    """Emit approach: ``ryml.emit_yaml`` / ``ryml.emit_json`` on the case's tree."""

    def emit_yaml(self, case):
        """Emit the case's pre-parsed tree as YAML; the result is discarded."""
        ryml.emit_yaml(case.emittree)

    def emit_json(self, case):
        """Emit the case's pre-parsed tree as JSON; the result is discarded."""
        ryml.emit_json(case.emittree)


class RymlEmitReuse(BmCaseRun):
    """Emit approach: the ``*_in_place`` emitters, given the case's ``emitbuf``."""

    def emit_yaml(self, case):
        """Emit the pre-parsed tree as YAML, passing the preallocated buffer."""
        ryml.emit_yaml_in_place(case.emittree, case.emitbuf)

    def emit_json(self, case):
        """Emit the pre-parsed tree as JSON, passing the preallocated buffer."""
        ryml.emit_json_in_place(case.emittree, case.emitbuf)


class RuamelYamlEmit:
    """Emit approach: ``ruamel.yaml.YAML().dump`` of a ruamel-loaded document."""

    class _ByteSink:
        """Minimal write-only stream collecting what the dumper writes.

        Defined once at class level so the timed ``emit_yaml`` call does not
        pay for re-creating the class on every iteration.
        """
        # https://stackoverflow.com/a/47617341/5875572

        def __init__(self, *args, **kwargs):
            self.s = b""

        def write(self, s):
            self.s += s

    def __init__(self, case):
        """Load the document to emit and store it on ``case``."""
        # NOTE(review): the module-level ruamel.yaml.load() is a legacy API
        # that newer ruamel.yaml releases are reported to deprecate in
        # favour of the YAML() object - confirm against the installed version.
        case.ruamel_emittree = ruamel.yaml.load(case.src_as_str, Loader=ruamel.yaml.Loader)

    def emit_yaml(self, case):
        """Dump the loaded document into a throw-away in-memory sink."""
        # a single sink per call (previously one was created and left
        # unused while a second instance was handed to the dumper)
        sink = self._ByteSink()
        ruamel.yaml.YAML().dump(case.ruamel_emittree, sink)


class PyYamlEmit:
    """Emit approach: PyYAML's ``yaml.dump`` of a PyYAML-loaded document."""

    def __init__(self, case):
        """Load the document to emit and store it on ``case``."""
        # NOTE(review): yaml.Loader is PyYAML's unrestricted loader and must
        # only ever be given trusted input; fine for local benchmark files.
        document = yaml.load(case.src_as_str, Loader=yaml.Loader)
        case.pyyaml_emittree = document

    def emit_yaml(self, case):
        """Dump the loaded document; the resulting string is discarded."""
        yaml.dump(case.pyyaml_emittree)


# Script entry point: benchmark the YAML file named by the first argument,
# first the parse approaches, then the emit approaches.
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        # say what is missing instead of raising with an empty message
        raise Exception("usage: {} <yaml-file>".format(sys.argv[0]))
    filename = sys.argv[1]
    if filename.endswith("outer1000_inner1000.yml"):  # this one is too heavy for the Python libs
        # sys.exit rather than the site-provided exit() helper, which is
        # intended for the interactive shell and not always available
        sys.exit(0)
    case = BmCase(filename)
    run(case, benchmarks=('parse', ),
        approaches=(RuamelYamlParse,
                    PyYamlParse,
                    RymlParseInArena,
                    RymlParseInArenaReuse,
                    RymlParseInPlace,
                    RymlParseInPlaceReuse))
    run(case, benchmarks=('emit_yaml', 'emit_json', ),
        approaches=(RuamelYamlEmit,
                    PyYamlEmit,
                    RymlEmitToNewBuffer,
                    RymlEmitReuse))