File: benchmark.py

package info (click to toggle)
postgresql-pgmp 1.0.5-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 556 kB
  • sloc: ansic: 2,059; sql: 853; python: 591; makefile: 101; sh: 15
file content (440 lines) | stat: -rwxr-xr-x 13,707 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
#!/usr/bin/env python
"""Script to perform comparisons between pmpz and other data types.

Copyright (C) 2011-2020 - Daniele Varrazzo
"""
import sys

import psycopg2

import logging
# Send the progress messages to stderr, each one prefixed by a timestamp and
# the level name; the benchmark results themselves are printed on stdout.
logging.basicConfig(
    stream=sys.stderr,
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)s %(message)s",
)
# The root logger is the one used by the whole script.
logger = logging.getLogger()


class SkipTest(Exception):
    """Signal that a test cannot be run and has to be skipped.

    The exception message is the reason for skipping the test.
    """
    pass

class Benchmark(object):
    """Base class for a test.

    Tests should subclass and create methods called test_whatever. There can
    also be methods setup_whatever that will be invoked just once for each
    combination of parameters (the test itself is repeated if -r is not 1).
    Both kinds of method are called as method(n, s), with n taken from
    opt.nsamples and s from opt.size; a setup method may raise SkipTest to
    have its test skipped for that combination.

    The output goes to stdout: three header lines (title, xlabel, ylabel)
    followed by one "class-name test-name n s result" line for each test and
    combination of parameters.
    """
    # Subclass may change these details that will appear in the plots.
    title = "Benchmark"
    xlabel = "x axis"
    ylabel = "y axis"

    def __init__(self, opt):
        # opt: the parsed command line options. run() reads its dsn,
        # nsamples, size and repeats attributes.
        self.opt = opt

    def run(self):
        """Connect to the database, run all the tests and print the results.

        The connection is available to the test methods as self.conn and is
        closed when the run is over, whether it succeeded or failed.
        """
        opt = self.opt
        self.conn = psycopg2.connect(opt.dsn)
        try:
            # Isolation level 0 is autocommit in psycopg2: statements are
            # not wrapped in a transaction, as required e.g. by VACUUM.
            self.conn.set_isolation_level(0)

            tests = self._collect_tests()

            # print() with a single pre-formatted string behaves the same
            # as a statement (Python 2) and as a function (Python 3).
            print("title: %s" % (self.title,))
            print("xlabel: %s" % (self.xlabel,))
            print("ylabel: %s" % (self.ylabel,))

            for n in opt.nsamples:
                for s in opt.size:
                    for name, f in tests:
                        # test initialization
                        if not self._setup_test(name, n, s):
                            continue

                        # test run
                        results = []
                        for i in range(opt.repeats):
                            logger.info("test %s (n=%d s=%s) run %d of %d",
                                name, n, s, i+1, opt.repeats)
                            results.append(f(n, s))
                            logger.info("result: %s", results[-1])

                        result = self.best_value(results)
                        print("%s %s %s %s %s" % (
                            self.__class__.__name__, name, n, s, result))
        finally:
            self.conn.close()

    def _collect_tests(self):
        """Return the list of (name, method) of the tests, sorted by name.

        The tests are the attributes whose name starts with "test_"; the
        name returned is stripped of that prefix.
        """
        # The names returned by dir() are unique, so sorting them never ends
        # up comparing the bound methods, which Python 3 cannot order.
        return [(k[5:], getattr(self, k))
            for k in sorted(dir(self)) if k.startswith('test_')]

    def _setup_test(self, name, n, s):
        """Call the setup method of the test *name*, if there is one.

        Return False if the setup raised SkipTest, i.e. the test must not
        be run with these parameters, else True.
        """
        setup = getattr(self, 'setup_' + name, None)
        if setup:
            logger.info("setup %s: n=%d s=%d", name, n, s)
            try:
                setup(n, s)
            except SkipTest as e:
                logger.info("skipping %s (n=%d s=%d): %s",
                    name, n, s, e)
                return False

        return True

    def best_value(self, results):
        """Take the best values from a list of results.

        The best value is the minimum: subclasses may override the method
        if a different choice is better for what they measure.
        """
        return min(results)


class SumRational(Benchmark):
    """Test the time used to perform sum(x) for mpq and decimal data types.

    The two tables are filled using the same random seed and the same insert
    statement, so the types represent the same values.

    The test requires the scale of the numbers (the -p/--scale option): the
    random integers generated are divided by 10 ** scale.
    """
    _title = "Time for sum() for values with scale %s"
    xlabel = "Numbers size (in decimal digits)"
    ylabel = "Time (in millis)"

    @property
    def title(self):
        return self._title % self._get_scale()

    def _get_scale(self):
        """Return the scale requested by the user.

        Raise ValueError if no scale was specified: the option is not
        mandatory on the command line, but without it the test would fail
        later with an obscure error (10 ** None, numeric(s,None)).
        """
        scale = self.opt.scale
        if scale is None:
            raise ValueError(
                "SumRational needs the scale of the numbers: "
                "please specify -p/--scale")
        return scale

    def setup_numeric(self, n, s):
        self._setup(n, s, "test_sum_rat_numeric",
            "create table test_sum_rat_numeric (n numeric(%s,%s));"
                % (s, self._get_scale()))

    def test_numeric(self, n, s):
        return self._test("test_sum_rat_numeric")

    def setup_mpq(self, n, s):
        self._setup(n, s, "test_sum_rat_mpq",
            "create table test_sum_rat_mpq (n mpq);")

    def test_mpq(self, n, s):
        return self._test("test_sum_rat_mpq")

    def _setup(self, n, s, table, query):
        """(Re)create *table* using *query* and fill it with n numbers.

        The numbers are urandomm(10 ** s) / 10 ** scale, generated after
        resetting the random state to a fixed seed.
        """
        # Check the scale before touching the database.
        scale = self._get_scale()

        cur = self.conn.cursor()
        cur.execute("drop table if exists %s;" % table)
        cur.execute(query)
        # The table name is merged by Python; the doubled %% survive it and
        # become the placeholders filled by psycopg2.
        cur.execute("""
            select randinit();
            select randseed(31415926);

            insert into %s
            select urandomm(%%(max)s::mpz)::mpq / %%(scale)s
            from generate_series(1, %%(n)s);
            """ % table, {
            'max': 10 ** s,
            'scale': 10 ** scale,
            'n': n})

        cur.execute("vacuum analyze %s;" % table)

    def _test(self, table):
        """Return the time reported by explain analyze for sum(n) on *table*.

        The value is the second-last word of the last line of the output
        (presumably "... <time> ms" -- it is reported as millis in ylabel).
        """
        cur = self.conn.cursor()
        cur.execute("explain analyze select sum(n) from %s;" % table)
        recs = cur.fetchall()
        return float(recs[-1][0].split()[-2])


class SumInteger(Benchmark):
    """Time sum(n) on a single-column table, once for each data type.

    For every type tested the setup creates a table test_sum_<type> and
    fills it with n numbers generated by urandomm(10 ** s); the test then
    measures the query summing the column.
    """
    title = "Time to calculate sum() on a table"
    xlabel = "Numbers size (in decimal digits)"
    ylabel = "Time (in millis)"

    # -- mpq

    def setup_mpq(self, n, s):
        ddl = "create table test_sum_mpq (n mpq);"
        self._setup(n, s, "test_sum_mpq", ddl)

    def test_mpq(self, n, s):
        return self._test('test_sum_mpq')

    # -- mpz

    def setup_mpz(self, n, s):
        ddl = "create table test_sum_mpz (n mpz);"
        self._setup(n, s, "test_sum_mpz", ddl)

    def test_mpz(self, n, s):
        return self._test('test_sum_mpz')

    # -- numeric

    def setup_numeric(self, n, s):
        ddl = "create table test_sum_numeric (n numeric);"
        self._setup(n, s, "test_sum_numeric", ddl)

    def test_numeric(self, n, s):
        return self._test('test_sum_numeric')

    # -- int8

    def setup_int8(self, n, s):
        # Numbers of more than 18 digits are not tested with int8.
        if s > 18:
            reason = "skipping test with %d digits" % s
            raise SkipTest(reason)

        ddl = "create table test_sum_int8 (n int8);"
        self._setup(n, s, "test_sum_int8", ddl)

    def test_int8(self, n, s):
        return self._test('test_sum_int8')

    # -- int4

    def setup_int4(self, n, s):
        # Numbers of more than 9 digits are not tested with int4.
        if s > 9:
            reason = "skipping test with %d digits" % s
            raise SkipTest(reason)

        ddl = "create table test_sum_int4 (n int4);"
        self._setup(n, s, "test_sum_int4", ddl)

    def test_int4(self, n, s):
        return self._test('test_sum_int4')

    # -- helpers shared by all the types

    def _setup(self, n, s, table, query):
        """Drop *table*, create it again with *query* and fill it."""
        cursor = self.conn.cursor()
        cursor.execute("drop table if exists %s;" % table)
        cursor.execute(query)

        # The table name is merged here; the doubled %% are left as
        # placeholders for the arguments passed to execute().
        # NOTE(review): the "s" trailing the urandomm() call looks like a
        # typo; presumably the database reads it as a column alias --
        # confirm before removing it.
        fill = """
            select randinit();
            select randseed(31415926);

            insert into %s
            select urandomm(%%(max)s::mpz)s
            from generate_series(1, %%(n)s);
            """ % table
        args = {'max': 10 ** s, 'n': n}
        cursor.execute(fill, args)

        cursor.execute("vacuum analyze %s;" % table)

    def _test(self, table):
        """Return the time reported by explain analyze for the sum query."""
        cursor = self.conn.cursor()
        query = "explain analyze select sum(n) from %s;" % table
        cursor.execute(query)
        # The number wanted is the second-last word of the last line.
        last_line = cursor.fetchall()[-1][0]
        words = last_line.split()
        return float(words[-2])


class Arith(Benchmark):
    """Time the query sum(a + b * c / d) on a table of four columns.

    For every type tested the setup creates a table test_arith_<type> with
    the columns a, b, c, d and fills it with n rows of numbers generated by
    urandomm(10 ** s); d has 1 added to the generated number.
    """
    title = "Performance on arithmetic operations"
    xlabel = "Numbers size (in decimal digits)"
    ylabel = "Time (in millis)"

    # -- mpq

    def setup_mpq(self, n, s):
        ddl = """
            create table test_arith_mpq
                (a mpq, b mpq, c mpq, d mpq);"""
        self._setup(n, s, "test_arith_mpq", ddl)

    def test_mpq(self, n, s):
        return self._test("test_arith_mpq")

    # -- mpz

    def setup_mpz(self, n, s):
        ddl = """
            create table test_arith_mpz
                (a mpz, b mpz, c mpz, d mpz);"""
        self._setup(n, s, "test_arith_mpz", ddl)

    def test_mpz(self, n, s):
        return self._test("test_arith_mpz")

    # -- numeric

    def setup_numeric(self, n, s):
        # d is declared one digit larger than the other columns
        # (presumably because of the 1 added to it by _setup).
        precisions = (s, s, s, s + 1)
        ddl = """
            create table test_arith_numeric (
                a numeric(%s), b numeric(%s), c numeric(%s), d numeric(%s));
                """ % precisions
        self._setup(n, s, "test_arith_numeric", ddl)

    def test_numeric(self, n, s):
        return self._test("test_arith_numeric")

    # -- int8

    def setup_int8(self, n, s):
        # NOTE(review): the limit is lower than the digits an int8 can
        # store, presumably to leave room for the b * c product -- confirm.
        if s > 9:
            reason = "skipping test with %d digits" % s
            raise SkipTest(reason)

        ddl = """
            create table test_arith_int8
                (a int8, b int8, c int8, d int8);"""
        self._setup(n, s, "test_arith_int8", ddl)

    def test_int8(self, n, s):
        return self._test("test_arith_int8")

    # -- int4

    def setup_int4(self, n, s):
        # NOTE(review): as for int8, presumably the limit leaves room for
        # the b * c product -- confirm.
        if s > 4:
            reason = "skipping test with %d digits" % s
            raise SkipTest(reason)

        ddl = """
            create table test_arith_int4
                (a int4, b int4, c int4, d int4);"""
        self._setup(n, s, "test_arith_int4", ddl)

    def test_int4(self, n, s):
        return self._test("test_arith_int4")

    # -- helpers shared by all the types

    def _setup(self, n, s, table, query):
        """Drop *table*, create it again with *query* and fill it."""
        cursor = self.conn.cursor()
        cursor.execute("drop table if exists %s;" % table)
        cursor.execute(query)

        # The table name is merged here; the doubled %% are left as
        # placeholders for the arguments passed to execute().
        fill = """
            select randinit();
            select randseed(31415926);

            insert into %s
            select
                urandomm(%%(max)s::mpz), urandomm(%%(max)s::mpz),
                urandomm(%%(max)s::mpz), urandomm(%%(max)s::mpz) + 1
            from generate_series(1, %%(n)s);
            """ % table
        args = {'max': 10 ** s, 'n': n}
        cursor.execute(fill, args)

        cursor.execute("vacuum analyze %s;" % table)

    def _test(self, table):
        """Return the time reported by explain analyze for the query."""
        cursor = self.conn.cursor()
        query = """explain analyze
            select sum(a + b * c / d)
            from %s;""" % table
        cursor.execute(query)
        # The number wanted is the second-last word of the last line.
        last_line = cursor.fetchall()[-1][0]
        words = last_line.split()
        return float(words[-2])


class Factorial(Benchmark):
    """Time the calculation of a factorial using mpz and numeric.

    The factorial is obtained by multiplying the numbers from 1 to s with an
    aggregate m() created at setup on the multiplication function of the
    type tested. The parameter n is not used by this test.
    """
    title = "Time to calculate n!"
    xlabel = "Input number"
    ylabel = "Time (in millis)"

    # -- mpz

    def setup_mpz(self, n, s):
        self._setup('mpz', 'mpz_mul')

    def test_mpz(self, n, s):
        return self._test('mpz', s)

    # -- numeric

    def setup_numeric(self, n, s):
        self._setup('numeric', 'numeric_mul')

    def test_numeric(self, n, s):
        return self._test('numeric', s)

    # -- helpers shared by all the types

    def _setup(self, type, f):
        """Create the aggregate m(type) using f as state function.

        An aggregate with the same signature is dropped if it exists.
        """
        cursor = self.conn.cursor()
        drop = "drop aggregate if exists m(%s);" % type
        create = ("create aggregate m(%s) (sfunc=%s, stype=%s);"
            % (type, f, type))
        cursor.execute(drop)
        cursor.execute(create)

    def _test(self, type, s):
        """Return the time reported by explain analyze for the aggregate."""
        cursor = self.conn.cursor()
        query = """
            explain analyze
            select m(n::%s)
            from generate_series(1,%s) n;
            """ % (type, s)
        cursor.execute(query)
        # The number wanted is the second-last word of the last line.
        last_line = cursor.fetchall()[-1][0]
        words = last_line.split()
        return float(words[-2])


class TableSize(Benchmark):
    """Measure the pages taken on disk by a table, for each data type.

    Every test creates a table test_size_<type>, fills it with n numbers
    generated by urandomm(10 ** s) and returns the relpages reported for the
    table in pg_class. The setup methods create nothing: they only skip the
    sizes not to be tested with the fixed size integers.
    """
    title = "Size of a table on disk"
    xlabel = "Numbers size (in decimal digits)"
    ylabel = "Size (in pages)"

    # NOTE(review): in all the queries below the "s" trailing the urandomm()
    # call looks like a typo; presumably the database reads it as a column
    # alias -- confirm before removing it.

    # -- mpq

    def test_mpq(self, n, s):
        query = """
            create table test_size_mpq (n mpq);
            insert into test_size_mpq
                select urandomm(%(max)s::mpz)s
                from generate_series(1, %(n)s);
            """
        return self._test(n, s, "test_size_mpq", query)

    # -- mpz

    def test_mpz(self, n, s):
        query = """
            create table test_size_mpz (n mpz);
            insert into test_size_mpz
                select urandomm(%(max)s::mpz)s
                from generate_series(1, %(n)s);
            """
        return self._test(n, s, "test_size_mpz", query)

    # -- numeric

    def test_numeric(self, n, s):
        query = """
            create table test_size_numeric (n numeric);
            insert into test_size_numeric
                select urandomm(%(max)s::mpz)s
                from generate_series(1, %(n)s);
            """
        return self._test(n, s, "test_size_numeric", query)

    # -- int8

    def setup_int8(self, n, s):
        # Numbers of more than 18 digits are not tested with int8.
        if s > 18:
            reason = "skipping test with %d digits" % s
            raise SkipTest(reason)

    def test_int8(self, n, s):
        query = """
            create table test_size_int8 (n int8);
            insert into test_size_int8
                select urandomm(%(max)s::mpz)s
                from generate_series(1, %(n)s);
            """
        return self._test(n, s, "test_size_int8", query)

    # -- int4

    def setup_int4(self, n, s):
        # Numbers of more than 9 digits are not tested with int4.
        if s > 9:
            reason = "skipping test with %d digits" % s
            raise SkipTest(reason)

    def test_int4(self, n, s):
        query = """
            create table test_size_int4 (n int4);
            insert into test_size_int4
                select urandomm(%(max)s::mpz)s
                from generate_series(1, %(n)s);
            """
        return self._test(n, s, "test_size_int4", query)

    # -- helper shared by all the types

    def _test(self, n, s, table, query):
        """Create and fill *table* running *query*; return its relpages.

        The query receives the arguments n and max (10 ** s) and is run
        after dropping the table and resetting the random state.
        """
        cursor = self.conn.cursor()
        reset = """
            drop table if exists %s;
            select randinit();
            select randseed(31415926);
            """ % table
        cursor.execute(reset)

        args = {'n': n, 'max': 10 ** s}
        cursor.execute(query, args)

        # relpages is read after vacuum analyze has run on the table.
        cursor.execute("vacuum analyze %s;" % table)
        cursor.execute(
            "select relpages from pg_class where relname = %s;" ,
            (table, ))
        row = cursor.fetchone()
        return row[0]


def main():
    """Run the benchmark chosen on the command line."""
    options = parse_args()
    # The test name is looked up among the names defined in this module.
    benchmark_cls = globals()[options.test_name]
    benchmark_cls(options).run()


def parse_args(argv=None):
    """Parse the command line and return the options, after validation.

    argv is the list of arguments to parse, program name excluded: if None
    (the default) the arguments are read from sys.argv[1:].

    The object returned has the attributes nsamples and size (lists of
    int), scale (int, None if not specified), repeats (int), dsn (string)
    and test_name (the name of the Benchmark subclass to run).

    Every problem is reported using parser.error(), which prints the usage
    and the message on stderr and terminates the program.
    """
    # Find the tests available
    test_names = sorted(o.__name__ for o in list(globals().values())
        if isinstance(o, type)
        and issubclass(o, Benchmark) and o is not Benchmark)

    from optparse import OptionParser
    parser = OptionParser(usage="%prog [OPTIONS] TEST-NAME",
        description="choose a test from: %s" % ', '.join(test_names))
    parser.add_option('-n', '--nsamples', type=int, action='append',
        help="number of numbers to sum. specify once or more")
    parser.add_option('-s', '--size', type=int, action='append',
        help="size of numbers to sum. specify once or more")
    parser.add_option('-p', '--scale', type=int,
        help="scale of the tested numbers, if applicable")
    parser.add_option('-r', '--repeats', type=int, default=3,
        help="test repetitions (take the best value) [default: %default]")
    parser.add_option('--dsn', help="database to connect", default="")

    opt, args = parser.parse_args(argv)

    # With no repetition there would be no result to take the best from:
    # refuse it here instead of failing after the tests setup.
    if opt.repeats < 1:
        parser.error("-r/--repeats must be at least 1")

    if len(args) != 1:
        parser.error("please specify one test")

    opt.test_name = args[0]
    if opt.test_name not in test_names:
        parser.error("bad test name: '%s'" % opt.test_name)

    if not opt.nsamples or not opt.size:
        parser.error("please specify -n and -s at least once")

    return opt


# Run the benchmark only when the file is executed as a script; the value
# returned by main() is passed to sys.exit().
if __name__ == "__main__":
    sys.exit(main())