File: benchmark.py

package info (click to toggle)
fast-histogram 0.14-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 328 kB
  • sloc: ansic: 974; python: 610; makefile: 6
file content (104 lines) | stat: -rw-r--r-- 2,947 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Script to compare the speedup provided by fast-histogram

from timeit import repeat, timeit

import numpy as np

# Arguments shared by the NumPy call and the fast-histogram call of each
# dimensionality, so both implementations are timed with the same range/bins.
_ARGS_1D = "x, range=[-1, 2], bins=30"
_ARGS_2D = "x, y, range=[[-1, 2], [-2, 4]], bins=30"

# Setup snippet for the 1D benchmarks. "{size}" is a str.format placeholder
# for the number of random samples to generate.
SETUP_1D = (
    "\n"
    "import numpy as np\n"
    "from numpy import histogram as np_histogram1d\n"
    "from fast_histogram import histogram1d\n"
    "x = np.random.random({size})\n"
)

# Statements timed for the 1D case
NUMPY_1D_STMT = f"np_histogram1d({_ARGS_1D})"
FAST_1D_STMT = f"histogram1d({_ARGS_1D})"

# Setup snippet for the 2D benchmarks: two random arrays of "{size}" samples
SETUP_2D = (
    "\n"
    "import numpy as np\n"
    "from numpy import histogram2d as np_histogram2d\n"
    "from fast_histogram import histogram2d\n"
    "x = np.random.random({size})\n"
    "y = np.random.random({size})\n"
)

# Statements timed for the 2D case
NUMPY_2D_STMT = f"np_histogram2d({_ARGS_2D})"
FAST_2D_STMT = f"histogram2d({_ARGS_2D})"

# Time budget (in seconds) used to decide how many calls go into one timing run
TARGET_TIME = 1.0


def time_stats(stmt=None, setup=None, target_time=None, repeats=10):
    """Time ``stmt`` with ``timeit`` and summarise the cost of a single call.

    The statement is first run once to estimate its duration. That estimate
    is used to choose how many calls go into one timing run so that a run
    lasts roughly ``target_time`` seconds. The timing run is then repeated
    ``repeats`` times.

    Parameters
    ----------
    stmt : str or callable
        Statement to time, passed through to ``timeit``.
    setup : str or callable
        Setup code passed through to ``timeit``.
    target_time : float, optional
        Approximate duration in seconds of one timing run. Defaults to the
        module-level ``TARGET_TIME``.
    repeats : int, optional
        Number of timing runs to perform. Must be at least 1.

    Returns
    -------
    tuple
        ``(min, mean, median)`` over the timing runs, each divided by the
        number of calls per run, i.e. expressed in seconds per call.

    Raises
    ------
    ValueError
        If ``repeats`` is smaller than 1.
    """
    # Imported here so that the function does not need a new file-level import
    from time import get_clock_info

    if repeats < 1:
        raise ValueError("repeats must be at least 1")

    if target_time is None:
        target_time = TARGET_TIME

    # Call once to check how long it takes
    time_single = timeit(stmt=stmt, setup=setup, number=1)

    # A statement faster than the timer's resolution can be measured as taking
    # exactly zero seconds, which would make the division below fail. Clamp to
    # the resolution of perf_counter, the clock timeit uses by default.
    time_single = max(time_single, get_clock_info("perf_counter").resolution)

    # Find out how many times we can call it. We always call it at least three
    # times for accuracy
    number = max(3, int(target_time / time_single))

    print(f" -> estimated time to complete test: {time_single * repeats * number:.1f}s")

    times = repeat(stmt=stmt, setup=setup, repeat=repeats, number=number)

    # Each entry of times is the total for `number` calls; convert to per-call
    return np.min(times) / number, np.mean(times) / number, np.median(times) / number


# Names of the timing columns, in the order they are written to each row:
# numpy 1D, fast-histogram 1D, numpy 2D, fast-histogram 2D, each contributing
# the (min, mean, median) triple returned by time_stats.
COLUMN_NAMES = (
    "np_1d_min",
    "np_1d_mean",
    "np_1d_median",
    "fa_1d_min",
    "fa_1d_mean",
    "fa_1d_median",
    "np_2d_min",
    "np_2d_mean",
    "np_2d_median",
    "fa_2d_min",
    "fa_2d_mean",
    "fa_2d_median",
)

# (statement, setup template) pairs, listed in the same order as the column
# groups above.
BENCHMARKS = (
    (NUMPY_1D_STMT, SETUP_1D),
    (FAST_1D_STMT, SETUP_1D),
    (NUMPY_2D_STMT, SETUP_2D),
    (FAST_2D_STMT, SETUP_2D),
)

# One leading "size" column followed by one column per entry in COLUMN_NAMES
FMT_HEADER = "# {:7s}" + " {:10s}" * len(COLUMN_NAMES) + "\n"
FMT = "{:9d}" + " {:10.7e}" * len(COLUMN_NAMES) + "\n"

with open("benchmark_times.txt", "w") as f:
    f.write(FMT_HEADER.format("size", *COLUMN_NAMES))

    # Array sizes go from 1 to 10**8 in powers of ten
    for exponent in range(9):
        size = 10**exponent

        print(f"Running benchmarks for size={size}")

        # Collect the (min, mean, median) triple of every benchmark into one
        # flat row
        row = []
        for statement, setup_template in BENCHMARKS:
            row.extend(
                time_stats(stmt=statement, setup=setup_template.format(size=size))
            )

        f.write(FMT.format(size, *row))
        # Push each row out of Python's write buffer as soon as it is complete
        f.flush()