File: 06_math.cpp

package info (click to toggle)
ispc 1.26.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 95,356 kB
  • sloc: cpp: 55,778; python: 6,681; yacc: 3,074; lex: 1,095; ansic: 714; sh: 283; makefile: 16
file content (254 lines) | stat: -rw-r--r-- 14,551 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
// Copyright (c) 2021-2024, Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause

#include <benchmark/benchmark.h>
#include <cmath>
#include <cstdint>
#include <stdio.h>

#include "../common.h"
#include "06_math_ispc.h"

// Human-readable description of this benchmark suite.
// NOTE(review): Docs is not declared in this file; presumably it comes from ../common.h - confirm there.
static Docs docs("Check performance of math functions.\n"
                 "Things to note:\n"
                 " - benchmarks are focused on performance, not accuracy verification.\n"
                 " - math functions are invoked in unmasked context.\n"
                 " - workload sizes are designed to hit different caches (L1/L2/L3).\n"
                 "Expectations:\n"
                 " - No regressions\n");

// NOTE(review): WARM_UP_RUN is not defined in this file; presumably it registers a warm-up pass - confirm in
// ../common.h.
WARM_UP_RUN();

// Workload sizes (element counts) applied to every benchmark registered in this file.
// The smallest size is four times the widest target: 64 * 4 = 256 elements.
// The footprints noted next to each size are per array and assume 4-byte elements (sizeof(int)).
#define ARGS                                                                                                           \
    Arg(256)             /* 1 KB:   expected to reside in L1 */                                                        \
        ->Arg(256 << 4)  /* 16 KB:  expected to reside in L1 */                                                        \
        ->Arg(256 << 7)  /* 128 KB: expected to reside in L2 */                                                        \
        ->Arg(256 << 12) /* 4 MB:   expected to reside in L3 */

// Helper functions

// Pi in double precision; the input initializers in this file use it to wrap and shift their values.
static constexpr double PI = 3.141592653589793;

// Fills src with the ascending ramp 1, 2, ..., count and zeroes dst.
template <typename T> static void init_linear(T *src, T *dst, int count) {
    for (int idx = 0; idx < count; ++idx) {
        const int ordinal = idx + 1;
        src[idx] = static_cast<T>(ordinal);
        dst[idx] = static_cast<T>(0);
    }
}

// src1 cycles through the integers 1..30; src2 holds (i * PI) mod 2, i.e. values in [0, 2) before
// conversion to T; dst is zeroed.
template <typename T> static void init_linear2(T *src1, T *src2, T *dst, int count) {
    for (int idx = 0; idx < count; ++idx) {
        const int cycle = idx % 30;
        const auto scaled = static_cast<T>(idx) * PI;
        src1[idx] = static_cast<T>(cycle + 1);
        src2[idx] = static_cast<T>(std::fmod(scaled, 2.0));
        dst[idx] = static_cast<T>(0);
    }
}

// src1 is the ascending ramp 1, 2, ..., count; src2 cycles through the integer exponents 0..19; dst is zeroed.
template <typename T> static void init_ldexp(T *src1, int *src2, T *dst, int count) {
    for (int idx = 0; idx < count; ++idx) {
        const int exponent = idx % 20;
        src1[idx] = static_cast<T>(idx + 1);
        src2[idx] = exponent;
        dst[idx] = static_cast<T>(0);
    }
}

// src is the ascending ramp 1, 2, ..., count; both destination arrays (dst1 of type T, dst2 of type int)
// are zeroed.
template <typename T> static void init_frexp(T *src, T *dst1, int *dst2, int count) {
    for (int idx = 0; idx < count; ++idx) {
        const int ordinal = idx + 1;
        src[idx] = static_cast<T>(ordinal);
        dst1[idx] = static_cast<T>(0);
        dst2[idx] = 0;
    }
}

// Generate numbers in the range [-pi/2, pi/2): the index is wrapped modulo PI and then shifted down by PI / 2.
// dst is zeroed.
template <typename T> static void init_pi(T *src, T *dst, int count) {
    const auto shift = PI / 2;
    for (int idx = 0; idx < count; ++idx) {
        const auto wrapped = std::fmod(static_cast<T>(idx), PI);
        src[idx] = static_cast<T>(wrapped - shift);
        dst[idx] = static_cast<T>(0);
    }
}

// Generate numbers in the range [-pi/2+eps, pi/2-eps) with eps = 0.01; dst is zeroed.
// The eps margin keeps the inputs away from +-pi/2 to avoid precision problems for tan().
template <typename T> static void init_half_pi(T *src, T *dst, int count) {
    const T eps = 0.01f;
    const auto period = PI - 2 * eps;
    const auto shift = PI / 2 - eps;
    for (int idx = 0; idx < count; ++idx) {
        const auto wrapped = std::fmod(static_cast<T>(idx), period);
        src[idx] = static_cast<T>(wrapped - shift);
        dst[idx] = static_cast<T>(0);
    }
}

// Generate numbers in the range [-1+eps, 1-eps) with eps = 0.01; dst is zeroed.
// The eps margin keeps the inputs away from +-1 to avoid precision problems at the edges of the
// asin()/acos() domain; those are the benchmarks in this file that use this initializer.
template <typename T> static void init_one(T *src, T *dst, int count) {
    const T eps = 0.01f;
    const auto span = 2.0 - 2 * eps;
    const auto shift = 1.0 - eps;
    for (int idx = 0; idx < count; ++idx) {
        const auto wrapped = std::fmod(static_cast<T>(idx) * PI, span);
        src[idx] = static_cast<T>(wrapped - shift);
        dst[idx] = static_cast<T>(0);
    }
}

// Sanity-checks the results of a one-argument function.
//   src, dst - input values and the results produced for them, `count` elements each.
//   fp       - reference implementation; fp(src[i]) is the expected value of dst[i].
// A result matches when it equals the reference exactly or differs from it by at most 0.001 (absolute).
// The first mismatch is printed and the check stops; nothing is printed when every element matches.
// NaN results are reported as mismatches.
template <typename T, typename F> static void check(T *src, T *dst, int count, F fp) {
    const T eps = 0.001f;
    for (int i = 0; i < count; i++) {
        const T expected = fp(src[i]);
        const T actual = dst[i];
        // Exact equality is tested first so that matching infinities pass (inf - inf is NaN).
        // The tolerance is tested with "<=" and negated, because "abs(diff) > eps" is false for NaN
        // and would let a NaN result slip through unreported.
        const bool matches = expected == actual || std::abs(expected - actual) <= eps;
        if (!matches) {
            printf("Error i=%d, expected %g, return %g\n", i, expected, actual);
            return;
        }
    }
}

// Sanity-checks the results of a two-argument function.
//   src1, src2, dst - the two input arrays and the results produced for them, `count` elements each.
//   fp              - reference implementation; fp(src1[i], src2[i]) is the expected value of dst[i].
// A result matches when it equals the reference exactly or differs from it by at most 0.001 (absolute).
// The first mismatch is printed and the check stops; nothing is printed when every element matches.
// NaN results are reported as mismatches.
template <typename T, typename F> static void check2(T *src1, T *src2, T *dst, int count, F fp) {
    const T eps = 0.001f;
    for (int i = 0; i < count; i++) {
        const T expected = fp(src1[i], src2[i]);
        const T actual = dst[i];
        // Exact equality is tested first so that matching infinities pass (inf - inf is NaN).
        // The tolerance is tested with "<=" and negated, because "abs(diff) > eps" is false for NaN
        // and would let a NaN result slip through unreported.
        const bool matches = expected == actual || std::abs(expected - actual) <= eps;
        if (!matches) {
            printf("Error i=%d, expected %g, return %g\n", i, expected, actual);
            return;
        }
    }
}

// Sanity-checks ldexp results: dst[i] is compared against std::ldexp(src1[i], src2[i]).
//   src1 - values of type T, src2 - integer exponents, dst - results; `count` elements each.
// A result matches when it equals the reference exactly or differs from it by at most 0.001 (absolute).
// The first mismatch is printed and the check stops; nothing is printed when every element matches.
// NaN results are reported as mismatches.
template <typename T> static void check_ldexp(T *src1, int *src2, T *dst, int count) {
    const T eps = 0.001f;
    for (int i = 0; i < count; i++) {
        const T expected = std::ldexp(src1[i], src2[i]);
        const T actual = dst[i];
        // Exact equality is tested first so that matching infinities pass (inf - inf is NaN).
        // The tolerance is tested with "<=" and negated, because "abs(diff) > eps" is false for NaN
        // and would let a NaN result slip through unreported.
        const bool matches = expected == actual || std::abs(expected - actual) <= eps;
        if (!matches) {
            printf("Error i=%d, expected %g, return %g\n", i, expected, actual);
            return;
        }
    }
}

// Sanity-checks frexp results against std::frexp(src[i], &exponent).
//   src  - input values; dst1 - returned fractions (type T); dst2 - returned exponents; `count` elements each.
// The fraction matches when it equals the reference exactly or differs from it by at most 0.001 (absolute);
// the exponent must match exactly. The first mismatch is printed, including both exponents so that an
// exponent-only mismatch is visible, and the check stops. Nothing is printed when every element matches.
// NaN fractions are reported as mismatches.
template <typename T> static void check_frexp(T *src, T *dst1, int *dst2, int count) {
    const T eps = 0.001f;
    for (int i = 0; i < count; i++) {
        int exponent = 0;
        const T expected = std::frexp(src[i], &exponent);
        const T actual = dst1[i];
        // Exact equality is tested first so that matching infinities pass (inf - inf is NaN).
        // The tolerance is tested with "<=" and negated, because "abs(diff) > eps" is false for NaN
        // and would let a NaN result slip through unreported.
        const bool fraction_ok = expected == actual || std::abs(expected - actual) <= eps;
        if (!fraction_ok || exponent != dst2[i]) {
            printf("Error i=%d, expected %g (exp %d), return %g (exp %d)\n", i, expected, exponent, actual, dst2[i]);
            return;
        }
    }
}

// Defines and registers a benchmark for a function with a single source and a single destination array.
//   NAME  - function name; the benchmark is called NAME_T and times ispc::NAME_T.
//   T     - element type of both arrays.
//   INIT  - initializer invoked as INIT(source, destination, count) before the timed loop.
//   CHECK - reference expression written in terms of `x`; after the timed loop it is handed to check()
//           to compare against the destination array.
#define TEST(NAME, T, INIT, CHECK)                                                                                     \
    static void NAME##_##T(benchmark::State &state) {                                                                  \
        const int elems = static_cast<int>(state.range(0));                                                            \
        const auto bytes = sizeof(T) * elems;                                                                          \
        T *input = static_cast<T *>(aligned_alloc_helper(bytes));                                                      \
        T *output = static_cast<T *>(aligned_alloc_helper(bytes));                                                     \
        INIT(input, output, elems);                                                                                    \
                                                                                                                       \
        for (auto _ : state) {                                                                                         \
            ispc::NAME##_##T(input, output, elems);                                                                    \
        }                                                                                                              \
                                                                                                                       \
        check(input, output, elems, [](T x) { return CHECK; });                                                        \
        aligned_free_helper(input);                                                                                    \
        aligned_free_helper(output);                                                                                   \
    }                                                                                                                  \
    BENCHMARK(NAME##_##T)->ARGS;

// Defines and registers a benchmark for a function with two source arrays of the same type and a single
// destination array.
//   NAME  - function name; the benchmark is called NAME_T and times ispc::NAME_T.
//   T     - element type of all three arrays.
//   INIT  - initializer invoked as INIT(source1, source2, destination, count) before the timed loop.
//   CHECK - reference expression written in terms of `x` and `y`; after the timed loop it is handed to
//           check2() to compare against the destination array.
#define TEST2(NAME, T, INIT, CHECK)                                                                                    \
    static void NAME##_##T(benchmark::State &state) {                                                                  \
        const int elems = static_cast<int>(state.range(0));                                                            \
        const auto bytes = sizeof(T) * elems;                                                                          \
        T *lhs = static_cast<T *>(aligned_alloc_helper(bytes));                                                        \
        T *rhs = static_cast<T *>(aligned_alloc_helper(bytes));                                                        \
        T *output = static_cast<T *>(aligned_alloc_helper(bytes));                                                     \
        INIT(lhs, rhs, output, elems);                                                                                 \
                                                                                                                       \
        for (auto _ : state) {                                                                                         \
            ispc::NAME##_##T(lhs, rhs, output, elems);                                                                 \
        }                                                                                                              \
                                                                                                                       \
        check2(lhs, rhs, output, elems, [](T x, T y) { return CHECK; });                                               \
        aligned_free_helper(lhs);                                                                                      \
        aligned_free_helper(rhs);                                                                                      \
        aligned_free_helper(output);                                                                                   \
    }                                                                                                                  \
    BENCHMARK(NAME##_##T)->ARGS;

// Defines and registers a benchmark for a function with one source array of type T, one source array of
// type int and a single destination array of type T.
//   NAME - function name; the benchmark is called NAME_T, times ispc::NAME_T and is verified by check_NAME().
//   T    - element type of the first source and of the destination.
//   INIT - initializer invoked as INIT(source, int_source, destination, count) before the timed loop.
#define TEST3(NAME, T, INIT)                                                                                           \
    static void NAME##_##T(benchmark::State &state) {                                                                  \
        const int elems = static_cast<int>(state.range(0));                                                            \
        T *operand = static_cast<T *>(aligned_alloc_helper(sizeof(T) * elems));                                        \
        int *int_operand = static_cast<int *>(aligned_alloc_helper(sizeof(int) * elems));                              \
        T *result = static_cast<T *>(aligned_alloc_helper(sizeof(T) * elems));                                         \
        INIT(operand, int_operand, result, elems);                                                                     \
                                                                                                                       \
        for (auto _ : state) {                                                                                         \
            ispc::NAME##_##T(operand, int_operand, result, elems);                                                     \
        }                                                                                                              \
                                                                                                                       \
        check_##NAME(operand, int_operand, result, elems);                                                             \
        aligned_free_helper(operand);                                                                                  \
        aligned_free_helper(int_operand);                                                                              \
        aligned_free_helper(result);                                                                                   \
    }                                                                                                                  \
    BENCHMARK(NAME##_##T)->ARGS;

// Defines and registers a benchmark for a function with a single source array of type T and two
// destination arrays, one of type T and one of type int.
//   NAME - function name; the benchmark is called NAME_T, times ispc::NAME_T and is verified by check_NAME().
//   T    - element type of the source and of the first destination.
//   INIT - initializer invoked as INIT(source, destination, int_destination, count) before the timed loop.
#define TEST4(NAME, T, INIT)                                                                                           \
    static void NAME##_##T(benchmark::State &state) {                                                                  \
        const int elems = static_cast<int>(state.range(0));                                                            \
        T *input = static_cast<T *>(aligned_alloc_helper(sizeof(T) * elems));                                          \
        T *result = static_cast<T *>(aligned_alloc_helper(sizeof(T) * elems));                                         \
        int *int_result = static_cast<int *>(aligned_alloc_helper(sizeof(int) * elems));                               \
        INIT(input, result, int_result, elems);                                                                        \
                                                                                                                       \
        for (auto _ : state) {                                                                                         \
            ispc::NAME##_##T(input, result, int_result, elems);                                                        \
        }                                                                                                              \
                                                                                                                       \
        check_##NAME(input, result, int_result, elems);                                                                \
        aligned_free_helper(input);                                                                                    \
        aligned_free_helper(result);                                                                                   \
        aligned_free_helper(int_result);                                                                               \
    }                                                                                                                  \
    BENCHMARK(NAME##_##T)->ARGS;

// Benchmark instantiations. Each line below expands to one benchmark function, named <function>_<type>,
// together with its registration. The last macro argument of TEST/TEST2 is the reference expression
// used to sanity-check the results after the timed loop.

// Square root, reciprocal square root and reciprocal. The *_fast variants are checked against the same
// reference expression as the regular ones.
TEST(sqrt, float, init_linear, std::sqrt(x))
TEST(sqrt, double, init_linear, std::sqrt(x))
TEST(rsqrt, float, init_linear, 1.0f / std::sqrt(x))
TEST(rsqrt, double, init_linear, 1.0 / std::sqrt(x))
TEST(rsqrt_fast, float, init_linear, 1.0f / std::sqrt(x))
TEST(rsqrt_fast, double, init_linear, 1.0 / std::sqrt(x))
TEST(rcp, float, init_linear, 1.0f / x)
TEST(rcp, double, init_linear, 1.0 / x)
TEST(rcp_fast, float, init_linear, 1.0f / x)
TEST(rcp_fast, double, init_linear, 1.0 / x)
// ldexp takes an extra int source and frexp writes an extra int destination, so they go through
// TEST3/TEST4 and are verified by check_ldexp()/check_frexp().
TEST3(ldexp, float, init_ldexp)
TEST3(ldexp, double, init_ldexp)
TEST4(frexp, float, init_frexp)
TEST4(frexp, double, init_frexp)

// Trigonometric functions and their inverses. sin/cos use init_pi, asin/acos use init_one,
// tan uses init_half_pi, atan uses init_linear and atan2 uses init_linear2.
TEST(sin, float, init_pi, std::sin(x))
TEST(sin, double, init_pi, std::sin(x))
TEST(asin, float, init_one, std::asin(x))
TEST(asin, double, init_one, std::asin(x))
TEST(cos, float, init_pi, std::cos(x))
TEST(cos, double, init_pi, std::cos(x))
TEST(acos, float, init_one, std::acos(x))
TEST(acos, double, init_one, std::acos(x))
TEST(tan, float, init_half_pi, std::tan(x))
TEST(tan, double, init_half_pi, std::tan(x))
TEST(atan, float, init_linear, std::atan(x))
TEST(atan, double, init_linear, std::atan(x))
TEST2(atan2, float, init_linear2, std::atan2(x, y))
TEST2(atan2, double, init_linear2, std::atan2(x, y))

// Exponential, logarithm and power.
TEST(exp, float, init_pi, std::exp(x))
TEST(exp, double, init_pi, std::exp(x))
TEST(log, float, init_linear, std::log(x))
TEST(log, double, init_linear, std::log(x))
TEST2(pow, float, init_linear2, std::pow(x, y))
TEST2(pow, double, init_linear2, std::pow(x, y))

// Google Benchmark macro that provides main() for this benchmark binary.
BENCHMARK_MAIN();