1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
|
// Copyright (c) 2021-2024, Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause
#include <benchmark/benchmark.h>
#include <bitset>
#include <cstdint>
#include <stdio.h>
#include "../common.h"
#include "04_fastdiv_ispc.h"
// Human-readable description of this benchmark, handed to the Docs helper.
// NOTE(review): Docs is declared outside this file (presumably in ../common.h);
// confirm there how the text is reported.
static Docs docs("Check fast_idiv implementation of stdlib functions:\n"
                 "[int8, uint8, int16, uint16, int32, uint32, int64, uint64] x [13, 16] versions.\n"
                 "Conditions to trigger fast_idiv:\n"
                 " - The value being divided must be an int8/16/32.\n"
                 " - The divisor must be the same compile-time constant value for all of the vector lanes.\n"
                 "Expectation:\n"
                 " - No regressions\n");
// Expands the warm-up helper macro, which is defined outside this file
// (presumably in ../common.h — confirm there what it registers).
WARM_UP_RUN();
// ARGS is the argument chain appended to every benchmark registered by FASTDIV;
// by default it selects a single problem size of 8192 elements.
// The minimum usable size is the maximum target width, i.e. 64.
// A larger buffer is better, but it should preferably stay within L1.
#define ARGS Arg(8192)
// Alternative: sweep a range of sizes and request a complexity report.
// #define ARGS RangeMultiplier(2)->Range(64, 64<<15)->Complexity(benchmark::oN)
// Fills src[0 .. count) with a ramp: element i receives i (converted to T)
// minus half of count, where count is converted to T before being halved.
//   src   - destination buffer with room for at least count elements.
//   count - number of elements to write; nothing is written when count <= 0.
template <typename T> static void init_src(T *src, int count) {
    // Loop-invariant offset. The cast binds to count first, so the halving is
    // performed on the already-converted value.
    const auto half = static_cast<T>(count) / 2;
    int idx = 0;
    while (idx < count) {
        // Overflow/underflow for narrow or unsigned T is acceptable here.
        src[idx] = static_cast<T>(idx) - half;
        ++idx;
    }
}
// Zeroes the first count elements of dst.
//   dst   - buffer with room for at least count elements.
//   count - number of elements to clear; nothing is written when count <= 0.
template <typename T> static void init_dst(T *dst, int count) {
    int idx = 0;
    while (idx < count) {
        dst[idx] = 0;
        ++idx;
    }
}
// Verifies that dst[i] equals src[i] / divisor (converted back to T) for every
// i in [0, count). Only the first mismatch is reported, via printf; later
// elements are not examined once a mismatch has been found.
//   src     - input values that were divided.
//   dst     - results to verify.
//   divisor - the divisor that was applied.
//   count   - number of elements to compare.
template <typename T> static void check(T *src, T *dst, int divisor, int count) {
    // Advance to the first element whose quotient disagrees with dst.
    int pos = 0;
    while (pos < count && static_cast<T>(src[pos] / divisor) == dst[pos]) {
        ++pos;
    }
    if (pos < count) {
        printf("Error i=%d\n", pos);
    }
}
// FASTDIV(T_C, T_ISPC, DIV_VAL) defines and registers one benchmark named
// fastdiv_<T_ISPC>_<DIV_VAL>.
//   T_C     - C++ element type of the source and destination buffers.
//   T_ISPC  - type suffix used in the benchmark name and the ispc:: kernel name.
//   DIV_VAL - divisor; part of both names and also passed to check().
// The generated function allocates dst and src buffers of state.range(0)
// elements, initializes them, calls ispc::fastdiv_<T_ISPC>_<DIV_VAL>(src, dst, count)
// once per benchmark iteration, verifies the result with check() after the
// timed loop, frees both buffers and reports the problem size for complexity
// estimation. The benchmark is registered with the ARGS argument chain.
// Keep comments out of the macro body: a // comment placed before a line
// continuation would swallow the following line.
#define FASTDIV(T_C, T_ISPC, DIV_VAL)                                                                                  \
    static void fastdiv_##T_ISPC##_##DIV_VAL(benchmark::State &state) {                                                \
        const auto requested = state.range(0);                                                                         \
        const int n_elems = static_cast<int>(requested);                                                               \
        const auto n_bytes = sizeof(T_C) * n_elems;                                                                    \
        T_C *const output = static_cast<T_C *>(aligned_alloc_helper(n_bytes));                                         \
        T_C *const input = static_cast<T_C *>(aligned_alloc_helper(n_bytes));                                          \
        init_src(input, n_elems);                                                                                      \
        init_dst(output, n_elems);                                                                                     \
                                                                                                                       \
        for (auto _ : state) {                                                                                         \
            ispc::fastdiv_##T_ISPC##_##DIV_VAL(input, output, n_elems);                                                \
        }                                                                                                              \
                                                                                                                       \
        check(input, output, DIV_VAL, n_elems);                                                                        \
        aligned_free_helper(input);                                                                                    \
        aligned_free_helper(output);                                                                                   \
        state.SetComplexityN(requested);                                                                               \
    }                                                                                                                  \
    BENCHMARK(fastdiv_##T_ISPC##_##DIV_VAL)->ARGS;
// Benchmark instantiations. Each FASTDIV line defines and registers one
// benchmark; the order of these lines is kept as-is.
// Divisor 13 (not a power of two), for every element width and signedness:
FASTDIV(uint64_t, uint64, 13)
FASTDIV(int64_t, int64, 13)
FASTDIV(uint32_t, uint32, 13)
FASTDIV(int32_t, int32, 13)
FASTDIV(uint16_t, uint16, 13)
FASTDIV(int16_t, int16, 13)
FASTDIV(uint8_t, uint8, 13)
FASTDIV(int8_t, int8, 13)
// Divisor 16 (a power of two), same set of element types:
FASTDIV(uint64_t, uint64, 16)
FASTDIV(int64_t, int64, 16)
FASTDIV(uint32_t, uint32, 16)
FASTDIV(int32_t, int32, 16)
FASTDIV(uint16_t, uint16, 16)
FASTDIV(int16_t, int16, 16)
FASTDIV(uint8_t, uint8, 16)
FASTDIV(int8_t, int8, 16)
// Google Benchmark macro that supplies the program's main().
BENCHMARK_MAIN();
|