File: 09_shuffle.cpp

package info (click to toggle)
ispc 1.26.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 95,356 kB
  • sloc: cpp: 55,778; python: 6,681; yacc: 3,074; lex: 1,095; ansic: 714; sh: 283; makefile: 16
file content (135 lines) | stat: -rw-r--r-- 7,588 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// Copyright (c) 2024, Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause

#include "../common.h"
#include "09_shuffle_ispc.h"
#include <benchmark/benchmark.h>
#include <bitset>
#include <cstdint>
#include <stdio.h>

// Documentation for the benchmark
// `Docs` is not declared in this file (presumably it comes from ../common.h — confirm how the text is surfaced).
static Docs
    docs("Check performance of shuffle operations with non-constant indexes.\n"
         "ISPC has two shuffle operations:\n"
         "1. shuffle(T, int)\n"
         "2. shuffle(T, T, int)\n"
         "On some targets and for some types, shuffle operations are implemented with target-specific instructions; on "
         "others, it is just a generic implementation.\n"
         "This benchmark allows us to effectively compare and tune different implementations for different types.\n"
         "Expectation:\n"
         " - No regressions\n");

// Warm-up run for benchmarking
// `WARM_UP_RUN` is a macro defined outside this file (presumably ../common.h — confirm).
WARM_UP_RUN();

// Benchmark arguments
// Minimum size is maximum target width, i.e., 64. Larger buffer is better, but preferably stay within L1 cache.
// ARGS is appended to every BENCHMARK(...) registration in this file: the element count grows by a factor of 2
// from 64 up to 64 << 10 (65536), and the measured complexity is fitted against O(N).
// NOTE(review): whether 65536 elements per buffer still fit in L1 depends on the element type and the target —
// confirm the upper bound is intended.
#define ARGS RangeMultiplier(2)->Range(64, 64 << 10)->Complexity(benchmark::oN)

// Passed unchanged as the permutation argument of every ispc::Shuffle1_* / ispc::Shuffle2_* call in this file;
// how the value is interpreted is decided on the ISPC side, which is not visible here.
constexpr int permutation = 1;

// Prepares the buffers for a single-vector shuffle run.
//   src_a - receives the ramp 0, 1, 2, ... (each index converted to T)
//   dst   - cleared to zero
//   count - number of elements written to each buffer; nothing is touched when count <= 0
template <typename T> static void init1(T *src_a, T *dst, int count) {
    int idx = 0;
    while (idx < count) {
        // Source first, destination second: the store order is kept in case the buffers alias.
        src_a[idx] = static_cast<T>(idx);
        dst[idx] = 0;
        ++idx;
    }
}

// Prepares the buffers for a two-vector shuffle run.
//   src_a, src_b - both receive the same ramp 0, 1, 2, ... (each index converted to T)
//   dst          - cleared to zero
//   count        - number of elements written to each buffer; nothing is touched when count <= 0
template <typename T> static void init2(T *src_a, T *src_b, T *dst, int count) {
    int idx = 0;
    while (idx < count) {
        const T ramp_value = static_cast<T>(idx);
        // Store order (a, b, dst) is kept in case any of the buffers alias.
        src_a[idx] = ramp_value;
        src_b[idx] = ramp_value;
        dst[idx] = 0;
        ++idx;
    }
}

// Verifies the output of the single-vector shuffle benchmark.
// Compares dst against src_a element by element; at the first index where they differ one error line is
// written to stdout and the scan stops. Nothing is printed when all `count` elements match.
//   src_a - reference values
//   dst   - values to validate
//   count - number of elements to compare
// A message is only formatted for int8_t/int16_t/int32_t/int64_t, float and double element types.
template <typename T> static void check1(T *src_a, T *dst, int count) {
    // The fixed-width signed integer types are all printed through a long long conversion.
    constexpr bool prints_as_integer = std::is_same_v<T, int8_t> || std::is_same_v<T, int16_t> ||
                                       std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>;

    // Advance to the first mismatching element, if any.
    int pos = 0;
    while (pos < count && !(dst[pos] != src_a[pos])) {
        ++pos;
    }
    if (pos >= count) {
        return; // every element matched, or there was nothing to compare
    }

    if constexpr (prints_as_integer) {
        printf("Error at i=%d: dst[%i]=%lld, but expected: %lld\n", pos, pos, static_cast<long long>(dst[pos]),
               static_cast<long long>(src_a[pos]));
    } else if constexpr (std::is_same_v<T, float>) {
        printf("Error at i=%d: dst[%i]=%f, but expected: %f\n", pos, pos, dst[pos], src_a[pos]);
    } else if constexpr (std::is_same_v<T, double>) {
        printf("Error at i=%d: dst[%i]=%lf, but expected: %lf\n", pos, pos, dst[pos], src_a[pos]);
    }
}

// Verifies the output of the two-vector shuffle benchmark.
// Compares dst against src_a element by element; at the first index where they differ one error line is
// written to stdout and the scan stops. Nothing is printed when all `count` elements match.
//   src_a - reference values
//   src_b - second source buffer; part of the signature but not read by this function
//   dst   - values to validate
//   count - number of elements to compare
// A message is only formatted for int8_t/int16_t/int32_t/int64_t, float and double element types.
template <typename T> static void check2(T *src_a, T *src_b, T *dst, int count) {
    // The fixed-width signed integer types are all printed through a long long conversion.
    constexpr bool prints_as_integer = std::is_same_v<T, int8_t> || std::is_same_v<T, int16_t> ||
                                       std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>;

    // Advance to the first mismatching element, if any.
    int pos = 0;
    while (pos < count && !(dst[pos] != src_a[pos])) {
        ++pos;
    }
    if (pos >= count) {
        return; // every element matched, or there was nothing to compare
    }

    if constexpr (prints_as_integer) {
        printf("Error at i=%d: dst[%i]=%lld, but expected: %lld\n", pos, pos, static_cast<long long>(dst[pos]),
               static_cast<long long>(src_a[pos]));
    } else if constexpr (std::is_same_v<T, float>) {
        printf("Error at i=%d: dst[%i]=%f, but expected: %f\n", pos, pos, dst[pos], src_a[pos]);
    } else if constexpr (std::is_same_v<T, double>) {
        printf("Error at i=%d: dst[%i]=%lf, but expected: %lf\n", pos, pos, dst[pos], src_a[pos]);
    }
}

// Generates and registers one single-vector shuffle benchmark.
//   T_C    - C++ element type of the buffers; also pasted into the function name shuffle1_<T_C>
//   T_ISPC - suffix selecting the ispc::Shuffle1_<T_ISPC> function to call
// The generated function takes the element count from state.range(0), allocates and initializes a source
// and a destination buffer, calls the ISPC function once per benchmark iteration, validates the destination
// with check1, and releases both buffers. The function is registered with the ARGS size sweep.
#define SHUFFLE_1(T_C, T_ISPC) \
    static void shuffle1_##T_C(benchmark::State &state) { \
        const int num_elems = static_cast<int>(state.range(0)); \
        const auto buf_bytes = sizeof(T_C) * num_elems; \
        T_C *const input = static_cast<T_C *>(aligned_alloc_helper(buf_bytes)); \
        T_C *const output = static_cast<T_C *>(aligned_alloc_helper(buf_bytes)); \
        init1(input, output, num_elems); \
        for (auto _ : state) { \
            ispc::Shuffle1_##T_ISPC(input, output, permutation, num_elems); \
        } \
        check1(input, output, num_elems); \
        aligned_free_helper(input); \
        aligned_free_helper(output); \
        state.SetComplexityN(state.range(0)); \
    } \
    BENCHMARK(shuffle1_##T_C)->ARGS;

// Define and register the single-vector shuffle benchmark for each element type
// (first argument: C++ buffer type, second argument: suffix of the matching ispc::Shuffle1_* function).
SHUFFLE_1(int8_t, int8)
SHUFFLE_1(int16_t, int16)
SHUFFLE_1(int, int)
SHUFFLE_1(float, float)
SHUFFLE_1(double, double)
SHUFFLE_1(int64_t, int64)

// Generates and registers one two-vector shuffle benchmark.
//   T_C    - C++ element type of the buffers; also pasted into the function name shuffle2_<T_C>
//   T_ISPC - suffix selecting the ispc::Shuffle2_<T_ISPC> function to call
// The generated function takes the element count from state.range(0), allocates and initializes two source
// buffers and a destination buffer, calls the ISPC function once per benchmark iteration, validates the
// destination with check2, and releases all three buffers. The function is registered with the ARGS size sweep.
#define SHUFFLE_2(T_C, T_ISPC) \
    static void shuffle2_##T_C(benchmark::State &state) { \
        const int num_elems = static_cast<int>(state.range(0)); \
        const auto buf_bytes = sizeof(T_C) * num_elems; \
        T_C *const first_input = static_cast<T_C *>(aligned_alloc_helper(buf_bytes)); \
        T_C *const second_input = static_cast<T_C *>(aligned_alloc_helper(buf_bytes)); \
        T_C *const output = static_cast<T_C *>(aligned_alloc_helper(buf_bytes)); \
        init2(first_input, second_input, output, num_elems); \
        for (auto _ : state) { \
            ispc::Shuffle2_##T_ISPC(first_input, second_input, output, permutation, num_elems); \
        } \
        check2(first_input, second_input, output, num_elems); \
        aligned_free_helper(first_input); \
        aligned_free_helper(second_input); \
        aligned_free_helper(output); \
        state.SetComplexityN(state.range(0)); \
    } \
    BENCHMARK(shuffle2_##T_C)->ARGS;

// Define and register the two-vector shuffle benchmark for each element type
// (first argument: C++ buffer type, second argument: suffix of the matching ispc::Shuffle2_* function).
SHUFFLE_2(int8_t, int8)
SHUFFLE_2(int16_t, int16)
SHUFFLE_2(int, int)
SHUFFLE_2(float, float)
SHUFFLE_2(double, double)
SHUFFLE_2(int64_t, int64)

// Google Benchmark macro that expands to the program's main(), which runs the benchmarks registered above.
BENCHMARK_MAIN();