1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
#ifndef KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
#define KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
import kokkos.core;
import kokkos.unordered_map;
#else
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#endif
#include <vector>
#include <algorithm>
#include <Kokkos_Timer.hpp>
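// This performance test simulates global ids: it generates them, stores them
// in a Kokkos::UnorderedMap keyed by global id, and looks them up again.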
namespace Performance {
// Id-count sweep parameters. Nothing in this header reads them; presumably the
// driver that calls test_global_to_local_ids() does — TODO confirm how
// `id_step` is applied (additive vs. multiplicative).
static constexpr unsigned begin_id_size = 256u;
static constexpr unsigned end_id_size   = (1u << 22);  // 4194304
static constexpr unsigned id_step       = 2u;
// Overlays one 32-bit word with its constituent bytes.
// `word` has to remain the first member: brace-initializing the union with a
// single value initializes the first member.
union helper {
  uint32_t word;
  uint8_t byte[sizeof(uint32_t)];
};
/// Functor that writes one unique, scrambled "global id" per local index.
///
/// Constructing the functor launches a parallel_for over every entry of the
/// view passed in. No fence is issued here; the caller is responsible for
/// synchronizing before reading the ids on the host or timing the launch.
///
/// @tparam Device  type used as the functor's `execution_space`; it must
///                 provide a nested `size_type`.
template <typename Device>
struct generate_ids {
  using execution_space = Device;
  using size_type       = typename execution_space::size_type;
  using local_id_view   = Kokkos::View<uint32_t*, execution_space>;

  /// Destination view: entry `i` receives the global id of local index `i`.
  local_id_view local_2_global;

  /// Stores a handle to `ids` and launches the fill over `ids.extent(0)`
  /// entries.
  generate_ids(local_id_view& ids) : local_2_global(ids) {
    Kokkos::parallel_for(local_2_global.extent(0), *this);
  }

  /// Computes the global id for local index `i`.
  ///
  /// The id is the bitwise complement of `i` with its two 16-bit halves
  /// exchanged. Both steps are bijections on 32-bit words, so distinct
  /// indices always produce distinct ids.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type i) const {
    const uint32_t inverted = ~static_cast<uint32_t>(i);
    // Swapping bytes 3<->1 and 2<->0 of a 32-bit word is a rotation by 16 bits
    // on both little- and big-endian targets. Doing it with shifts avoids
    // reading an inactive union member (undefined behaviour in C++).
    local_2_global[i] = (inverted << 16) | (inverted >> 16);
  }
};
/// Functor that populates the global-id -> local-index map.
///
/// Construction launches one parallel insert per entry of the local-to-global
/// view. No fence is issued here.
///
/// @tparam Device  type used as the functor's `execution_space`; it must
///                 provide a nested `size_type`.
template <typename Device>
struct fill_map {
  using execution_space = Device;
  using size_type       = typename execution_space::size_type;
  using global_id_view =
      Kokkos::UnorderedMap<uint32_t, size_type, execution_space>;
  using local_id_view = Kokkos::View<const uint32_t*, execution_space,
                                     Kokkos::MemoryRandomAccess>;

  /// Map being filled: key is the global id, value is the local index.
  global_id_view global_2_local;
  /// Read-only source of global ids, indexed by local index.
  local_id_view local_2_global;

  /// Keeps handles to the map and the id view, then launches the inserts.
  fill_map(global_id_view gIds, local_id_view lIds)
      : global_2_local(gIds), local_2_global(lIds) {
    const auto num_ids = local_2_global.extent(0);
    Kokkos::parallel_for(num_ids, *this);
  }

  /// Inserts the pair (global id of `i`, `i`) into the map.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type i) const {
    const uint32_t global_id = local_2_global[i];
    // The insert result is not inspected; a failed insert goes unnoticed here.
    global_2_local.insert(global_id, i);
  }
};
/// Reduction functor that looks up every global id in the map and counts the
/// lookups that do not return the expected local index.
///
/// Construction runs a parallel_reduce over all entries of the local-to-global
/// view and stores the resulting count in the `num_errors` argument.
///
/// @tparam Device  type used as the functor's `execution_space`; it must
///                 provide a nested `size_type`.
template <typename Device>
struct find_test {
  using execution_space = Device;
  using size_type       = typename execution_space::size_type;
  using local_id_view   = Kokkos::View<const uint32_t*, execution_space,
                                     Kokkos::MemoryRandomAccess>;
  using global_id_view =
      Kokkos::UnorderedMap<const uint32_t, const size_type, execution_space>;

  /// Read-only map: key is the global id, value is the local index.
  global_id_view global_2_local;
  /// Read-only global ids, indexed by local index.
  local_id_view local_2_global;

  /// Reduction value: number of failed lookups.
  using value_type = size_t;

  /// Keeps handles to the map and the id view, then runs the reduction.
  ///
  /// @param[out] num_errors  receives the number of failed lookups of this
  ///                         reduction.
  find_test(global_id_view gIds, local_id_view lIds, value_type& num_errors)
      : global_2_local(gIds), local_2_global(lIds) {
    Kokkos::parallel_reduce(local_2_global.extent(0), *this, num_errors);
  }

  /// Starts each partial count at zero.
  KOKKOS_INLINE_FUNCTION
  void init(value_type& v) const { v = 0; }

  /// Combines two partial counts by addition.
  KOKKOS_INLINE_FUNCTION
  void join(value_type& dst, value_type const& src) const { dst += src; }

  /// Looks up the global id of local index `i` and bumps `num_errors` when
  /// the id is missing from the map or maps to a different index.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type i, value_type& num_errors) const {
    const uint32_t index = global_2_local.find(local_2_global[i]);
    // find() reports a miss with an invalid index (per the Kokkos
    // UnorderedMap API). Check validity first so a miss is counted as an
    // error instead of reading value_at() out of bounds.
    if (!global_2_local.valid_at(index) ||
        global_2_local.value_at(index) != i) {
      ++num_errors;
    }
  }
};
template <typename Device>
void test_global_to_local_ids(unsigned num_ids) {
using execution_space = Device;
using size_type = typename execution_space::size_type;
using local_id_view = Kokkos::View<uint32_t*, execution_space>;
using global_id_view =
Kokkos::UnorderedMap<uint32_t, size_type, execution_space>;
// size
std::cout << num_ids << ", ";
double elasped_time = 0;
Kokkos::Timer timer;
local_id_view local_2_global("local_ids", num_ids);
global_id_view global_2_local((3u * num_ids) / 2u);
// create
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
// generate unique ids
{ generate_ids<Device> gen(local_2_global); }
Device().fence();
// generate
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
{ fill_map<Device> fill(global_2_local, local_2_global); }
Device().fence();
// fill
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
size_t num_errors = 0;
for (int i = 0; i < 100; ++i) {
find_test<Device> find(global_2_local, local_2_global, num_errors);
}
Device().fence();
// find
elasped_time = timer.seconds();
std::cout << elasped_time << std::endl;
ASSERT_EQ(num_errors, 0u);
}
} // namespace Performance
#endif // KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
|