1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
#include <cstdio>
#include <sstream>
#include <iostream>
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
import kokkos.core;
#else
#include <Kokkos_Core.hpp>
#endif
namespace Test {
namespace {
// Plain aggregate holding N values of type T. It provides the element-wise
// operator+= that lets it serve as the value type of the reductions in this
// file.
template <class T, int N>
class MyArray {
 public:
  T values[N];

  // Adds each element of `src` onto the element at the same index of this
  // array.
  KOKKOS_INLINE_FUNCTION
  void operator+=(const MyArray& src) {
    int idx = 0;
    while (idx < N) {
      values[idx] += src.values[idx];
      ++idx;
    }
  }
};
// Team-level functor with an empty body, launched through parallel_for.
// Its only data member is an array of S doubles, so S controls the size of
// the functor object. T and N are not used by this functor.
template <class T, int N, class PolicyType, int S>
struct FunctorFor {
  double static_array[S];

  // The team handle is accepted but ignored; the kernel does no work.
  KOKKOS_INLINE_FUNCTION
  void operator()(const typename PolicyType::member_type&) const {}
};
// Team-level functor launched through parallel_reduce with a MyArray<T, N>
// as the reduction value. Its only data member is an array of S doubles, so
// S controls the size of the functor object.
template <class T, int N, class PolicyType, int S>
struct FunctorReduce {
  double static_array[S];

  // Adds (1 + lval.values[0]) to every element of `lval`. The team handle is
  // ignored. Element 0 is updated first, so the remaining elements are
  // incremented using its already-updated value.
  KOKKOS_INLINE_FUNCTION
  void operator()(const typename PolicyType::member_type&,
                  MyArray<T, N>& lval) const {
    int idx = 0;
    while (idx < N) {
      lval.values[idx] += 1 + lval.values[0];
      ++idx;
    }
  }
};
} // namespace
// Team policy on the test execution space with no explicit launch bounds.
using policy_type = Kokkos::TeamPolicy<TEST_EXECSPACE>;
// Team policy on the test execution space with LaunchBounds<128, 8>.
using policy_type_128_8 =
Kokkos::TeamPolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<128, 8> >;
// Special case for NVIDIA architectures that do not have room for 2048
// threads: with LaunchBounds<1024, 2>, ptxas emits a warning that becomes an
// error when warnings are treated as errors: "ptxas error : Value of threads
// per SM for entry _ZN6... is out of range. .minnctapersm will be ignored".
// On the architectures listed below the alias therefore uses
// LaunchBounds<1024, 1>, which knowingly makes the name policy_type_1024_2
// inaccurate there.
#if defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE86) || \
defined(KOKKOS_ARCH_AMPERE87) || defined(KOKKOS_ARCH_ADA89) || \
defined(KOKKOS_ARCH_BLACKWELL120)
using policy_type_1024_2 =
Kokkos::TeamPolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<1024, 1> >;
#else
using policy_type_1024_2 =
Kokkos::TeamPolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<1024, 2> >;
#endif
template <class T, int N, class PolicyType, int S>
void test_team_policy_max_recommended_static_size(int scratch_size) {
PolicyType p = PolicyType(10000, Kokkos::AUTO, 4)
.set_scratch_size(0, Kokkos::PerTeam(scratch_size));
int team_size_max_for = p.team_size_max(FunctorFor<T, N, PolicyType, S>(),
Kokkos::ParallelForTag());
int team_size_rec_for = p.team_size_recommended(
FunctorFor<T, N, PolicyType, S>(), Kokkos::ParallelForTag());
int team_size_max_reduce = p.team_size_max(
FunctorReduce<T, N, PolicyType, S>(), Kokkos::ParallelReduceTag());
int team_size_rec_reduce = p.team_size_recommended(
FunctorReduce<T, N, PolicyType, S>(), Kokkos::ParallelReduceTag());
ASSERT_GE(team_size_max_for, team_size_rec_for);
ASSERT_GE(team_size_max_reduce, team_size_rec_reduce);
ASSERT_GE(team_size_max_for, team_size_max_reduce);
Kokkos::parallel_for(PolicyType(10000, team_size_max_for, 4)
.set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
FunctorFor<T, N, PolicyType, S>());
Kokkos::parallel_for(PolicyType(10000, team_size_rec_for, 4)
.set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
FunctorFor<T, N, PolicyType, S>());
MyArray<T, N> val;
double n_leagues = 10000;
Kokkos::parallel_reduce(
PolicyType(n_leagues, team_size_max_reduce, 4)
.set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
FunctorReduce<T, N, PolicyType, S>(), val);
Kokkos::parallel_reduce(
PolicyType(n_leagues, team_size_rec_reduce, 4)
.set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
FunctorReduce<T, N, PolicyType, S>(), val);
Kokkos::fence();
}
// Runs the team-size test twice for the given policy and scratch size: once
// with functors holding a single double and once with functors holding a
// large static array.
template <class T, int N, class PolicyType>
void test_team_policy_max_recommended(int scratch_size) {
  // FIXME_SYCL prevent running out of total kernel argument size limit
#ifdef KOKKOS_ENABLE_SYCL
  constexpr int large_static_size = 100;
#else
  constexpr int large_static_size = 1000;
#endif

  test_team_policy_max_recommended_static_size<T, N, PolicyType, 1>(
      scratch_size);
  test_team_policy_max_recommended_static_size<T, N, PolicyType,
                                               large_static_size>(
      scratch_size);
}
// Exercises the team-size queries for three policy types, two reduction
// value widths (2 and 16 doubles) and three scratch requests each: none,
// a third of the budget, and the full budget.
TEST(TEST_CATEGORY, team_policy_max_recommended) {
  // All scratch requests derive from the level-0 maximum reported by the
  // default policy. The launch-bound policies divide that budget by 8 and by
  // 2 respectively.
  const int full_scratch  = policy_type::scratch_size_max(0);
  const int third_scratch = full_scratch / 3;

  // Reduction value of 2 doubles.
  test_team_policy_max_recommended<double, 2, policy_type>(0);
  test_team_policy_max_recommended<double, 2, policy_type>(third_scratch);
  test_team_policy_max_recommended<double, 2, policy_type>(full_scratch);

  test_team_policy_max_recommended<double, 2, policy_type_128_8>(0);
  test_team_policy_max_recommended<double, 2, policy_type_128_8>(
      third_scratch / 8);
  test_team_policy_max_recommended<double, 2, policy_type_128_8>(
      full_scratch / 8);

  test_team_policy_max_recommended<double, 2, policy_type_1024_2>(0);
  test_team_policy_max_recommended<double, 2, policy_type_1024_2>(
      third_scratch / 2);
  test_team_policy_max_recommended<double, 2, policy_type_1024_2>(
      full_scratch / 2);

  // Reduction value of 16 doubles.
  test_team_policy_max_recommended<double, 16, policy_type>(0);
  test_team_policy_max_recommended<double, 16, policy_type>(third_scratch);
  test_team_policy_max_recommended<double, 16, policy_type>(full_scratch);

  test_team_policy_max_recommended<double, 16, policy_type_128_8>(0);
  test_team_policy_max_recommended<double, 16, policy_type_128_8>(
      third_scratch / 8);
  test_team_policy_max_recommended<double, 16, policy_type_128_8>(
      full_scratch / 8);

  test_team_policy_max_recommended<double, 16, policy_type_1024_2>(0);
  test_team_policy_max_recommended<double, 16, policy_type_1024_2>(
      third_scratch / 2);
  test_team_policy_max_recommended<double, 16, policy_type_1024_2>(
      full_scratch / 2);
}
// Reduction functor that folds each team's league rank into the running
// minimum and maximum stored in `update`.
template <typename TeamHandleType, typename ReducerValueType>
struct MinMaxTeamLeagueRank {
  KOKKOS_FUNCTION void operator()(const TeamHandleType& team,
                                  ReducerValueType& update) const {
    const int rank = team.league_rank();

    // Lower the minimum or raise the maximum only when this rank lies
    // outside the range seen so far.
    if (update.min_val > rank) update.min_val = rank;
    if (update.max_val < rank) update.max_val = rank;
  }
};
// Runs a MinMax reduction over the league ranks of a dynamically scheduled
// team policy and checks that the result spans 0 .. league_size - 1.
TEST(TEST_CATEGORY, team_policy_minmax_scalar_without_plus_equal_k) {
  using exec_space_type = TEST_EXECSPACE;
  using team_policy_type =
      Kokkos::TeamPolicy<exec_space_type, Kokkos::Schedule<Kokkos::Dynamic>>;
  using team_member_type = typename team_policy_type::member_type;
  using minmax_type      = Kokkos::MinMax<int, Kokkos::HostSpace>;
  using minmax_value     = typename minmax_type::value_type;

  constexpr int league_size = 17;

  // The reducer writes its result into `result`.
  minmax_value result;
  minmax_type minmax_reducer(result);

  team_policy_type policy(league_size, Kokkos::AUTO);
  MinMaxTeamLeagueRank<team_member_type, minmax_value> rank_functor;
  Kokkos::parallel_reduce(policy, rank_functor, minmax_reducer);

  ASSERT_EQ(result.min_val, 0);
  ASSERT_EQ(result.max_val, league_size - 1);
}
} // namespace Test
|