1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
import kokkos.core;
#else
#include <Kokkos_Core.hpp>
#endif
#include "policy_perf_test.hpp"
#include <iostream>
int main(int argc, char* argv[]) {
Kokkos::initialize(argc, argv);
if (argc < 10) {
printf(" Ten arguments are needed to run this program:\n");
printf(
" (1)team_range, (2)thread_range, (3)vector_range, (4)outer_repeat, "
"(5)thread_repeat, (6)vector_repeat, (7)team_size, (8)vector_size, "
"(9)schedule, (10)test_type\n");
printf(" team_range: number of teams (league_size)\n");
printf(" thread_range: range for nested TeamThreadRange parallel_*\n");
printf(" vector_range: range for nested ThreadVectorRange parallel_*\n");
printf(" outer_repeat: number of repeats for outer parallel_* call\n");
printf(
" thread_repeat: number of repeats for TeamThreadRange parallel_* "
"call\n");
printf(
" vector_repeat: number of repeats for ThreadVectorRange parallel_* "
"call\n");
printf(" team_size: number of team members (team_size)\n");
printf(" vector_size: desired vectorization (if possible)\n");
printf(" schedule: 1 == Static 2 == Dynamic\n");
printf(
" test_type: 3-digit code XYZ for testing (nested) parallel_*\n");
printf(
" code key: XYZ X in {1,2,3,4,5}, Y in {0,1,2}, Z in "
"{0,1,2}\n");
printf(" TeamPolicy:\n");
printf(
" X: 0 = none (never used, makes no sense); 1 = "
"parallel_for; 2 = parallel_reduce\n");
printf(
" Y: 0 = none; 1 = parallel_for; 2 = "
"parallel_reduce\n");
printf(
" Z: 0 = none; 1 = parallel_for; 2 = "
"parallel_reduce\n");
printf(" RangePolicy:\n");
printf(
" X: 3 = parallel_for; 4 = parallel_reduce; 5 = "
"parallel_scan\n");
printf(" Y: 0 = none\n");
printf(" Z: 0 = none\n");
printf(" Example Input:\n");
printf(" 100000 32 32 100 100 100 8 1 1 100\n");
Kokkos::finalize();
return 0;
}
int team_range = std::stoi(argv[1]);
int thread_range = std::stoi(argv[2]);
int vector_range = std::stoi(argv[3]);
int outer_repeat = std::stoi(argv[4]);
int thread_repeat = std::stoi(argv[5]);
int vector_repeat = std::stoi(argv[6]);
int team_size = std::stoi(argv[7]);
int vector_size = std::stoi(argv[8]);
int schedule = std::stoi(argv[9]);
int test_type = std::stoi(argv[10]);
int disable_verbose_output = 0;
if (argc > 11) {
disable_verbose_output = std::stoi(argv[11]);
}
if (schedule != 1 && schedule != 2) {
printf("schedule: %d\n", schedule);
printf("Options for schedule are: 1 == Static 2 == Dynamic\n");
Kokkos::finalize();
return -1;
}
if (test_type != 100 && test_type != 110 && test_type != 111 &&
test_type != 112 && test_type != 120 && test_type != 121 &&
test_type != 122 && test_type != 200 && test_type != 210 &&
test_type != 211 && test_type != 212 && test_type != 220 &&
test_type != 221 && test_type != 222 && test_type != 300 &&
test_type != 400 && test_type != 500) {
printf("Incorrect test_type option\n");
Kokkos::finalize();
return -2;
}
double result = 0.0;
Kokkos::parallel_reduce(
"parallel_reduce warmup", Kokkos::TeamPolicy<>(10, 1),
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type&, double& lval) {
lval += 1;
},
result);
using view_type_1d = Kokkos::View<double*, Kokkos::LayoutRight>;
using view_type_2d = Kokkos::View<double**, Kokkos::LayoutRight>;
using view_type_3d = Kokkos::View<double***, Kokkos::LayoutRight>;
// Allocate view without initializing
// Call a 'warmup' test with 1 repeat - this will initialize the corresponding
// view appropriately for test and should obey first-touch etc Second call to
// test is the one we actually care about and time
view_type_1d v_1(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_1"),
static_cast<size_t>(team_range) * team_size);
view_type_2d v_2(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_2"),
static_cast<size_t>(team_range) * team_size, thread_range);
view_type_3d v_3(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_3"),
static_cast<size_t>(team_range) * team_size, thread_range,
vector_range);
double result_computed = 0.0;
double result_expect = 0.0;
double time = 0.0;
if (schedule == 1) {
if (test_type != 500) {
// warmup - no repeat of loops
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
team_range, thread_range, vector_range, 1, 1, 1, team_size,
vector_size, test_type, v_1, v_2, v_3, result_computed, result_expect,
time);
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
result_computed, result_expect, time);
} else {
// parallel_scan: initialize 1d view for parallel_scan
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
team_range, thread_range, vector_range, 1, 1, 1, team_size,
vector_size, 100, v_1, v_2, v_3, result_computed, result_expect,
time);
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
result_computed, result_expect, time);
}
}
if (schedule == 2) {
if (test_type != 500) {
// warmup - no repeat of loops
test_policy<Kokkos::Schedule<Kokkos::Dynamic>, int>(
team_range, thread_range, vector_range, 1, 1, 1, team_size,
vector_size, test_type, v_1, v_2, v_3, result_computed, result_expect,
time);
test_policy<Kokkos::Schedule<Kokkos::Dynamic>, int>(
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
result_computed, result_expect, time);
} else {
// parallel_scan: initialize 1d view for parallel_scan
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
team_range, thread_range, vector_range, 1, 1, 1, team_size,
vector_size, 100, v_1, v_2, v_3, result_computed, result_expect,
time);
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
result_computed, result_expect, time);
}
}
if (disable_verbose_output == 0) {
printf("%7i %4i %2i %9i %4i %4i %4i %2i %1i %3i %e %e %lf\n", team_range,
thread_range, vector_range, outer_repeat, thread_repeat,
vector_repeat, team_size, vector_size, schedule, test_type,
result_computed, result_expect, time);
} else {
printf("%lf\n", time);
}
Kokkos::finalize();
return 0;
}
|