1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
#include <vector>
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
import kokkos.core;
#else
#include <Kokkos_Core.hpp>
#endif
namespace Test {
namespace {
// Functor passed to Kokkos::count_and_fill_crs by the tests in this file.
// For a given row it reports (row % 4) + 1 entries. When `fill` is non-null
// it additionally writes the values 1, 2, ..., count into fill[0 .. count).
template <class ExecSpace>
struct CountFillFunctor {
  KOKKOS_INLINE_FUNCTION
  std::int32_t operator()(std::int32_t row, float *fill) const {
    const auto count = (row % 4) + 1;
    // A null pointer means only the entry count is being requested.
    if (!fill) {
      return count;
    }
    // Slot k - 1 receives the 1-based position k.
    for (std::int32_t k = 1; k <= count; ++k) {
      fill[k - 1] = k;
    }
    return count;
  }
};
/* RunUpdateCrsTest
* 4 test cases:
* 1. use the member object, which is constructed directly using the copy
* constructor
* 2. explicitly copy construct into a local variable
* 3. default construct and then assign from the input object
* 4. construct the object from views
*/
template <class CrsType, class ExecSpace, class scalarType>
struct RunUpdateCrsTest {
  // Work tags: each one selects the matching operator() overload below.
  struct TestOne {};
  struct TestTwo {};
  struct TestThree {};
  struct TestFour {};

  // Copy of the graph passed to the constructor; all kernel variants read it.
  CrsType graph;

  RunUpdateCrsTest(CrsType g_in) : graph(g_in) {}

  // Launches this functor with work tag `Tag` over rows [0, graph.numRows()).
  template <class Tag>
  void launch(const char *label) {
    parallel_for(label, Kokkos::RangePolicy<ExecSpace, Tag>(0, graph.numRows()),
                 *this);
  }

  // Runs the kernel variant selected by nTest (1 through 4); any other value
  // launches nothing.
  void run_test(int nTest) {
    switch (nTest) {
      case 1: launch<TestOne>("TestCrs1"); break;
      case 2: launch<TestTwo>("TestCrs2"); break;
      case 3: launch<TestThree>("TestCrs3"); break;
      case 4: launch<TestFour>("TestCrs4"); break;
      default: break;
    }
  }

  // Overwrites the entries of `row` in g_in with 1, 4, 9, ..., i.e. the square
  // of each entry's 1-based position within the row.
  KOKKOS_INLINE_FUNCTION
  void updateGraph(const CrsType &g_in, const scalarType row) const {
    auto offsets = g_in.row_map;
    auto values  = g_in.entries;
    auto first   = offsets(row);
    // Number of entries in this row.
    auto length = offsets(row + 1) - first;
    for (scalarType k = 0; k < length; ++k) {
      values(first + k) = (k + 1) * (k + 1);
    }
  }

  // Case 1: use the Crs held as a class member.
  KOKKOS_INLINE_FUNCTION
  void operator()(const TestOne &, const scalarType row) const {
    updateGraph(graph, row);
  }

  // Case 2: use a local Crs that is copy-constructed from the member.
  KOKKOS_INLINE_FUNCTION
  void operator()(const TestTwo &, const scalarType row) const {
    CrsType copied(graph);
    updateGraph(copied, row);
  }

  // Case 3: use a local Crs that is default-constructed and then assigned
  // from the member.
  KOKKOS_INLINE_FUNCTION
  void operator()(const TestThree &, const scalarType row) const {
    CrsType assigned;
    assigned = graph;
    updateGraph(assigned, row);
  }

  // Case 4: use a local Crs constructed from the member's row_map and entries
  // views.
  KOKKOS_INLINE_FUNCTION
  void operator()(const TestFour &, const scalarType row) const {
    CrsType rebuilt(graph.row_map, graph.entries);
    updateGraph(rebuilt, row);
  }
};
template <class ExecSpace>
void test_count_fill(std::int32_t nrows) {
Kokkos::Crs<float, ExecSpace, void, std::int32_t> graph;
Kokkos::count_and_fill_crs(graph, nrows, CountFillFunctor<ExecSpace>());
ASSERT_EQ(graph.numRows(), nrows);
auto row_map = Kokkos::create_mirror_view(graph.row_map);
Kokkos::deep_copy(row_map, graph.row_map);
auto entries = Kokkos::create_mirror_view(graph.entries);
Kokkos::deep_copy(entries, graph.entries);
for (std::int32_t row = 0; row < nrows; ++row) {
auto n = (row % 4) + 1;
ASSERT_EQ(row_map(row + 1) - row_map(row), n);
for (std::int32_t j = 0; j < n; ++j) {
ASSERT_EQ(entries(row_map(row) + j), j + 1);
}
}
}
// Test the Crs constructors and assignment operator:
// use count-and-fill to create and populate the initial graph,
// then use parallel_for with the Crs directly to update its contents,
// and finally verify the results.
template <class ExecSpace>
void test_constructor(std::int32_t nrows) {
for (int nTest = 1; nTest < 5; nTest++) {
using crs_type = Kokkos::Crs<float, ExecSpace, void, std::int32_t>;
crs_type graph;
Kokkos::count_and_fill_crs(graph, nrows, CountFillFunctor<ExecSpace>());
ASSERT_EQ(graph.numRows(), nrows);
RunUpdateCrsTest<crs_type, ExecSpace, std::int32_t> crstest(graph);
crstest.run_test(nTest);
auto row_map = Kokkos::create_mirror_view(graph.row_map);
Kokkos::deep_copy(row_map, graph.row_map);
auto entries = Kokkos::create_mirror_view(graph.entries);
Kokkos::deep_copy(entries, graph.entries);
for (std::int32_t row = 0; row < nrows; ++row) {
auto n = (row % 4) + 1;
ASSERT_EQ(row_map(row + 1) - row_map(row), n);
for (std::int32_t j = 0; j < n; ++j) {
ASSERT_EQ(entries(row_map(row) + j), (j + 1) * (j + 1));
}
}
}
}
} // anonymous namespace
// Runs the count-and-fill check for a range of row counts, including the
// empty graph.
TEST(TEST_CATEGORY, crs_count_fill) {
  const int row_counts[] = {0, 1, 2, 3, 13, 100, 1000, 10000};
  for (const int nrows : row_counts) {
    test_count_fill<TEST_EXECSPACE>(nrows);
  }
}
// Runs the constructor/assignment check for a range of row counts, including
// the empty graph.
TEST(TEST_CATEGORY, crs_copy_constructor) {
  const int row_counts[] = {0, 1, 2, 3, 13, 100, 1000, 10000};
  for (const int nrows : row_counts) {
    test_constructor<TEST_EXECSPACE>(nrows);
  }
}
} // namespace Test
|