1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#include <vector>
#include <iostream>
#include <Kokkos_Core.hpp>
namespace Test {
namespace {
/* This test is meant to be the WorkGraph equivalent of the Task DAG Scheduler
test, please see TestTaskScheduler.hpp for that test. The algorithm computes
the N-th fibonacci number as follows:
- Each "task" or "work item" computes the i-th fibonacci number
- If a task as (i < 2), it will record the known answer ahead of time.
- If a task has (i >= 2), it will "spawn" two more tasks to compute
the (i - 1) and (i - 2) fibonacci numbers.
We do NOT do any de-duplication of these tasks.
De-duplication would result in only (N - 2) tasks which must be run in
serial. We allow duplicates both to increase the number of tasks and to
increase the amount of available parallelism.
*/
template <class ExecSpace>
struct TestWorkGraph {
using MemorySpace = typename ExecSpace::memory_space;
using Policy = Kokkos::WorkGraphPolicy<std::int32_t, ExecSpace>;
using Graph = typename Policy::graph_type;
using RowMap = typename Graph::row_map_type;
using Entries = typename Graph::entries_type;
using Values = Kokkos::View<long*, MemorySpace>;
long m_input;
Graph m_graph;
Graph m_transpose;
Values m_values;
TestWorkGraph(long arg_input) : m_input(arg_input) {
form_graph();
transpose_crs(m_transpose, m_graph);
}
inline long full_fibonacci(long n) {
constexpr long mask = 0x03;
long fib[4] = {0, 1, 1, 2};
for (long i = 2; i <= n; ++i) {
fib[i & mask] = fib[(i - 1) & mask] + fib[(i - 2) & mask];
}
return fib[n & mask];
}
struct HostEntry {
long input;
std::int32_t parent;
};
std::vector<HostEntry> form_host_graph() {
std::vector<HostEntry> g;
g.push_back({m_input, -1});
for (std::int32_t i = 0; i < std::int32_t(g.size()); ++i) {
auto e = g.at(std::size_t(i));
if (e.input < 2) continue;
/* This part of the host graph formation is the equivalent of task
spawning in the Task DAG system. Notice how each task which is not a
base case spawns two more tasks, without any de-duplication */
g.push_back({e.input - 1, i});
g.push_back({e.input - 2, i});
}
return g;
}
void form_graph() {
auto hg = form_host_graph();
m_graph.row_map =
RowMap("row_map", hg.size() + 1); // row map always has one more
m_graph.entries =
Entries("entries", hg.size() - 1); // all but the first have a parent
m_values = Values("values", hg.size());
// printf("%zu work items\n", hg.size());
auto h_row_map = Kokkos::create_mirror_view(m_graph.row_map);
auto h_entries = Kokkos::create_mirror_view(m_graph.entries);
auto h_values = Kokkos::create_mirror_view(m_values);
h_row_map(0) = 0;
for (std::int32_t i = 0; i < std::int32_t(hg.size()); ++i) {
auto& e = hg.at(std::size_t(i));
h_row_map(i + 1) = i;
if (e.input < 2) {
h_values(i) = e.input;
}
if (e.parent == -1) continue;
h_entries(i - 1) = e.parent;
}
Kokkos::deep_copy(m_graph.row_map, h_row_map);
Kokkos::deep_copy(m_graph.entries, h_entries);
Kokkos::deep_copy(m_values, h_values);
}
KOKKOS_INLINE_FUNCTION
void operator()(std::int32_t i) const {
auto begin = m_transpose.row_map(i);
auto end = m_transpose.row_map(i + 1);
for (auto j = begin; j < end; ++j) {
auto k = m_transpose.entries(j);
m_values(i) += m_values(k);
}
}
void test_for() {
Kokkos::parallel_for(Policy(m_graph), *this);
Kokkos::fence();
auto h_values = Kokkos::create_mirror_view(m_values);
Kokkos::deep_copy(h_values, m_values);
ASSERT_EQ(h_values(0), full_fibonacci(m_input));
}
};
} // anonymous namespace
TEST(TEST_CATEGORY, workgraph_fib) {
int limit = 27;
for (int i = 0; i < limit; ++i) {
TestWorkGraph<TEST_EXECSPACE> f(i);
f.test_for();
}
// TestWorkGraph< TEST_EXECSPACE > f(2);
// f.test_for();
}
} // namespace Test
|