File: Test16_ParallelScan.hpp

package info (click to toggle)
kokkos 5.0.1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 15,140 kB
  • sloc: cpp: 225,293; sh: 1,250; python: 78; makefile: 16; fortran: 4; ansic: 2
file content (147 lines) | stat: -rw-r--r-- 4,463 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project

#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
import kokkos.core;
#else
#include <Kokkos_Core.hpp>
#endif
#include <gtest/gtest.h>

/// @Kokkos_Feature_Level_Required:16
// Incremental test for parallel_scan.
// Performs a scan on a 1D view of doubles and checks the result for
// correctness.

namespace Test {

// Element type stored in the test views and accumulated by the scan functors.
using value_type = double;
// Extent of every test view and upper bound of every range policy below.
const int N      = 10;

// Scan functor written as a plain aggregate: it declares no constructors,
// assignment operators, or destructor of its own.
template <typename ExecSpace>
struct TrivialScanFunctor {
  Kokkos::View<value_type *, ExecSpace> d_data;

  // Adds element i to the running sum `update_value`. When `final` is set,
  // element i is overwritten with the value `update_value` held on entry.
  KOKKOS_FUNCTION
  void operator()(const int i, value_type &update_value,
                  const bool final) const {
    // Remember the incoming partial sum before element i is folded in.
    const value_type prefix = update_value;
    update_value += d_data(i);
    if (final) {
      d_data(i) = prefix;
    }
  }
};

// Scan functor with user-declared special member functions, used to check
// that parallel_scan accepts a functor that is not a trivial aggregate.
template <typename ExecSpace>
struct NonTrivialScanFunctor {
  Kokkos::View<value_type *, ExecSpace> d_data;

  // Wraps the view that is scanned in place.
  NonTrivialScanFunctor(const Kokkos::View<value_type *, ExecSpace> &data)
      : d_data(data) {}

  NonTrivialScanFunctor(NonTrivialScanFunctor const &)            = default;
  NonTrivialScanFunctor(NonTrivialScanFunctor &&)                 = default;
  NonTrivialScanFunctor &operator=(NonTrivialScanFunctor const &) = default;
  NonTrivialScanFunctor &operator=(NonTrivialScanFunctor &&)      = default;

  // Deliberately user-provided and not marked device-callable: the test also
  // verifies that such a destructor is acceptable.
  ~NonTrivialScanFunctor() {}

  // Adds element i to the running sum `update_value`. When `final` is set,
  // element i is overwritten with the value `update_value` held on entry.
  KOKKOS_FUNCTION
  void operator()(const int i, value_type &update_value,
                  const bool final) const {
    // Read the element first; it may be overwritten just below.
    const value_type current = d_data(i);
    if (final) {
      d_data(i) = update_value;
    }
    update_value += current;
  }
};

// Scan functor whose call operator is a template over both the index type
// and the accumulator type.
template <typename ExecSpace>
struct GenericExclusiveScanFunctor {
  Kokkos::View<value_type *, ExecSpace> d_data;

  // Adds element i to the running sum `update_value`. When `final` is set,
  // element i is overwritten with the value `update_value` held on entry.
  template <typename IndexType, typename ValueType>
  KOKKOS_FUNCTION void operator()(const IndexType i, ValueType &update_value,
                                  const bool final) const {
    // Convert to the accumulator type before the element can be overwritten.
    const ValueType contribution = d_data(i);
    if (final) {
      d_data(i) = update_value;
    }
    update_value += contribution;
  }
};

template <class ExecSpace>
struct TestScan {
  // 1D  View of double
  using View_1D = typename Kokkos::View<value_type *, ExecSpace>;

  template <typename FunctorType>
  void parallel_scan() {
    View_1D d_data("data", N);

    // Initialize data.
    Kokkos::parallel_for(
        Kokkos::RangePolicy<ExecSpace>(0, N),
        KOKKOS_LAMBDA(const int i) { d_data(i) = i * 0.5; });

    // Exclusive parallel_scan call
    Kokkos::parallel_scan(Kokkos::RangePolicy<ExecSpace>(0, N),
                          FunctorType{d_data});

    // Copy back the data.
    auto h_data =
        Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), d_data);

    // Check Correctness
    ASSERT_EQ(h_data(0), 0.0);
    value_type upd = h_data(0);
    for (int i = 1; i < N; ++i) {
      upd += (i - 1) * 0.5;
      ASSERT_EQ(h_data(i), upd);
    }
  }
};

// Runs parallel_scan with a result argument and checks both the scanned view
// and the returned total. The struct itself is also used as the parallel_for
// functor that fills the view.
template <class ExecSpace>
struct TestScanWithTotal {
  // 1D  View of double
  using View_1D  = typename Kokkos::View<value_type *, ExecSpace>;
  View_1D d_data = View_1D("data", N);

  // Fill functor: stores i * 0.5 in element i.
  template <typename IndexType>
  KOKKOS_FUNCTION void operator()(IndexType i) const {
    d_data(i) = i * 0.5;
  }

  // (Re)fills d_data, scans it with FunctorType while requesting the total,
  // and asserts that element i holds the sum of the original elements
  // 0 .. i-1 and that the total equals N * (N - 1) * 0.25.
  template <typename FunctorType>
  void parallel_scan() {
    // Initialize data.
    Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0, N), *this);

    // Start from a sentinel that can never equal the expected (non-negative)
    // sum, so the final assertion fails deterministically instead of reading
    // an indeterminate value if the scan does not store a result.
    value_type total = -1.0;
    // Exclusive parallel_scan call
    Kokkos::parallel_scan(Kokkos::RangePolicy<ExecSpace>(0, N),
                          FunctorType{d_data}, total);

    // Copy back the data.
    auto h_data =
        Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), d_data);

    // Check Correctness
    ASSERT_EQ(h_data(0), 0.0);
    value_type upd = h_data(0);
    for (int i = 1; i < N; ++i) {
      upd += (i - 1) * 0.5;
      ASSERT_EQ(h_data(i), upd);
    }
    // Sum of i * 0.5 for i in [0, N) is N * (N - 1) / 2 * 0.5.
    ASSERT_EQ(total, N * (N - 1) * 0.25);
  }
};

// Exercises parallel_scan with each functor flavour, first without and then
// with the total-returning overload.
TEST(TEST_CATEGORY, IncrTest_16_parallelscan) {
  using exec_space = TEST_EXECSPACE;

  // Scan only.
  TestScan<exec_space> scan_only;
  scan_only.parallel_scan<TrivialScanFunctor<exec_space>>();
  scan_only.parallel_scan<NonTrivialScanFunctor<exec_space>>();

  // Scan and also retrieve the total.
  TestScanWithTotal<exec_space> scan_with_total;
  scan_with_total.parallel_scan<TrivialScanFunctor<exec_space>>();
  scan_with_total.parallel_scan<NonTrivialScanFunctor<exec_space>>();
  scan_with_total.parallel_scan<GenericExclusiveScanFunctor<exec_space>>();
}

}  // namespace Test