File: launch_bounds_reduce.cpp

package info (click to toggle)
kokkos 5.0.1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 15,140 kB
  • sloc: cpp: 225,293; sh: 1,250; python: 78; makefile: 16; fortran: 4; ansic: 2
file content (132 lines) | stat: -rw-r--r-- 3,509 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project

#include <Kokkos_Core.hpp>
#include <cstdio>

//
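// LaunchBounds reduction (parallel_reduce) example:
//   1. Start up Kokkos
//   2. Execute the same parallel_reduce loop twice in the default execution
//      space, using a functor to define the loop body: once with the default
//      policy and once with a RangePolicy that carries LaunchBounds
//   3. Shut down Kokkos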
//
struct collision {
  // Reduction functor.
  // For each index i we generate 10 hashes and count the collisions among
  // them; parallel_reduce then sums those counts over all i.
  // Note that we only count collisions within the 10 hashes generated for
  // one i, never across different values of i.
  // This function was chosen as one that very simply can increase the
  // register count.
  using value_type = int;

  // Hashes the 4 bytes of q, in memory order, into an int.
  //
  // A simple hash by Justin Sobel
  // Thanks to Arash Partow (partow.net)
  //
  // The mixing is done in unsigned arithmetic so that the left shift and the
  // addition wrap around instead of overflowing a signed int (undefined
  // behavior). The produced values match what the signed formulation yields
  // on compilers that wrap.
  KOKKOS_INLINE_FUNCTION
  int hash(int q) const {
    // Inspecting an object through a char pointer is permitted by the
    // aliasing rules; the bytes are visited in the platform's memory order.
    const char* bytes = reinterpret_cast<const char*>(&q);
    unsigned int state = 1315423911u;
    for (int n = 0; n < 4; ++n) {
      // Promote the (possibly signed) char to int first so the byte
      // contributes the same value as it would in an int expression.
      const unsigned int byte =
          static_cast<unsigned int>(static_cast<int>(bytes[n]));
      // Right shift of the signed view of the state. This is an arithmetic
      // shift since C++20 and implementation-defined before that.
      const unsigned int shifted_right =
          static_cast<unsigned int>(static_cast<int>(state) >> 2);
      state ^= (state << 5) + byte + shifted_right;
    }
    return static_cast<int>(state);
  }

  // Adds to lsum the number of colliding pairs among the ten hash buckets
  // derived from i (at most 45, one per unordered pair).
  KOKKOS_INLINE_FUNCTION
  void operator()(const int i, int& lsum) const {
    // This is a silly function which generates 10 hashes
    // then checks for collisions.
    // The bucket is the remainder modulo 64, which may be negative because
    // hash() can return a negative value; only equality matters here.
    // The body is intentionally kept as straight-line scalar code: see the
    // note on register count at the top of the struct.
    int a = hash(i) % 64;
    int b = hash(i * 3) % 64;
    int c = hash(i * 5) % 64;
    int d = hash(i * 7) % 64;
    int e = hash(i * 11) % 64;
    int f = hash(i * 17) % 64;
    int g = hash(i * 23) % 64;
    int h = hash(i * 29) % 64;
    int j = hash(i * 31) % 64;
    int k = hash(i * 37) % 64;

    // Pairs starting with a
    if (a == b) lsum++;
    if (a == c) lsum++;
    if (a == d) lsum++;
    if (a == e) lsum++;
    if (a == f) lsum++;
    if (a == g) lsum++;
    if (a == h) lsum++;
    if (a == j) lsum++;
    if (a == k) lsum++;
    // Pairs starting with b
    if (b == c) lsum++;
    if (b == d) lsum++;
    if (b == e) lsum++;
    if (b == f) lsum++;
    if (b == g) lsum++;
    if (b == h) lsum++;
    if (b == j) lsum++;
    if (b == k) lsum++;
    // Pairs starting with c
    if (c == d) lsum++;
    if (c == e) lsum++;
    if (c == f) lsum++;
    if (c == g) lsum++;
    if (c == h) lsum++;
    if (c == j) lsum++;
    if (c == k) lsum++;
    // Pairs starting with d
    if (d == e) lsum++;
    if (d == f) lsum++;
    if (d == g) lsum++;
    if (d == h) lsum++;
    if (d == j) lsum++;
    if (d == k) lsum++;
    // Pairs starting with e
    if (e == f) lsum++;
    if (e == g) lsum++;
    if (e == h) lsum++;
    if (e == j) lsum++;
    if (e == k) lsum++;
    // Pairs starting with f
    if (f == g) lsum++;
    if (f == h) lsum++;
    if (f == j) lsum++;
    if (f == k) lsum++;
    // Pairs starting with g
    if (g == h) lsum++;
    if (g == j) lsum++;
    if (g == k) lsum++;
    // Pairs starting with h
    if (h == j) lsum++;
    if (h == k) lsum++;
    // Last pair
    if (j == k) lsum++;
  }
};

// Runs the collision-counting reduction twice, once with the default policy
// and once with a LaunchBounds-annotated RangePolicy, prints the count, and
// checks that both runs agree.
//
// Returns 0 when the two sums match and -1 otherwise. Kokkos is finalized on
// both paths.
int main(int argc, char* argv[]) {
  Kokkos::initialize(argc, argv);
  const int n = 10000;

  // Compute and count hash collisions in
  // parallel, using Kokkos.
  // This is not really a useful algorithm, but it demonstrates the
  // LaunchBounds functionality
  int sum1 = 0;
  int sum2 = 0;

  // Without LaunchBounds, the kernel uses 56 registers
  // (figure quoted from the original example; not derivable from this file)
  Kokkos::parallel_reduce(n, collision(), sum1);

  // With LaunchBounds, we can reduce the register usage to 32
  // (again as quoted by the original example)
  Kokkos::parallel_reduce(
      Kokkos::RangePolicy<Kokkos::LaunchBounds<512, 4>>(0, n), collision(),
      sum2);

  printf(
      "Number of collisions, "
      "computed in parallel, is %i\n",
      sum1);

  // Both launches run the same functor over the same range, so the sums
  // are expected to be identical.
  const bool results_match = (sum1 == sum2);
  if (!results_match) {
    printf("Uh-oh! Results do not match\n");
  }

  // Finalize unconditionally: the mismatch path previously returned before
  // reaching this call, leaving Kokkos initialized at program exit.
  Kokkos::finalize();

  return results_match ? 0 : -1;
}