File: main.cpp

package info (click to toggle)
kokkos 5.0.1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 15,140 kB
  • sloc: cpp: 225,293; sh: 1,250; python: 78; makefile: 16; fortran: 4; ansic: 2
file content (128 lines) | stat: -rw-r--r-- 4,588 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project

#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
import kokkos.core;
#else
#include <Kokkos_Core.hpp>
#endif
#include <Kokkos_Timer.hpp>
#include <Kokkos_Random.hpp>

template <class Scalar>
double test_atomic(int L, int N, int M, int K, int R,
                   Kokkos::View<const int**> offsets) {
  Kokkos::View<Scalar*> output("Output", N);
  Kokkos::Timer timer;

  for (int r = 0; r < R; r++)
    Kokkos::parallel_for(
        L, KOKKOS_LAMBDA(const int& i) {
          Scalar s = 2;
          for (int m = 0; m < M; m++) {
            for (int k = 0; k < K; k++) s = s * s + s;
            const int idx = (i + offsets(i, m)) % N;
            Kokkos::atomic_add(&output(idx), s);
          }
        });
  Kokkos::fence();
  double time = timer.seconds();

  return time;
}

template <class Scalar>
double test_no_atomic(int L, int N, int M, int K, int R,
                      Kokkos::View<const int**> offsets) {
  Kokkos::View<Scalar*> output("Output", N);
  Kokkos::Timer timer;
  for (int r = 0; r < R; r++)
    Kokkos::parallel_for(
        L, KOKKOS_LAMBDA(const int& i) {
          Scalar s = 2;
          for (int m = 0; m < M; m++) {
            for (int k = 0; k < K; k++) s = s * s + s;
            const int idx = (i + offsets(i, m)) % N;
            output(idx) += s;
          }
        });
  Kokkos::fence();
  double time = timer.seconds();
  return time;
}

int main(int argc, char* argv[]) {  // NOLINT(bugprone-exception-escape)
  Kokkos::initialize(argc, argv);
  {
    if (argc < 8) {
      printf("Arguments: L N M D K R T\n");
      printf("  L:   Number of iterations to run\n");
      printf("  N:   Length of array to do atomics into\n");
      printf("  M:   Number of atomics per iteration to do\n");
      printf("  D:   Distance from index i to do atomics into (randomly)\n");
      printf("  K:   Number of FMAD per atomic\n");
      printf("  R:   Number of repeats of the experiments\n");
      printf("  T:   Type of atomic\n");
      printf("       1 - int\n");
      printf("       2 - long\n");
      printf("       3 - float\n");
      printf("       4 - double\n");
      printf("       5 - complex<double>\n");
      printf("Example Input GPU:\n");
      printf("  Histogram : 1000000 1000 1 1000 1 10 1\n");
      printf("  MD Force : 100000 100000 100 1000 20 10 4\n");
      printf("  Matrix Assembly : 100000 1000000 50 1000 20 10 4\n");
      Kokkos::finalize();
      return 0;
    }

    int L    = std::stoi(argv[1]);
    int N    = std::stoi(argv[2]);
    int M    = std::stoi(argv[3]);
    int D    = std::stoi(argv[4]);
    int K    = std::stoi(argv[5]);
    int R    = std::stoi(argv[6]);
    int type = std::stoi(argv[7]);

    Kokkos::View<int**> offsets("Offsets", L, M);
    Kokkos::Random_XorShift64_Pool<> pool(12371);
    Kokkos::fill_random(offsets, pool, D);
    double time = 0;
    if (type == 1) time = test_atomic<int>(L, N, M, K, R, offsets);
    if (type == 2) time = test_atomic<long>(L, N, M, K, R, offsets);
    if (type == 3) time = test_atomic<float>(L, N, M, K, R, offsets);
    if (type == 4) time = test_atomic<double>(L, N, M, K, R, offsets);
    if (type == 5)
      time = test_atomic<Kokkos::complex<double> >(L, N, M, K, R, offsets);

    double time2 = 1;
    if (type == 1) time2 = test_no_atomic<int>(L, N, M, K, R, offsets);
    if (type == 2) time2 = test_no_atomic<long>(L, N, M, K, R, offsets);
    if (type == 3) time2 = test_no_atomic<float>(L, N, M, K, R, offsets);
    if (type == 4) time2 = test_no_atomic<double>(L, N, M, K, R, offsets);
    if (type == 5)
      time2 = test_no_atomic<Kokkos::complex<double> >(L, N, M, K, R, offsets);

    int size = 0;
    if (type == 1) size = sizeof(int);
    if (type == 2) size = sizeof(long);
    if (type == 3) size = sizeof(float);
    if (type == 4) size = sizeof(double);
    if (type == 5) size = sizeof(Kokkos::complex<double>);

    printf("%i\n", size);
    printf(
        "Time: %s %i %i %i %i %i %i (t_atomic: %e t_nonatomic: %e ratio: %lf "
        ")( GUpdates/s: %lf GB/s: %lf )\n",
        (type == 1)
            ? "int"
            : ((type == 2)
                   ? "long"
                   : ((type == 3) ? "float"
                                  : ((type == 4) ? "double" : "complex"))),
        L, N, M, D, K, R, time, time2, time / time2, 1.e-9 * L * R * M / time,
        1.0 * L * R * M * 2 * size / time / 1024 / 1024 / 1024);
  }
  Kokkos::finalize();
}