File: main.cpp

package info (click to toggle)
kokkos 5.0.2-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 15,148 kB
  • sloc: cpp: 225,388; sh: 1,250; python: 78; makefile: 16; fortran: 4; ansic: 2
file content (110 lines) | stat: -rw-r--r-- 3,467 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project

#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
import kokkos.core;
#else
#include <Kokkos_Core.hpp>
#endif
#include <Kokkos_Timer.hpp>
#include "bench.hpp"
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <string>

// Explicit instantiation declarations for the four scalar types selectable
// through the P command-line option. `extern template` tells the compiler not
// to implicitly instantiate run_stride_unroll<T> in this translation unit, so
// the linker must find each instantiation in another object file.
// NOTE(review): the template itself is presumably declared in bench.hpp and
// instantiated once per type in separate sources -- confirm against the build.
// The ten int parameters are passed by main() in the order
// (N, K, R, D, U, F, T, S, B, I).
extern template void run_stride_unroll<float>(int, int, int, int, int, int, int,
                                              int, int, int);
extern template void run_stride_unroll<double>(int, int, int, int, int, int,
                                               int, int, int, int);
extern template void run_stride_unroll<int32_t>(int, int, int, int, int, int,
                                                int, int, int, int);
extern template void run_stride_unroll<int64_t>(int, int, int, int, int, int,
                                                int, int, int, int);

namespace {

// Prints the command-line synopsis, the meaning of every option and a few
// example GPU configurations to stdout.
void print_usage() {
  // P is the first positional argument; B and I are optional trailing ones.
  printf("Arguments: P N K R D U F T S [B [I]]\n");
  printf("  P:   Precision (1==float, 2==double, 3==int32_t, 4==int64_t)\n");
  printf("  N,K: dimensions of the 2D array to allocate\n");
  printf("  R:   how often to loop through the K dimension with each team\n");
  printf("  D:   distance between loaded elements (stride)\n");
  printf("  U:   how many independent flops to do per load\n");
  printf(
      "  F:   how many times to repeat the U unrolled operations before "
      "reading next element\n");
  printf("  T:   team size\n");
  printf(
      "  S:   shared memory per team (used to control occupancy on GPUs)\n");
  printf(
      "  B:   units for reported memory bandwidths (2=GiB, 10=GB, "
      "default=2)\n");
  printf("  I:   iterations of the kernel to time over (default=10)\n");
  printf("Example Input GPU:\n");
  printf("  Bandwidth Bound : 2 100000 1024 1 1 1 1 256 6000\n");
  printf("  Cache Bound     : 2 100000 1024 64 1 1 1 512 20000\n");
  printf("  Compute Bound   : 2 100000 1024 1 1 8 64 256 6000\n");
  printf("  Load Slots Used : 2 20000 256 32 16 1 1 256 6000\n");
  printf("  Inefficient Load: 2 20000 256 32 2 1 1 256 20000\n");
}

// Converts the command-line text `text` to an int stored in `value`.
// `name` is the option letter used in the diagnostic.
// Returns true on success. On failure (no leading integer, or a value that
// does not fit in int) a message is printed and `value` is left untouched;
// the std::stoi exception is contained here so it cannot escape main().
bool parse_int_arg(const char* name, const char* text, int& value) {
  try {
    value = std::stoi(text);
    return true;
  } catch (const std::exception&) {
    printf("Argument %s: '%s' is not a valid integer\n", name, text);
    return false;
  }
}

// Checks the options that have a restricted value set.
// Returns the diagnostic for the first violated constraint (checked in the
// order U, D, P, B, I) or nullptr when every option is acceptable.
const char* find_invalid_option(int P, int D, int U, int B, int I) {
  // The lower bound matters too: the message promises 1-8.
  if ((U < 1) || (U > 8)) {
    return "U must be 1-8\n";
  }
  switch (D) {
    case 1:
    case 2:
    case 4:
    case 8:
    case 16:
    case 32: break;
    default: return "D must be one of 1,2,4,8,16,32\n";
  }
  if ((P < 1) || (P > 4)) {
    return "P must be one of 1,2,3,4\n";
  }
  if ((B != 2) && (B != 10)) {
    return "B must be one of 2,10\n";
  }
  if (I < 1) {
    return "I must be >= 1\n";
  }
  return nullptr;
}

// Parses and validates the command line, then runs the benchmark for the
// requested precision. Must be called between Kokkos::initialize() and
// Kokkos::finalize(). Returns the process exit status.
// NOTE(review): invalid input still yields status 0, exactly as before;
// consider a nonzero status if no script relies on the current behaviour.
int run_from_command_line(int argc, char* argv[]) {
  // argv[1]..argv[9] are mandatory, hence at least 10 entries are required.
  if (argc < 10) {
    print_usage();
    return 0;
  }

  int P = 0;
  int N = 0;
  int K = 0;
  int R = 0;
  int D = 0;
  int U = 0;
  int F = 0;
  int T = 0;
  int S = 0;
  const bool required_ok =
      parse_int_arg("P", argv[1], P) && parse_int_arg("N", argv[2], N) &&
      parse_int_arg("K", argv[3], K) && parse_int_arg("R", argv[4], R) &&
      parse_int_arg("D", argv[5], D) && parse_int_arg("U", argv[6], U) &&
      parse_int_arg("F", argv[7], F) && parse_int_arg("T", argv[8], T) &&
      parse_int_arg("S", argv[9], S);
  if (!required_ok) {
    return 0;
  }

  // Optional trailing arguments keep their defaults when absent.
  int B = 2;
  if ((argc >= 11) && !parse_int_arg("B", argv[10], B)) {
    return 0;
  }
  int I = 10;
  if ((argc >= 12) && !parse_int_arg("I", argv[11], I)) {
    return 0;
  }

  if (const char* complaint = find_invalid_option(P, D, U, B, I)) {
    printf("%s", complaint);
    return 0;
  }

  // P was validated to be 1..4 above, so exactly one case runs.
  switch (P) {
    case 1: run_stride_unroll<float>(N, K, R, D, U, F, T, S, B, I); break;
    case 2: run_stride_unroll<double>(N, K, R, D, U, F, T, S, B, I); break;
    case 3: run_stride_unroll<int32_t>(N, K, R, D, U, F, T, S, B, I); break;
    case 4: run_stride_unroll<int64_t>(N, K, R, D, U, F, T, S, B, I); break;
    default: break;
  }
  return 0;
}

}  // namespace

// Benchmark entry point: initializes Kokkos, runs the benchmark described by
// the command line, and finalizes Kokkos.
// All work is delegated to run_from_command_line() so that every exit path,
// including usage output and rejected arguments, passes through the single
// Kokkos::finalize() call below.
int main(int argc, char* argv[]) {
  Kokkos::initialize();
  const int status = run_from_command_line(argc, argv);
  Kokkos::finalize();
  return status;
}