File: TestCuda_DebugPinUVMSpace.cpp

package info (click to toggle)
kokkos 4.7.01-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 16,636 kB
  • sloc: cpp: 223,676; sh: 2,446; makefile: 2,437; python: 91; fortran: 4; ansic: 2
file content (106 lines) | stat: -rw-r--r-- 3,289 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
//@HEADER
// ************************************************************************
//
//                        Kokkos v. 4.0
//       Copyright (2022) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <Kokkos_Core.hpp>
#include <TestCuda_Category.hpp>

namespace Test {

template <class View>
struct CopyFunctor {
  View a;
  View b;

  CopyFunctor(int N) : a(View("A", N)), b(View("B", N)) {}

  KOKKOS_INLINE_FUNCTION
  void operator()(int i) const { a(i) = b(i); }

  double time_copy(int R) {
    Kokkos::parallel_for("CopyFunctor::time_copy", a.extent(0), *this);
    Kokkos::fence();

    Kokkos::Timer timer;
    for (int r = 0; r < R; r++)
      Kokkos::parallel_for("CopyFunctor::time_copy", a.extent(0), *this);
    Kokkos::fence();
    return timer.seconds();
  }
};

// Timing-based check around KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST.
//
// The same copy kernel is timed on views in CudaSpace, CudaHostPinnedSpace and
// CudaUVMSpace. The UVM measurement is taken three times: before, during and
// after the window in which kokkos_impl_cuda_set_pin_uvm_to_host(true) is in
// effect (the call is only compiled when the macro is defined).
//
// Expectations, all relative to twice the CudaSpace time:
//   - both "not pinned" UVM runs are faster than that bound,
//   - the CudaHostPinnedSpace run is slower than that bound,
//   - the "pinned" UVM run is slower than that bound when the macro is
//     defined, and not slower when it is not.
//
// NOTE(review): the setter presumably makes subsequent CudaUVMSpace
// allocations behave like host-pinned memory; its implementation is not
// visible here - confirm against the Kokkos CUDA backend.
TEST(cuda, debug_pin_um_to_host) {
#ifdef KOKKOS_ARCH_AMPERE87
  GTEST_SKIP() << "skipping for Jetson devices that have integrated memory";
#endif
  const int N = 10000000;  // entries per view
  const int R = 100;       // timed kernel launches per measurement

  // Each functor lives in its own scope so its views are destroyed before the
  // next measurement constructs new ones.
  double time_cuda_space = 0.0;
  {
    CopyFunctor<Kokkos::View<int*, Kokkos::CudaSpace>> f(N);
    time_cuda_space = f.time_copy(R);
  }

  double time_cuda_host_pinned_space = 0.0;
  {
    CopyFunctor<Kokkos::View<int*, Kokkos::CudaHostPinnedSpace>> f(N);
    time_cuda_host_pinned_space = f.time_copy(R);
  }

  double time_cuda_uvm_space_not_pinned_1 = 0.0;
  {
    CopyFunctor<Kokkos::View<int*, Kokkos::CudaUVMSpace>> f(N);
    time_cuda_uvm_space_not_pinned_1 = f.time_copy(R);
  }

  double time_cuda_uvm_space_pinned = 0.0;
  {
    // The flag is switched on before the views are constructed and switched
    // off again before they go out of scope.
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
    kokkos_impl_cuda_set_pin_uvm_to_host(true);
#endif
    CopyFunctor<Kokkos::View<int*, Kokkos::CudaUVMSpace>> f(N);
    time_cuda_uvm_space_pinned = f.time_copy(R);
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
    kokkos_impl_cuda_set_pin_uvm_to_host(false);
#endif
  }

  // Second unpinned run: taken after the flag has been switched back off.
  double time_cuda_uvm_space_not_pinned_2 = 0.0;
  {
    CopyFunctor<Kokkos::View<int*, Kokkos::CudaUVMSpace>> f(N);
    time_cuda_uvm_space_not_pinned_2 = f.time_copy(R);
  }

  // Every comparison uses twice the CudaSpace time as the threshold.
  const double slow_threshold = time_cuda_space * 2.0;

  const bool uvm_approx_cuda_1 =
      time_cuda_uvm_space_not_pinned_1 < slow_threshold;
  const bool uvm_approx_cuda_2 =
      time_cuda_uvm_space_not_pinned_2 < slow_threshold;
  const bool pinned_slower_cuda = time_cuda_host_pinned_space > slow_threshold;
  const bool uvm_pinned_slower_cuda =
      time_cuda_uvm_space_pinned > slow_threshold;

  const bool passed =
      uvm_approx_cuda_1 && uvm_approx_cuda_2 && pinned_slower_cuda &&
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
      uvm_pinned_slower_cuda;
#else
      !uvm_pinned_slower_cuda;
#endif

  // The timings are streamed into the assertion so they are part of the
  // recorded failure message instead of a separate write to stdout.
  ASSERT_TRUE(passed) << "Time CudaSpace: " << time_cuda_space
                      << " CudaUVMSpace_1: " << time_cuda_uvm_space_not_pinned_1
                      << " CudaUVMSpace_2: " << time_cuda_uvm_space_not_pinned_2
                      << " CudaPinnedHostSpace: " << time_cuda_host_pinned_space
                      << " CudaUVMSpace_Pinned: " << time_cuda_uvm_space_pinned;
}

}  // namespace Test