File: auto_zero_copy_globals.cpp

package info (click to toggle)
llvm-toolchain-21 1%3A21.1.8-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,236,724 kB
  • sloc: cpp: 7,619,776; ansic: 1,433,958; asm: 1,058,748; python: 252,197; f90: 94,671; objc: 70,753; lisp: 42,813; pascal: 18,401; sh: 8,601; ml: 5,111; perl: 4,720; makefile: 3,719; awk: 3,523; javascript: 2,272; xml: 892; fortran: 770
file content (79 lines) | stat: -rw-r--r-- 2,713 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
// clang-format off
// RUN: %libomptarget-compilexx-generic
// RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=CHECK

// REQUIRES: amdgpu
// REQUIRES: unified_shared_memory

// clang-format on

#include <cstdint>
#include <cstdio>

/// Test for globals under automatic zero-copy.
/// Because we are building without unified_shared_memory
/// requirement pragma, all globals are allocated in the device
/// memory of all used GPUs. To ensure those globals contain the intended
/// values, we need to execute H2D and D2H memory copies even if we are running
/// in automatic zero-copy. This only applies to globals. Local variables (their
/// host pointers) are passed to the kernels by-value, according to the
/// automatic zero-copy behavior.

#pragma omp begin declare target
int32_t x;     // 4 bytes
int32_t z[10]; // 40 bytes
int32_t *k;    // 20 bytes
#pragma omp end declare target

int main() {
  int32_t *dev_k = nullptr;
  x = 3;
  int32_t y = -1;
  for (size_t t = 0; t < 10; t++)
    z[t] = t;
  k = new int32_t[5];

  printf("Host pointer for k = %p\n", k);
  for (size_t t = 0; t < 5; t++)
    k[t] = -t;

/// target update to forces a copy between host and device global, which we must
/// execute to keep the two global copies consistent. CHECK: Copying data from
/// host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name=z
#pragma omp target update to(z[ : 10])

/// target map with always modifier (for x) forces a copy between host and
/// device global, which we must execute to keep the two global copies
/// consistent. k's content (host address) is passed by-value to the kernel
/// (Size=20 case). y, being a local variable, is also passed by-value to the
/// kernel (Size=4 case) CHECK: Return HstPtrBegin {{.*}} Size=4 for unified
/// shared memory CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared
/// memory CHECK: Copying data from host to device, HstPtr={{.*}},
/// TgtPtr={{.*}}, Size=4, Name=x
#pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y)   \
    map(from : dev_k)
  {
    x++;
    y++;
    for (size_t t = 0; t < 10; t++)
      z[t]++;
    dev_k = k;
  }
/// CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
/// Size=20, Name=k

/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
/// Size=4, Name=x

/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
/// Size=40, Name=z
#pragma omp target update from(z[ : 10])

  /// CHECK-NOT: k pointer not correctly passed to kernel
  if (dev_k != k)
    printf("k pointer not correctly passed to kernel\n");

  delete[] k;
  return 0;
}