1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
|
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: nvptx64-nvidia-cuda
// XFAIL: nvptx64-nvidia-cuda-oldDriver
// XFAIL: nvptx64-nvidia-cuda-LTO
// Fails on amdgpu with error: GPU Memory Error
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-oldDriver
// XFAIL: amdgcn-amd-amdhsa-LTO
#include <omp.h>
#include <stdint.h>
#include <stdio.h>
// ---------------------------------------------------------------------------
// Various definitions copied from OpenMP RTL
// Declared by hand here; main() calls it to register the requires flags
// manually for Clang versions that do not support requires.
extern void __tgt_register_requires(int64_t);
// End of definitions copied from OpenMP RTL.
// ---------------------------------------------------------------------------
// The whole test runs with the unified_shared_memory requirement in effect.
#pragma omp requires unified_shared_memory
// Number of int elements in each host array and in each device buffer.
#define N 1024
// Reset the three host arrays to their starting values: every element of A
// becomes 0, every element of B becomes 1, and C[k] holds its own index k.
// Exactly N elements of each array are written.
void init(int A[], int B[], int C[]) {
  int idx = 0;
  while (idx < N) {
    A[idx] = 0;
    B[idx] = 1;
    C[idx] = idx;
    idx++;
  }
}
// Count the elements of A that differ from the expected result i + 2, i.e.
// B[i] + C[i] + 1 with B[i] == 1 and C[i] == i as set up by init().
static int count_mismatches(const int A[]) {
  int mismatches = 0;
  for (int i = 0; i < N; ++i) {
    if (A[i] != i + 2)
      mismatches++;
  }
  return mismatches;
}

// Test driver for the OpenMP device memory routines under
// unified_shared_memory. Part one allocates device buffers, copies B and C to
// the device, computes A on the device and copies it back. Part two
// associates the host arrays B and C with the device buffers, queries
// presence inside a target data region, recomputes A and disassociates again.
//
// The text and order of the printed lines are matched by the FileCheck
// directives interleaved below and must stay exactly as they are.
//
// Returns 0 when the run reaches the end, or 1 if a device allocation failed
// (in which case the device buffers are never touched).
int main(int argc, char *argv[]) {
  const int device = omp_get_default_device();

  // Manual registration of requires flags for Clang versions
  // that do not support requires.
  __tgt_register_requires(8);

  const int host = omp_get_initial_device();

  // CHECK: Initial device: [[INITIAL_DEVICE:[0-9]+]]
  printf("Initial device: %d\n", host);
  // CHECK: Num devices: [[INITIAL_DEVICE]]
  printf("Num devices: %d\n", omp_get_num_devices());

  //
  // Target alloc & target memcpy
  //
  int A[N], B[N], C[N];

  // Init
  init(A, B, C);

  // Host pointers to the first element of each array.
  int *pA = &A[0];
  int *pB = &B[0];
  int *pC = &C[0];

  const size_t bytes = N * sizeof(int);
  int *d_A = (int *)omp_target_alloc(bytes, device);
  int *d_B = (int *)omp_target_alloc(bytes, device);
  int *d_C = (int *)omp_target_alloc(bytes, device);

  // CHECK: omp_target_alloc succeeded
  const int alloc_ok = d_A && d_B && d_C;
  printf("omp_target_alloc %s\n", alloc_ok ? "succeeded" : "failed");
  if (!alloc_ok) {
    // Without all three buffers the copies and the target region below would
    // operate on NULL device pointers; release what we got and stop here.
    if (d_A)
      omp_target_free(d_A, device);
    if (d_B)
      omp_target_free(d_B, device);
    if (d_C)
      omp_target_free(d_C, device);
    return 1;
  }

  // omp_target_memcpy reports failure with a non-zero return; remember any
  // failed transfer so it is counted as a test failure below.
  int copy_errors = 0;
  copy_errors += omp_target_memcpy(d_B, pB, bytes, 0, 0, device, host) != 0;
  copy_errors += omp_target_memcpy(d_C, pC, bytes, 0, 0, device, host) != 0;

#pragma omp target is_device_ptr(d_A, d_B, d_C) device(device)
  {
#pragma omp parallel for schedule(static, 1)
    for (int i = 0; i < N; i++) {
      d_A[i] = d_B[i] + d_C[i] + 1;
    }
  }

  copy_errors += omp_target_memcpy(pA, d_A, bytes, 0, 0, host, device) != 0;

  // CHECK: Test omp_target_memcpy: Succeeded
  int fail = copy_errors + count_mismatches(A);
  if (fail) {
    printf("Test omp_target_memcpy: Failed\n");
  } else {
    printf("Test omp_target_memcpy: Succeeded\n");
  }

  //
  // target_is_present and target_associate/disassociate_ptr
  //
  init(A, B, C);

  // CHECK: B is not present, associating it...
  // CHECK: omp_target_associate_ptr B succeeded
  if (!omp_target_is_present(B, device)) {
    printf("B is not present, associating it...\n");
    int rc = omp_target_associate_ptr(B, d_B, bytes, 0, device);
    printf("omp_target_associate_ptr B %s\n", !rc ? "succeeded" : "failed");
  }

  // CHECK: C is not present, associating it...
  // CHECK: omp_target_associate_ptr C succeeded
  if (!omp_target_is_present(C, device)) {
    printf("C is not present, associating it...\n");
    int rc = omp_target_associate_ptr(C, d_C, bytes, 0, device);
    printf("omp_target_associate_ptr C %s\n", !rc ? "succeeded" : "failed");
  }

  // CHECK: Inside target data: A is not present
  // CHECK: Inside target data: B is present
  // CHECK: Inside target data: C is present
#pragma omp target data map(from : B, C) device(device)
  {
    printf("Inside target data: A is%s present\n",
           omp_target_is_present(A, device) ? "" : " not");
    printf("Inside target data: B is%s present\n",
           omp_target_is_present(B, device) ? "" : " not");
    printf("Inside target data: C is%s present\n",
           omp_target_is_present(C, device) ? "" : " not");

#pragma omp target map(from : A) device(device)
    {
#pragma omp parallel for schedule(static, 1)
      for (int i = 0; i < N; i++)
        A[i] = B[i] + C[i] + 1;
    }
  }

  // CHECK: B is present, disassociating it...
  // CHECK: omp_target_disassociate_ptr B succeeded
  // CHECK: C is present, disassociating it...
  // CHECK: omp_target_disassociate_ptr C succeeded
  if (omp_target_is_present(B, device)) {
    printf("B is present, disassociating it...\n");
    int rc = omp_target_disassociate_ptr(B, device);
    printf("omp_target_disassociate_ptr B %s\n", !rc ? "succeeded" : "failed");
  }
  if (omp_target_is_present(C, device)) {
    printf("C is present, disassociating it...\n");
    int rc = omp_target_disassociate_ptr(C, device);
    printf("omp_target_disassociate_ptr C %s\n", !rc ? "succeeded" : "failed");
  }

  // CHECK: Test omp_target_associate_ptr: Succeeded
  fail = count_mismatches(A);
  if (fail) {
    printf("Test omp_target_associate_ptr: Failed\n");
  } else {
    printf("Test omp_target_associate_ptr: Succeeded\n");
  }

  omp_target_free(d_A, device);
  omp_target_free(d_B, device);
  omp_target_free(d_C, device);

  printf("Done!\n");

  return 0;
}
|