1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
|
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
#include "Inputs/cuda.h"
// Coerced struct from `struct S` without all generic pointers lowered into
// global ones.
// On the host-side compilation, generic pointer won't be coerced.
// HOST-NOT: %struct.S.coerce
// HOST-NOT: %struct.T.coerce
// HOST: define{{.*}} void @_Z22__device_stub__kernel1Pi(ptr noundef %x)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(ptr addrspace(1){{.*}} %x.coerce)
// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
// OPT: [[VAL:%.*]] = load i32, ptr addrspace(1) %x.coerce, align 4{{$}}
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
// OPT: store i32 [[INC]], ptr addrspace(1) %x.coerce, align 4
// OPT: ret void
__global__ void kernel1(int *x) {
x[0]++;
}
// HOST: define{{.*}} void @_Z22__device_stub__kernel2Ri(ptr noundef nonnull align 4 dereferenceable(4) %x)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(ptr addrspace(1){{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
// OPT: [[VAL:%.*]] = load i32, ptr addrspace(1) %x.coerce, align 4{{$}}
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
// OPT: store i32 [[INC]], ptr addrspace(1) %x.coerce, align 4
// OPT: ret void
__global__ void kernel2(int &x) {
x++;
}
// HOST: define{{.*}} void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(ptr addrspace(2) noundef %x, ptr addrspace(1) noundef %y)
// CHECK-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(ptr addrspace(2){{.*}} %x, ptr addrspace(1){{.*}} %y)
// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
__global__ void kernel3(__attribute__((address_space(2))) int *x,
__attribute__((address_space(1))) int *y) {
y[0] = x[0];
}
// COMMON-LABEL: define{{.*}} void @_Z4funcPi(ptr{{.*}} %x)
// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
__device__ void func(int *x) {
x[0]++;
}
struct S {
int *x;
float *y;
};
// `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address inferring pass should be able to
// assume they are global pointers.
//
// HOST: define{{.*}} void @_Z22__device_stub__kernel41S(ptr %s.coerce0, ptr %s.coerce1)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(ptr addrspace(4){{.*}} byref(%struct.S) align 8 %0)
// OPT: [[P0:%.*]] = load ptr, ptr addrspace(4) %0, align 8
// OPT: [[G0:%.*]] ={{.*}} addrspacecast ptr [[P0]] to ptr addrspace(1)
// OPT: [[R1:%.*]] = getelementptr inbounds i8, ptr addrspace(4) %0, i64 8
// OPT: [[P1:%.*]] = load ptr, ptr addrspace(4) [[R1]], align 8
// OPT: [[G1:%.*]] ={{.*}} addrspacecast ptr [[P1]] to ptr addrspace(1)
// OPT: [[V0:%.*]] = load i32, ptr addrspace(1) [[G0]], align 4, !amdgpu.noclobber ![[MD:[0-9]+]]
// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
// OPT: store i32 [[INC]], ptr addrspace(1) [[G0]], align 4
// OPT: [[V1:%.*]] = load float, ptr addrspace(1) [[G1]], align 4
// OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00
// OPT: store float [[ADD]], ptr addrspace(1) [[G1]], align 4
// OPT: ret void
__global__ void kernel4(struct S s) {
s.x[0]++;
s.y[0] += 1.f;
}
// If a pointer to struct is passed, only the pointer itself is coerced into the global one.
// HOST: define{{.*}} void @_Z22__device_stub__kernel5P1S(ptr noundef %s)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(ptr addrspace(1){{.*}} %s.coerce)
__global__ void kernel5(struct S *s) {
s->x[0]++;
s->y[0] += 1.f;
}
struct T {
float *x[2];
};
// `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address inferring pass should be able to
// assume they are global pointers.
//
// HOST: define{{.*}} void @_Z22__device_stub__kernel61T(ptr %t.coerce0, ptr %t.coerce1)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(ptr addrspace(4){{.*}} byref(%struct.T) align 8 %0)
// OPT: [[P0:%.*]] = load ptr, ptr addrspace(4) %0, align 8
// OPT: [[G0:%.*]] ={{.*}} addrspacecast ptr [[P0]] to ptr addrspace(1)
// OPT: [[R1:%.*]] = getelementptr inbounds i8, ptr addrspace(4) %0, i64 8
// OPT: [[P1:%.*]] = load ptr, ptr addrspace(4) [[R1]], align 8
// OPT: [[G1:%.*]] ={{.*}} addrspacecast ptr [[P1]] to ptr addrspace(1)
// OPT: [[V0:%.*]] = load float, ptr addrspace(1) [[G0]], align 4, !amdgpu.noclobber ![[MD]]
// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
// OPT: store float [[ADD0]], ptr addrspace(1) [[G0]], align 4
// OPT: [[V1:%.*]] = load float, ptr addrspace(1) [[G1]], align 4
// OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00
// OPT: store float [[ADD1]], ptr addrspace(1) [[G1]], align 4
// OPT: ret void
__global__ void kernel6(struct T t) {
t.x[0][0] += 1.f;
t.x[1][0] += 2.f;
}
// Check that coerced pointers retain the noalias attribute when qualified with __restrict.
// HOST: define{{.*}} void @_Z22__device_stub__kernel7Pi(ptr noalias noundef %x)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(ptr addrspace(1) noalias{{.*}} %x.coerce)
__global__ void kernel7(int *__restrict x) {
x[0]++;
}
// Single element struct.
struct SS {
float *x;
};
// HOST: define{{.*}} void @_Z22__device_stub__kernel82SS(ptr %a.coerce)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(ptr addrspace(1){{.*}} %a.coerce)
// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
// OPT: [[VAL:%.*]] = load float, ptr addrspace(1) %a.coerce, align 4{{$}}
// OPT: [[INC:%.*]] = fadd contract float [[VAL]], 3.000000e+00
// OPT: store float [[INC]], ptr addrspace(1) %a.coerce, align 4
// OPT: ret void
__global__ void kernel8(struct SS a) {
*a.x += 3.f;
}
|