1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
|
// RUN: %clang_cc1 -emit-llvm -x hip %s -o - -triple x86_64-linux-gnu \
// RUN: | FileCheck -check-prefixes=CHECK,HOST %s
// RUN: %clang_cc1 -emit-llvm -x hip %s -o - -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: | FileCheck -check-prefixes=CHECK,DEV %s
#include "Inputs/cuda.h"
// CHECK: %class.anon = type { ptr, float, ptr, ptr }
// CHECK: %class.anon.0 = type { ptr, float, ptr, ptr }
// CHECK: %class.anon.1 = type { ptr, ptr, ptr }
// CHECK: %class.anon.2 = type { ptr, float, ptr, ptr }
// HOST: call void @_ZN8DevByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon)
// DEV: define amdgpu_kernel void @_ZN8DevByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon)
// Only the device function passes arugments by value.
namespace DevByVal {
__device__ float fun(float x, float y) {
return x;
}
float fun(const float &x, const float &y) {
return x;
}
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
// HOST: call void @_ZN9HostByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.0)
// DEV: define amdgpu_kernel void @_ZN9HostByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.0)
// Only the host function passes arugments by value.
namespace HostByVal {
float fun(float x, float y) {
return x;
}
__device__ float fun(const float &x, const float &y) {
return x;
}
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
// HOST: call void @_ZN9BothByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.1)
// DEV: define amdgpu_kernel void @_ZN9BothByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.1)
// Both the host and device functions pass arugments by value.
namespace BothByVal {
float fun(float x, float y) {
return x;
}
__device__ float fun(float x, float y) {
return x;
}
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
// HOST: call void @_ZN12NeitherByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.2)
// DEV: define amdgpu_kernel void @_ZN12NeitherByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.2)
// Neither the host nor device function passes arugments by value.
namespace NeitherByVal {
float fun(const float& x, const float& y) {
return x;
}
__device__ float fun(const float& x, const float& y) {
return x;
}
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
|