// Codegen test for the gfx90a floating-point atomic builtins: the RUN lines below verify both the emitted LLVM IR and the final assembly.
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a \
// RUN: %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a \
// RUN: -S -o - %s | FileCheck -check-prefix=GFX90A %s
// REQUIRES: amdgpu-registered-target
// Two-element half vector; operand type of the packed f16 atomic add test in this file.
typedef half __attribute__((ext_vector_type(2))) half2;
// Global-memory f64 atomic add builtin on a __global pointer.
// IR expectation: an agent-scope monotonic atomicrmw fadd in addrspace(1)
// carrying !amdgpu.no.fine.grained.memory metadata.
// Assembly expectation: global_atomic_add_f64.
// CHECK-LABEL: test_global_add_f64
// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// GFX90A-LABEL: test_global_add_f64$local:
// GFX90A: global_atomic_add_f64
void test_global_add_f64(__global double *addr, double x) {
// NOTE(review): rtn is never initialized, so the store below goes through an
// indeterminate pointer; presumably tolerated because the RUN lines only
// compile this file and never execute it -- confirm before reusing the pattern.
double *rtn;
// Storing the result keeps the builtin's return value in use.
*rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x);
}
// Global-memory packed f16 (<2 x half>) atomic add builtin on a __global pointer.
// IR expectation: an agent-scope monotonic atomicrmw fadd of <2 x half> in
// addrspace(1), align 4, with !amdgpu.no.fine.grained.memory metadata.
// Assembly expectation: global_atomic_pk_add_f16 with the exact operands listed
// below, so this test is sensitive to anything that changes register assignment.
// CHECK-LABEL: test_global_add_half2
// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// GFX90A-LABEL: test_global_add_half2
// GFX90A: global_atomic_pk_add_f16 v2, v[0:1], v2, off glc
void test_global_add_half2(__global half2 *addr, half2 x) {
// NOTE(review): rtn is never initialized; presumably tolerated because the
// test is only compiled, never run -- confirm.
half2 *rtn;
// Storing the result keeps the builtin's return value in use.
*rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x);
}
// Global-memory f64 atomic min builtin on a __global pointer.
// IR expectation: an agent-scope monotonic atomicrmw fmin in addrspace(1)
// with !amdgpu.no.fine.grained.memory metadata.
// Assembly expectation: global_atomic_min_f64.
// CHECK-LABEL: test_global_global_min_f64
// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// GFX90A-LABEL: test_global_global_min_f64$local
// GFX90A: global_atomic_min_f64
void test_global_global_min_f64(__global double *addr, double x){
// NOTE(review): rtn is never initialized; presumably tolerated because the
// test is only compiled, never run -- confirm.
double *rtn;
// Storing the result keeps the builtin's return value in use.
*rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x);
}
// Global-memory f64 atomic max builtin on a __global pointer.
// IR expectation: an agent-scope monotonic atomicrmw fmax in addrspace(1)
// with !amdgpu.no.fine.grained.memory metadata.
// Assembly expectation: global_atomic_max_f64.
// CHECK-LABEL: test_global_max_f64
// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// GFX90A-LABEL: test_global_max_f64$local
// GFX90A: global_atomic_max_f64
void test_global_max_f64(__global double *addr, double x){
// NOTE(review): rtn is never initialized; presumably tolerated because the
// test is only compiled, never run -- confirm.
double *rtn;
// Storing the result keeps the builtin's return value in use.
*rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x);
}
// Flat f64 atomic add builtin called with a __local pointer.
// IR expectation: an agent-scope monotonic atomicrmw fadd in addrspace(3);
// the pattern ends right after "align 8", i.e. no trailing metadata is accepted.
// Assembly expectation: the returning LDS form, ds_add_rtn_f64.
// CHECK-LABEL: test_flat_add_local_f64
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8{{$}}
// GFX90A-LABEL: test_flat_add_local_f64$local
// GFX90A: ds_add_rtn_f64
void test_flat_add_local_f64(__local double *addr, double x){
// NOTE(review): rtn is never initialized; presumably tolerated because the
// test is only compiled, never run -- confirm.
double *rtn;
// Storing the result keeps the builtin's return value in use.
*rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x);
}
// Flat f64 atomic add builtin called with a __global pointer.
// IR expectation: an agent-scope monotonic atomicrmw fadd in addrspace(1)
// with !amdgpu.no.fine.grained.memory metadata.
// Assembly expectation: the global form, global_atomic_add_f64.
// CHECK-LABEL: test_flat_global_add_f64
// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// GFX90A-LABEL: test_flat_global_add_f64$local
// GFX90A: global_atomic_add_f64
void test_flat_global_add_f64(__global double *addr, double x){
// NOTE(review): rtn is never initialized; presumably tolerated because the
// test is only compiled, never run -- confirm.
double *rtn;
// Storing the result keeps the builtin's return value in use.
*rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x);
}
// Flat f64 atomic min builtin called with a __generic pointer.
// IR expectation: an agent-scope monotonic atomicrmw fmin on a plain ptr
// (no addrspace qualifier) with !amdgpu.no.fine.grained.memory metadata.
// Assembly expectation: flat_atomic_min_f64.
// CHECK-LABEL: test_flat_min_flat_f64
// CHECK: = atomicrmw fmin ptr {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// GFX90A-LABEL: test_flat_min_flat_f64$local
// GFX90A: flat_atomic_min_f64
void test_flat_min_flat_f64(__generic double *addr, double x){
// NOTE(review): rtn is never initialized; presumably tolerated because the
// test is only compiled, never run -- confirm.
double *rtn;
// Storing the result keeps the builtin's return value in use.
*rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x);
}
// Flat f64 atomic min builtin called with a __global pointer.
// IR expectation: an agent-scope monotonic atomicrmw fmin in addrspace(1)
// with !amdgpu.no.fine.grained.memory metadata.
// Assembly expectation: the global form, global_atomic_min_f64. The function
// anchor uses the -LABEL form so the instruction match is confined to this
// function's assembly, the same way the neighbouring tests are anchored.
// CHECK-LABEL: test_flat_global_min_f64
// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// GFX90A-LABEL: test_flat_global_min_f64$local
// GFX90A: global_atomic_min_f64
void test_flat_global_min_f64(__global double *addr, double x){
  // NOTE(review): rtn is never initialized; presumably tolerated because the
  // test is only compiled, never run -- confirm.
  double *rtn;
  // Storing the result keeps the builtin's return value in use.
  *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x);
}
// Flat f64 atomic max builtin called with a __generic pointer.
// IR expectation: an agent-scope monotonic atomicrmw fmax on a plain ptr
// (no addrspace qualifier) with !amdgpu.no.fine.grained.memory metadata.
// Assembly expectation: flat_atomic_max_f64.
// CHECK-LABEL: test_flat_max_flat_f64
// CHECK: = atomicrmw fmax ptr {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// GFX90A-LABEL: test_flat_max_flat_f64$local
// GFX90A: flat_atomic_max_f64
void test_flat_max_flat_f64(__generic double *addr, double x){
// NOTE(review): rtn is never initialized; presumably tolerated because the
// test is only compiled, never run -- confirm.
double *rtn;
// Storing the result keeps the builtin's return value in use.
*rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x);
}
// Flat f64 atomic max builtin called with a __global pointer.
// IR expectation: an agent-scope monotonic atomicrmw fmax in addrspace(1)
// with !amdgpu.no.fine.grained.memory metadata.
// Assembly expectation: the global form, global_atomic_max_f64.
// CHECK-LABEL: test_flat_global_max_f64
// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// GFX90A-LABEL: test_flat_global_max_f64$local
// GFX90A: global_atomic_max_f64
void test_flat_global_max_f64(__global double *addr, double x){
// NOTE(review): rtn is never initialized; presumably tolerated because the
// test is only compiled, never run -- confirm.
double *rtn;
// Storing the result keeps the builtin's return value in use.
*rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x);
}
// LDS (ds) f64 atomic add builtin on a __local pointer.
// IR expectation: a monotonic atomicrmw fadd in addrspace(3), align 8; unlike
// the global/flat tests, the pattern has no syncscope between the operand and
// the ordering.
// Assembly expectation: the returning form, ds_add_rtn_f64. The function anchor
// uses the -LABEL form so the instruction match is confined to this function's
// assembly, the same way the neighbouring tests are anchored.
// CHECK-LABEL: test_ds_add_local_f64
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} monotonic, align 8
// GFX90A-LABEL: test_ds_add_local_f64$local
// GFX90A: ds_add_rtn_f64
void test_ds_add_local_f64(__local double *addr, double x){
  // NOTE(review): rtn is never initialized; presumably tolerated because the
  // test is only compiled, never run -- confirm.
  double *rtn;
  // Storing the result keeps the builtin's return value in use.
  *rtn = __builtin_amdgcn_ds_atomic_fadd_f64(addr, x);
}
// LDS (ds) f32 atomic add builtin on a __local pointer.
// IR expectation: a monotonic atomicrmw fadd of float in addrspace(3), align 4;
// the pattern has no syncscope between the operand and the ordering.
// Assembly expectation: the returning form, ds_add_rtn_f32.
// CHECK-LABEL: test_ds_addf_local_f32
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} monotonic, align 4
// GFX90A-LABEL: test_ds_addf_local_f32$local
// GFX90A: ds_add_rtn_f32
void test_ds_addf_local_f32(__local float *addr, float x){
// NOTE(review): rtn is never initialized; presumably tolerated because the
// test is only compiled, never run -- confirm.
float *rtn;
// Storing the result keeps the builtin's return value in use.
*rtn = __builtin_amdgcn_ds_atomic_fadd_f32(addr, x);
}
// Global-memory f32 atomic add builtin on a global pointer.
// IR expectation: an agent-scope monotonic atomicrmw fadd of float in
// addrspace(1), align 4, carrying both !amdgpu.no.fine.grained.memory and
// !amdgpu.ignore.denormal.mode metadata.
// Only the IR output has an expectation here; no assembly pattern is listed.
// Unlike the other tests in this file, the result pointer is a parameter
// rather than an uninitialized local.
// CHECK-LABEL: @test_global_add_f32
// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, float %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
void test_global_add_f32(float *rtn, global float *addr, float x) {
// Write the value returned by the atomic through the caller-supplied pointer.
*rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
}
|