; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
target triple = "amdgcn--"
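
; Test lowering of memory operations on buffer fat pointers (addrspace(7)) to
; llvm.amdgcn.raw.ptr.buffer.* intrinsics on the underlying addrspace(8)
; resource. The generated checks show metadata carried over onto the intrinsic
; calls, volatile encoded as bit 31 of the aux operand (i32 -2147483648), and
; atomic orderings stronger than monotonic realized as fences around the calls.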
define void @loads(ptr addrspace(8) %buf) {
; CHECK-LABEL: define void @loads
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[SCALAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: [[VEC2:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: [[VEC4:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: [[NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 2), !nontemporal [[META0:![0-9]+]]
; CHECK-NEXT: [[INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1:![0-9]+]]
; CHECK-NEXT: [[NONTEMPORAL_INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1]], !nontemporal [[META0]]
; CHECK-NEXT: [[VOLATILE:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT: [[VOLATILE_NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483646), !nontemporal [[META0]]
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[ATOMIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483647)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: [[ATOMIC_MONOTONIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 1)
; CHECK-NEXT: [[ATOMIC_ACQUIRE:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 1)
; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret void
;
%base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
%p = getelementptr float, ptr addrspace(7) %base, i32 4
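; %p is 16 bytes past %base (4 x float), which is why every intrinsic call in
; the checks carries a constant i32 16 offset.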
%scalar = load float, ptr addrspace(7) %p, align 4
%vec2 = load <2 x float>, ptr addrspace(7) %p, align 8
%vec4 = load <4 x float>, ptr addrspace(7) %p, align 16
%nontemporal = load float, ptr addrspace(7) %p, !nontemporal !0
%invariant = load float, ptr addrspace(7) %p, !invariant.load !1
%nontemporal.invariant = load float, ptr addrspace(7) %p, !nontemporal !0, !invariant.load !1
%volatile = load volatile float, ptr addrspace(7) %p
%volatile.nontemporal = load volatile float, ptr addrspace(7) %p, !nontemporal !0
%atomic = load atomic volatile float, ptr addrspace(7) %p syncscope("wavefront") seq_cst, align 4
%atomic.monotonic = load atomic float, ptr addrspace(7) %p syncscope("wavefront") monotonic, align 4
%atomic.acquire = load atomic float, ptr addrspace(7) %p acquire, align 4
ret void
}
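
; Store variants (scalar, vector, nontemporal, volatile, atomic) should map to
; llvm.amdgcn.raw.ptr.buffer.store.* with the same aux-bit and fence treatment
; as the loads above.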
define void @stores(ptr addrspace(8) %buf, float %f, <4 x float> %f4) {
; CHECK-LABEL: define void @stores
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], <4 x float> [[F4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[F4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 2), !nontemporal [[META0]]
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483646), !nontemporal [[META0]]
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483647)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 1)
; CHECK-NEXT: fence release
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 1)
; CHECK-NEXT: ret void
;
%base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
%p = getelementptr float, ptr addrspace(7) %base, i32 4
store float %f, ptr addrspace(7) %p, align 4
store <4 x float> %f4, ptr addrspace(7) %p, align 16
store float %f, ptr addrspace(7) %p, !nontemporal !0
store volatile float %f, ptr addrspace(7) %p
store volatile float %f, ptr addrspace(7) %p, !nontemporal !0
store atomic volatile float %f, ptr addrspace(7) %p syncscope("wavefront") seq_cst, align 4
store atomic float %f, ptr addrspace(7) %p syncscope("wavefront") monotonic, align 4
store atomic float %f, ptr addrspace(7) %p release, align 4
ret void
}
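
; Each atomicrmw operation should map to the matching
; llvm.amdgcn.raw.ptr.buffer.atomic.* intrinsic, bracketed by release/acquire
; fences for the seq_cst orderings used here.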
define void @atomicrmw(ptr addrspace(8) %buf, float %f, i32 %i) {
; CHECK-LABEL: define void @atomicrmw
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[XCHG:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[ADD:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[SUB:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[AND:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[XOR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[MAX:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[FADD:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[FMAX:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[FMIN:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: ret void
;
%base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
%p = getelementptr float, ptr addrspace(7) %base, i32 4
; Fence insertion is tested by loads and stores
%xchg = atomicrmw xchg ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
%add = atomicrmw add ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
%sub = atomicrmw sub ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
%and = atomicrmw and ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
%or = atomicrmw or ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
%xor = atomicrmw xor ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
%min = atomicrmw min ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
%max = atomicrmw max ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
%umin = atomicrmw umin ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
%umax = atomicrmw umax ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
%fadd = atomicrmw fadd ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
%fmax = atomicrmw fmax ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
%fmin = atomicrmw fmin ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
; Check a no-return atomic
atomicrmw add ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
ret void
}
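
; cmpxchg should become llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32 (new
; value first, then the expected value), with the { i32, i1 } result rebuilt
; by comparing the returned old value against the expected one.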
define {i32, i1} @cmpxchg(ptr addrspace(8) %buf, i32 %wanted, i32 %new) {
; CHECK-LABEL: define { i32, i1 } @cmpxchg
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[WANTED:%.*]], i32 [[NEW:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 [[NEW]], i32 [[WANTED]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } poison, i32 [[RET]], 0
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[RET]], [[WANTED]]
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 [[TMP2]], 1
; CHECK-NEXT: ret { i32, i1 } [[TMP3]]
;
%base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
%p = getelementptr i32, ptr addrspace(7) %base, i32 4
%ret = cmpxchg volatile ptr addrspace(7) %p, i32 %wanted, i32 %new syncscope("wavefront") acq_rel monotonic, align 4
ret {i32, i1} %ret
}
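
; For a weak cmpxchg the success flag is not recomputed: the checks show the
; i1 field of the returned pair is left as poison.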
define {i32, i1} @cmpxchg_weak(ptr addrspace(8) %buf, i32 %wanted, i32 %new) {
; CHECK-LABEL: define { i32, i1 } @cmpxchg_weak
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[WANTED:%.*]], i32 [[NEW:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 [[NEW]], i32 [[WANTED]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } poison, i32 [[RET]], 0
; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
;
%base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
%p = getelementptr i32, ptr addrspace(7) %base, i32 4
%ret = cmpxchg weak ptr addrspace(7) %p, i32 %wanted, i32 %new syncscope("wavefront") acq_rel monotonic, align 4
ret {i32, i1} %ret
}
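
; !0 is the nontemporal hint node; !1 is the empty node used for !invariant.load.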
!0 = !{i32 1}
!1 = !{}