1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefix=GFX940
define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX940-NEXT: v_mov_b32_e32 v2, 4.0
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT: buffer_inv sc0 sc1
; GFX940-NEXT: s_endpgm
%ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
ret void
}
define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX940-NEXT: v_mov_b32_e32 v2, 4.0
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT: buffer_inv sc0 sc1
; GFX940-NEXT: s_endpgm
%ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
ret void
}
define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
; GFX940-LABEL: flat_atomic_fadd_f32_rtn_pat:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v2, 4.0
; GFX940-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT: buffer_inv sc0 sc1
; GFX940-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
ret float %ret
}
define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
; GFX940-LABEL: local_atomic_fadd_ret_v2f16_offset:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 offset:65532
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
%result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
ret <2 x half> %result
}
define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
; GFX940-LABEL: local_atomic_fadd_noret_v2f16_offset:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ds_pk_add_f16 v0, v1 offset:65532
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
%unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
ret void
}
define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
; GFX940-LABEL: global_atomic_fadd_ret_v2f16_agent_offset:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: buffer_wbl2 sc1
; GFX940-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:1024 sc0
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: buffer_inv sc1
; GFX940-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
%result = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
ret <2 x half> %result
}
define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
; GFX940-LABEL: global_atomic_fadd_noret_v2f16_agent_offset:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: buffer_wbl2 sc1
; GFX940-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:1024
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: buffer_inv sc1
; GFX940-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
%unused = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
ret void
}
define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
; GFX940-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: buffer_wbl2 sc1
; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT: buffer_inv sc1
; GFX940-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr <2 x half>, ptr %ptr, i32 256
%result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
ret <2 x half> %result
}
define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
; GFX940-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: buffer_wbl2 sc1
; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT: buffer_inv sc1
; GFX940-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr <2 x half>, ptr %ptr, i32 256
%unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
ret void
}
attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" }
!0 = !{}
|