1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
|
# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - | FileCheck %s
--- |
; ModuleID = 'memory-legalizer-multiple-mem-operands.ll'
source_filename = "memory-legalizer-multiple-mem-operands.ll"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
define amdgpu_kernel void @multiple_mem_operands(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) #0 {
entry:
%scratch0 = alloca [8192 x i32]
%scratch1 = alloca [8192 x i32]
%scratchptr01 = bitcast [8192 x i32]* %scratch0 to i32*
store i32 1, i32* %scratchptr01
%scratchptr12 = bitcast [8192 x i32]* %scratch1 to i32*
store i32 2, i32* %scratchptr12
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
if: ; preds = %entry
%if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
%if_value = load atomic i32, i32* %if_ptr syncscope("workgroup") seq_cst, align 4
br label %done, !structurizecfg.uniform !0
else: ; preds = %entry
%else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
%else_value = load atomic i32, i32* %else_ptr syncscope("agent") unordered, align 4
br label %done, !structurizecfg.uniform !0
done: ; preds = %else, %if
%value = phi i32 [ %if_value, %if ], [ %else_value, %else ]
store i32 %value, i32 addrspace(1)* %out
ret void
}
; Function Attrs: convergent nounwind
declare { i1, i64 } @llvm.amdgcn.if(i1) #1
; Function Attrs: convergent nounwind
declare { i1, i64 } @llvm.amdgcn.else(i64) #1
; Function Attrs: convergent nounwind readnone
declare i64 @llvm.amdgcn.break(i64) #2
; Function Attrs: convergent nounwind readnone
declare i64 @llvm.amdgcn.if.break(i1, i64) #2
; Function Attrs: convergent nounwind readnone
declare i64 @llvm.amdgcn.else.break(i64, i64) #2
; Function Attrs: convergent nounwind
declare i1 @llvm.amdgcn.loop(i64) #1
; Function Attrs: convergent nounwind
declare void @llvm.amdgcn.end.cf(i64) #1
attributes #0 = { "target-cpu"="gfx803" }
attributes #1 = { convergent nounwind }
attributes #2 = { convergent nounwind readnone }
!0 = !{}
...
---
# CHECK-LABEL: name: multiple_mem_operands
# CHECK-LABEL: bb.3.done:
# CHECK: S_WAITCNT 3952
# CHECK-NEXT: BUFFER_LOAD_DWORD_OFFEN
# CHECK-NEXT: S_WAITCNT 3952
# CHECK-NEXT: BUFFER_WBINVL1_VOL
name: multiple_mem_operands
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
liveins:
- { reg: '%sgpr0_sgpr1', virtual-reg: '' }
- { reg: '%sgpr3', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 65540
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 4, stack-id: 0,
isImmutable: false, isAliased: false, callee-saved-register: '' }
stack:
- { id: 0, name: scratch0, type: default, offset: 4, size: 32768, alignment: 4,
stack-id: 0, callee-saved-register: '', local-offset: 0, di-variable: '',
di-expression: '', di-location: '' }
- { id: 1, name: scratch1, type: default, offset: 32772, size: 32768,
alignment: 4, stack-id: 0, callee-saved-register: '', local-offset: 32768,
di-variable: '', di-expression: '', di-location: '' }
constants:
body: |
bb.0.entry:
successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000)
liveins: %sgpr0_sgpr1, %sgpr3
%sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
%sgpr8 = S_MOV_B32 $SCRATCH_RSRC_DWORD0, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11
%sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%sgpr9 = S_MOV_B32 $SCRATCH_RSRC_DWORD1, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11
%sgpr10 = S_MOV_B32 4294967295, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11
%sgpr11 = S_MOV_B32 15204352, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11
%vgpr0 = V_MOV_B32_e32 1, implicit %exec
BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr8_sgpr9_sgpr10_sgpr11, %sgpr3, 4, 0, 0, 0, implicit %exec :: (store 4 into %ir.scratchptr01)
S_WAITCNT 127
S_CMP_LG_U32 killed %sgpr2, 0, implicit-def %scc
S_WAITCNT 3855
%vgpr0 = V_MOV_B32_e32 2, implicit %exec
%vgpr1 = V_MOV_B32_e32 32772, implicit %exec
BUFFER_STORE_DWORD_OFFEN killed %vgpr0, killed %vgpr1, %sgpr8_sgpr9_sgpr10_sgpr11, %sgpr3, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.scratchptr12)
S_CBRANCH_SCC0 %bb.1.if, implicit killed %scc
bb.2.else:
successors: %bb.3.done(0x80000000)
liveins: %sgpr0_sgpr1, %sgpr4_sgpr5, %sgpr3, %sgpr8_sgpr9_sgpr10_sgpr11
%sgpr0 = S_LOAD_DWORD_IMM killed %sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
S_WAITCNT 3855
%vgpr0 = V_MOV_B32_e32 32772, implicit %exec
S_BRANCH %bb.3.done
bb.1.if:
successors: %bb.3.done(0x80000000)
liveins: %sgpr0_sgpr1, %sgpr4_sgpr5, %sgpr3, %sgpr8_sgpr9_sgpr10_sgpr11
%sgpr0 = S_LOAD_DWORD_IMM killed %sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
S_WAITCNT 3855
%vgpr0 = V_MOV_B32_e32 4, implicit %exec
bb.3.done:
liveins: %sgpr3, %sgpr4_sgpr5, %sgpr8_sgpr9_sgpr10_sgpr11, %vgpr0, %sgpr0
S_WAITCNT 127
%sgpr0 = S_LSHL_B32 killed %sgpr0, 2, implicit-def dead %scc
%vgpr0 = V_ADD_I32_e32 killed %sgpr0, killed %vgpr0, implicit-def dead %vcc, implicit %exec
%vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed %vgpr0, killed %sgpr8_sgpr9_sgpr10_sgpr11, %sgpr3, 0, 0, 0, 0, implicit %exec :: (load syncscope("agent") unordered 4 from %ir.else_ptr), (load syncscope("workgroup") seq_cst 4 from %ir.if_ptr)
%vgpr1 = V_MOV_B32_e32 %sgpr4, implicit %exec, implicit-def %vgpr1_vgpr2, implicit %sgpr4_sgpr5
%vgpr2 = V_MOV_B32_e32 killed %sgpr5, implicit %exec, implicit %sgpr4_sgpr5, implicit %exec
S_WAITCNT 3952
FLAT_STORE_DWORD killed %vgpr1_vgpr2, killed %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.out)
S_ENDPGM
...
|