1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -start-before=rename-independent-subregs -mattr=+wavefrontsize64 -stop-before=amdgpu-mark-last-scratch-load %s -o - | FileCheck -check-prefix=REG_ALLOC %s
# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -start-before=rename-independent-subregs -mattr=+wavefrontsize64 -stop-after=machine-cp %s -o - | FileCheck -check-prefix=DEAD_INST_DEL %s
# Single machine function used as input for both RUN lines above. Both runs
# start the pipeline at rename-independent-subregs; the first stops before
# amdgpu-mark-last-scratch-load and the second stops after machine-cp.
# The expectations recorded under the two check prefixes list the same
# instruction sequence.
# NOTE(review): presumably the point is that the IMPLICIT_DEFs produced for
# bb.2 are still present after machine-cp - confirm against the originating
# change before relying on this description.
---
name: _amdgpu_cs_main
tracksRegLiveness: true
body: |
; REG_ALLOC-LABEL: name: _amdgpu_cs_main
; REG_ALLOC: bb.0:
; REG_ALLOC-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr3, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; REG_ALLOC-NEXT: renamable $vgpr15_vgpr16_vgpr17_vgpr18 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr2, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; REG_ALLOC-NEXT: renamable $vgpr11_vgpr12_vgpr13_vgpr14 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; REG_ALLOC-NEXT: KILL killed renamable $vgpr2
; REG_ALLOC-NEXT: KILL killed renamable $vgpr0
; REG_ALLOC-NEXT: KILL killed renamable $vgpr3
; REG_ALLOC-NEXT: renamable $sgpr12 = V_READFIRSTLANE_B32 killed $vgpr5, implicit $exec
; REG_ALLOC-NEXT: renamable $vgpr4_vgpr5_vgpr6_vgpr7 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr4, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; REG_ALLOC-NEXT: renamable $sgpr13 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
; REG_ALLOC-NEXT: renamable $sgpr6_sgpr7 = V_CMP_NE_U32_e64 killed $vgpr1, 0, implicit $exec
; REG_ALLOC-NEXT: S_CMP_EQ_U64 killed renamable $sgpr12_sgpr13, killed renamable $sgpr2_sgpr3, implicit-def $scc
; REG_ALLOC-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
; REG_ALLOC-NEXT: renamable $vgpr8 = IMPLICIT_DEF
; REG_ALLOC-NEXT: $exec = S_MOV_B64_term renamable $sgpr6_sgpr7
; REG_ALLOC-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
; REG_ALLOC-NEXT: S_BRANCH %bb.2
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: bb.1:
; REG_ALLOC-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; REG_ALLOC-NEXT: $exec = S_XOR_B64_term $exec, renamable $sgpr2_sgpr3, implicit-def $scc
; REG_ALLOC-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
; REG_ALLOC-NEXT: S_BRANCH %bb.3
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: bb.2:
; REG_ALLOC-NEXT: successors: %bb.1(0x80000000)
; REG_ALLOC-NEXT: liveins: $sgpr0, $sgpr1, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: renamable $sgpr1 = S_OR_B32 killed renamable $sgpr1, 2, implicit-def dead $scc
; REG_ALLOC-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1
; REG_ALLOC-NEXT: renamable $vgpr11_vgpr12 = IMPLICIT_DEF
; REG_ALLOC-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
; REG_ALLOC-NEXT: S_BRANCH %bb.1
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: bb.3:
; REG_ALLOC-NEXT: successors: %bb.5(0x80000000)
; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: renamable $sgpr1 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec
; REG_ALLOC-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
; REG_ALLOC-NEXT: S_CMP_EQ_U32 killed renamable $sgpr6, killed renamable $sgpr1, implicit-def $scc
; REG_ALLOC-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
; REG_ALLOC-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1
; REG_ALLOC-NEXT: S_BRANCH %bb.5
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: bb.4:
; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
; REG_ALLOC-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec
; REG_ALLOC-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr8, killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
; REG_ALLOC-NEXT: S_ENDPGM 0
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: bb.5:
; REG_ALLOC-NEXT: successors: %bb.4(0x80000000)
; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
; REG_ALLOC-NEXT: S_BRANCH %bb.4
;
; DEAD_INST_DEL-LABEL: name: _amdgpu_cs_main
; DEAD_INST_DEL: bb.0:
; DEAD_INST_DEL-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr3, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; DEAD_INST_DEL-NEXT: renamable $vgpr15_vgpr16_vgpr17_vgpr18 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr2, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; DEAD_INST_DEL-NEXT: renamable $vgpr11_vgpr12_vgpr13_vgpr14 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; DEAD_INST_DEL-NEXT: KILL killed renamable $vgpr2
; DEAD_INST_DEL-NEXT: KILL killed renamable $vgpr0
; DEAD_INST_DEL-NEXT: KILL killed renamable $vgpr3
; DEAD_INST_DEL-NEXT: renamable $sgpr12 = V_READFIRSTLANE_B32 killed $vgpr5, implicit $exec
; DEAD_INST_DEL-NEXT: renamable $vgpr4_vgpr5_vgpr6_vgpr7 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr4, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; DEAD_INST_DEL-NEXT: renamable $sgpr13 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
; DEAD_INST_DEL-NEXT: renamable $sgpr6_sgpr7 = V_CMP_NE_U32_e64 killed $vgpr1, 0, implicit $exec
; DEAD_INST_DEL-NEXT: S_CMP_EQ_U64 killed renamable $sgpr12_sgpr13, killed renamable $sgpr2_sgpr3, implicit-def $scc
; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
; DEAD_INST_DEL-NEXT: renamable $vgpr8 = IMPLICIT_DEF
; DEAD_INST_DEL-NEXT: $exec = S_MOV_B64_term renamable $sgpr6_sgpr7
; DEAD_INST_DEL-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
; DEAD_INST_DEL-NEXT: S_BRANCH %bb.2
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: bb.1:
; DEAD_INST_DEL-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; DEAD_INST_DEL-NEXT: $exec = S_XOR_B64_term $exec, renamable $sgpr2_sgpr3, implicit-def $scc
; DEAD_INST_DEL-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
; DEAD_INST_DEL-NEXT: S_BRANCH %bb.3
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: bb.2:
; DEAD_INST_DEL-NEXT: successors: %bb.1(0x80000000)
; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $sgpr1, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_OR_B32 killed renamable $sgpr1, 2, implicit-def dead $scc
; DEAD_INST_DEL-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1
; DEAD_INST_DEL-NEXT: renamable $vgpr11_vgpr12 = IMPLICIT_DEF
; DEAD_INST_DEL-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
; DEAD_INST_DEL-NEXT: S_BRANCH %bb.1
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: bb.3:
; DEAD_INST_DEL-NEXT: successors: %bb.5(0x80000000)
; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: renamable $sgpr1 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec
; DEAD_INST_DEL-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
; DEAD_INST_DEL-NEXT: S_CMP_EQ_U32 killed renamable $sgpr6, killed renamable $sgpr1, implicit-def $scc
; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
; DEAD_INST_DEL-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1
; DEAD_INST_DEL-NEXT: S_BRANCH %bb.5
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: bb.4:
; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
; DEAD_INST_DEL-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec
; DEAD_INST_DEL-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr8, killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
; DEAD_INST_DEL-NEXT: S_ENDPGM 0
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: bb.5:
; DEAD_INST_DEL-NEXT: successors: %bb.4(0x80000000)
; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
; DEAD_INST_DEL-NEXT: S_BRANCH %bb.4
; Entry block: copies every live-in physical register into a virtual
; register, issues four buffer loads through %9, and then branches on $exec.
bb.0:
successors: %bb.2(0x40000000), %bb.1(0x40000000)
liveins: $sgpr0, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr10
%0:sreg_32 = COPY $sgpr0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = COPY $vgpr1
%3:vgpr_32 = COPY $vgpr2
%4:vgpr_32 = COPY $vgpr3
%5:vgpr_32 = COPY $vgpr4
%6:vgpr_32 = COPY $vgpr10
%7:sreg_64 = COPY $sgpr2_sgpr3
%8:sreg_64 = COPY $sgpr4_sgpr5
%9:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11
; %10 is a vreg_256 tuple written as two separate 128-bit halves:
; sub0..sub3 by the load addressed with %1, sub4..sub7 by the load addressed
; with %5. Only %10.sub0 and %10.sub4 are read later (in bb.3).
undef %10.sub0_sub1_sub2_sub3:vreg_256 = BUFFER_LOAD_DWORDX4_OFFEN %1, %9, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
%10.sub4_sub5_sub6_sub7:vreg_256 = BUFFER_LOAD_DWORDX4_OFFEN %5, %9, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
%11:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %4, %9, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
%12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %3, %9, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; Assemble %13.sub0_sub1 from %11.sub0 and %12.sub0 and compare that pair
; against %7; only sub0 and sub1 of the sgpr_256 tuple are ever defined.
undef %13.sub0:sgpr_256 = V_READFIRSTLANE_B32 %11.sub0, implicit $exec
%13.sub1:sgpr_256 = V_READFIRSTLANE_B32 %12.sub0, implicit $exec
S_CMP_EQ_U64 %13.sub0_sub1, %7, implicit-def $scc
; %14 receives 1 or 0 depending on $scc; it is consumed only in bb.2.
%14:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
; %15 holds the result of comparing %2 against 0; it becomes the new $exec
; below and is read again by the S_OR_SAVEEXEC_B64 in bb.1.
%15:sreg_64_xexec = V_CMP_NE_U32_e64 %2, 0, implicit $exec
; %16 is the value stored in bb.4. It starts out undefined here and is
; redefined by the COPYs in bb.2 and bb.3.
%16:vgpr_32 = IMPLICIT_DEF
$exec = S_MOV_B64_term %15
S_CBRANCH_EXECZ %bb.1, implicit $exec
S_BRANCH %bb.2
; Reached from bb.0 (S_CBRANCH_EXECZ) and from bb.2 (S_BRANCH). Updates $exec
; from %15 while saving the result of S_OR_SAVEEXEC_B64 in %17, then either
; runs bb.3 or skips straight to bb.5 when $exec is zero.
bb.1:
successors: %bb.3(0x40000000), %bb.5(0x40000000)
%17:sreg_64 = S_OR_SAVEEXEC_B64 %15, implicit-def $exec, implicit-def $scc, implicit $exec
; %18 is used by the XOR below and by the S_OR_B64 in bb.5. The autogenerated
; expectations for this block contain no S_AND_B64: there the XOR (and the
; OR in bb.5) read the S_OR_SAVEEXEC_B64 result register directly.
%18:sreg_64_xexec = S_AND_B64 $exec, %17, implicit-def $scc
$exec = S_XOR_B64_term $exec, %18, implicit-def $scc
S_CBRANCH_EXECZ %bb.5, implicit $exec
S_BRANCH %bb.3
; Taken from bb.0 when the S_CBRANCH_EXECZ there does not branch. Sets %16
; to (%14 | 2) and then rejoins bb.1.
bb.2:
successors: %bb.1(0x80000000)
%19:sreg_32 = S_OR_B32 %14, 2, implicit-def dead $scc
%16:vgpr_32 = COPY %19
; Redefines only %10.sub0_sub1 as undefined; the undef flag on the def means
; the other subregisters of %10 are not read here. In the autogenerated
; expectations this one instruction shows up as two IMPLICIT_DEFs in bb.2,
; one of $vgpr11_vgpr12 and one of the full $vgpr0..$vgpr7 tuple.
undef %10.sub0_sub1:vreg_256 = IMPLICIT_DEF
S_BRANCH %bb.1
; Reached from bb.1 only. Reads one dword from each half of %10 (sub0 and
; sub4), compares them for equality and writes the 1/0 result to %16.
bb.3:
successors: %bb.5(0x80000000)
%20:sreg_32 = V_READFIRSTLANE_B32 %10.sub0, implicit $exec
%21:sreg_32 = V_READFIRSTLANE_B32 %10.sub4, implicit $exec
S_CMP_EQ_U32 %21, %20, implicit-def $scc
%22:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
; Overwrites the %16 value that was live into this block.
%16:vgpr_32 = COPY %22
S_BRANCH %bb.5
; Exit block (no successors), reached from bb.5. Stores %16 and ends the
; program.
bb.4:
; %23 = %0 + %6, both of which were copied from live-in registers in bb.0.
%23:vgpr_32 = V_ADD_U32_e64 %0, %6, 0, implicit $exec
; %24 is a 128-bit value loaded through the pointer in %8; it is passed to the
; buffer store together with %16 (the stored data) and %23.
%24:sgpr_128 = S_LOAD_DWORDX4_IMM %8, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
BUFFER_STORE_DWORD_OFFEN_exact %16, %23, %24, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
S_ENDPGM 0
; Reached from bb.1 (S_CBRANCH_EXECZ) and from bb.3. ORs the mask %18 computed
; in bb.1 back into $exec, then continues to the exit block bb.4.
bb.5:
successors: %bb.4(0x80000000)
$exec = S_OR_B64 $exec, %18, implicit-def $scc
S_BRANCH %bb.4
...
|