File: vgpr-mark-last-scratch-load.mir

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 1,998,492 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (309 lines) | stat: -rw-r--r-- 22,735 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -run-pass=greedy -run-pass=amdgpu-mark-last-scratch-load -verify-machineinstrs | FileCheck -check-prefix=CHECK %s

--- |
  define amdgpu_cs void @test_spill_12x32() "amdgpu-num-vgpr"="12" {
    ret void
  }
  define amdgpu_cs void @test_spill_384() "amdgpu-num-vgpr"="12" {
    ret void
  }
  define amdgpu_ps void @test_loop_12() "amdgpu-num-vgpr"="12" {
    ret void
  }
...
---
name: test_spill_12x32
tracksRegLiveness: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11

    ; CHECK-LABEL: name: test_spill_12x32
    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr1, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr2, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr3, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr4, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr5, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr6, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr7, %stack.7, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr8, %stack.8, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr9, %stack.9, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr10, %stack.10, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr11, %stack.11, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
    ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.0, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE1:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.1, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE2:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.2, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE3:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.3, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE4:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.4, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE5:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.5, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE6:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.6, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE7:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.7, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE8:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.8, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE9:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.9, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE10:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.10, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE11:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.11, addrspace 5)
    ; CHECK-NEXT: S_ENDPGM 0, implicit [[SI_SPILL_V32_RESTORE]], implicit [[SI_SPILL_V32_RESTORE1]], implicit [[SI_SPILL_V32_RESTORE2]], implicit [[SI_SPILL_V32_RESTORE3]], implicit [[SI_SPILL_V32_RESTORE4]], implicit [[SI_SPILL_V32_RESTORE5]], implicit [[SI_SPILL_V32_RESTORE6]], implicit [[SI_SPILL_V32_RESTORE7]], implicit [[SI_SPILL_V32_RESTORE8]], implicit [[SI_SPILL_V32_RESTORE9]], implicit [[SI_SPILL_V32_RESTORE10]], implicit [[SI_SPILL_V32_RESTORE11]]
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = COPY $vgpr3
    %4:vgpr_32 = COPY $vgpr4
    %5:vgpr_32 = COPY $vgpr5
    %6:vgpr_32 = COPY $vgpr6
    %7:vgpr_32 = COPY $vgpr7
    %8:vgpr_32 = COPY $vgpr8
    %9:vgpr_32 = COPY $vgpr9
    %10:vgpr_32 = COPY $vgpr10
    %11:vgpr_32 = COPY $vgpr11
    INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    S_ENDPGM 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11
...

---
name: test_spill_384
tracksRegLiveness: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
body:             |
  bb.0:
    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11

    ; CHECK-LABEL: name: test_spill_384
    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: SI_SPILL_V384_SAVE $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, %stack.0, $sgpr32, 0, implicit $exec :: (store (s384) into %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    ; CHECK-NEXT: [[SI_SPILL_V384_RESTORE:%[0-9]+]]:vreg_384 = SI_SPILL_V384_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s384) from %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: S_ENDPGM 0, implicit [[SI_SPILL_V384_RESTORE]]
    %0:vreg_384 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    S_ENDPGM 0, implicit %0
...

---
name: test_loop_12
tracksRegLiveness: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
body:             |
  ; CHECK-LABEL: name: test_loop_12
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr12, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr11, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr10, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr9, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr8, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr7, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr6, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr5, %stack.7, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr4, %stack.8, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr3, %stack.9, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr2, %stack.10, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr1, %stack.11, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr0, %stack.12, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE [[V_MOV_B32_e32_3]], %stack.16, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
  ; CHECK-NEXT:   %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
  ; CHECK-NEXT:   %vcmp:sreg_32 = V_CMP_LT_I32_e64 0, [[SI_SPILL_V32_RESTORE]], implicit $exec
  ; CHECK-NEXT:   %mask:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
  ; CHECK-NEXT:   %sand:sreg_32 = S_AND_B32 %mask, %vcmp, implicit-def dead $scc
  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term %sand
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %mask2:sgpr_32 = S_MOV_B32 0
  ; CHECK-NEXT:   %count:sgpr_32 = S_MOV_B32 0
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.3
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, %mask, implicit-def $scc
  ; CHECK-NEXT:   S_BRANCH %bb.5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.3(0x7c000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE [[V_MOV_B32_e32_1]], %stack.14, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE1:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_]], 0, [[SI_SPILL_V32_RESTORE1]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.14, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE [[V_MOV_B32_e32_]], %stack.13, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE2:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
  ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_1]], 0, [[SI_SPILL_V32_RESTORE2]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE3:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
  ; CHECK-NEXT:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_2]], 0, [[SI_SPILL_V32_RESTORE3]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE [[V_MOV_B32_e32_2]], %stack.15, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE4:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
  ; CHECK-NEXT:   [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_4]], 0, [[SI_SPILL_V32_RESTORE4]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE5:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
  ; CHECK-NEXT:   %res5:vgpr_32 = V_ADD_F32_e64 0, %res5, 0, [[SI_SPILL_V32_RESTORE5]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE6:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
  ; CHECK-NEXT:   %res6:vgpr_32 = V_ADD_F32_e64 0, %res6, 0, [[SI_SPILL_V32_RESTORE6]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE7:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
  ; CHECK-NEXT:   %res7:vgpr_32 = V_ADD_F32_e64 0, %res7, 0, [[SI_SPILL_V32_RESTORE7]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE8:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
  ; CHECK-NEXT:   %res8:vgpr_32 = V_ADD_F32_e64 0, %res8, 0, [[SI_SPILL_V32_RESTORE8]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE9:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
  ; CHECK-NEXT:   %res9:vgpr_32 = V_ADD_F32_e64 0, %res9, 0, [[SI_SPILL_V32_RESTORE9]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE10:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
  ; CHECK-NEXT:   %res10:vgpr_32 = V_ADD_F32_e64 0, %res10, 0, [[SI_SPILL_V32_RESTORE10]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE11:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
  ; CHECK-NEXT:   %res11:vgpr_32 = V_ADD_F32_e64 0, %res11, 0, [[SI_SPILL_V32_RESTORE11]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE12:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
  ; CHECK-NEXT:   %res12:vgpr_32 = V_ADD_F32_e64 0, %res12, 0, [[SI_SPILL_V32_RESTORE12]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
  ; CHECK-NEXT:   %count:sgpr_32 = nuw nsw S_ADD_I32 %count, 1, implicit-def dead $scc
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE13:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
  ; CHECK-NEXT:   %vcmp2:sreg_32 = V_CMP_GE_I32_e64 %count, [[SI_SPILL_V32_RESTORE13]], implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.15, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
  ; CHECK-NEXT:   %mask2:sgpr_32 = S_OR_B32 %vcmp2, %mask2, implicit-def $scc
  ; CHECK-NEXT:   $exec_lo = S_ANDN2_B32_term $exec_lo, %mask2, implicit-def $scc
  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.3, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.4
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, %mask2, implicit-def $scc
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE [[V_MOV_B32_e32_4]], %stack.16, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE14:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.16, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.16, addrspace 5)
  ; CHECK-NEXT:   EXP_DONE 0, [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_2]], [[SI_SPILL_V32_RESTORE14]], -1, 0, 15, implicit $exec
  ; CHECK-NEXT:   EXP_DONE 0, %res5, %res6, %res7, %res8, -1, 0, 15, implicit $exec
  ; CHECK-NEXT:   EXP_DONE 0, %res9, %res10, %res11, %res12, -1, 0, 15, implicit $exec
  ; CHECK-NEXT:   S_ENDPGM 0
  bb.0: ; entry
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12

    %12:vgpr_32 = COPY $vgpr12
    %11:vgpr_32 = COPY $vgpr11
    %10:vgpr_32 = COPY $vgpr10
    %9:vgpr_32 = COPY $vgpr9
    %8:vgpr_32 = COPY $vgpr8
    %7:vgpr_32 = COPY $vgpr7
    %6:vgpr_32 = COPY $vgpr6
    %5:vgpr_32 = COPY $vgpr5
    %4:vgpr_32 = COPY $vgpr4
    %3:vgpr_32 = COPY $vgpr3
    %2:vgpr_32 = COPY $vgpr2
    %1:vgpr_32 = COPY $vgpr1
    %loop_end:vgpr_32 = COPY $vgpr0
    %res1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %vcmp:sreg_32 = V_CMP_LT_I32_e64 0, %loop_end, implicit $exec
    %mask:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
    %sand:sreg_32 = S_AND_B32 %mask, %vcmp, implicit-def dead $scc
    $exec_lo = S_MOV_B32_term %sand
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1: ; loop preheader
    successors: %bb.3(0x80000000)

    %mask2:sgpr_32 = S_MOV_B32 0
    %count:sgpr_32 = S_MOV_B32 0
    %res1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.3

  bb.2: ; flow
    successors: %bb.5(0x80000000)

    $exec_lo = S_OR_B32 $exec_lo, %mask, implicit-def $scc
    S_BRANCH %bb.5

  bb.3: ; loop
    successors: %bb.4(0x04000000), %bb.3(0x7c000000)

    %res1:vgpr_32 = V_ADD_F32_e64 0, %res1, 0, %1, 0, 0, implicit $mode, implicit $exec
    %res2:vgpr_32 = V_ADD_F32_e64 0, %res2, 0, %2, 0, 0, implicit $mode, implicit $exec
    %res3:vgpr_32 = V_ADD_F32_e64 0, %res3, 0, %3, 0, 0, implicit $mode, implicit $exec
    %res4:vgpr_32 = V_ADD_F32_e64 0, %res4, 0, %4, 0, 0, implicit $mode, implicit $exec
    %res5:vgpr_32 = V_ADD_F32_e64 0, %res5, 0, %5, 0, 0, implicit $mode, implicit $exec
    %res6:vgpr_32 = V_ADD_F32_e64 0, %res6, 0, %6, 0, 0, implicit $mode, implicit $exec
    %res7:vgpr_32 = V_ADD_F32_e64 0, %res7, 0, %7, 0, 0, implicit $mode, implicit $exec
    %res8:vgpr_32 = V_ADD_F32_e64 0, %res8, 0, %8, 0, 0, implicit $mode, implicit $exec
    %res9:vgpr_32 = V_ADD_F32_e64 0, %res9, 0, %9, 0, 0, implicit $mode, implicit $exec
    %res10:vgpr_32 = V_ADD_F32_e64 0, %res10, 0, %10, 0, 0, implicit $mode, implicit $exec
    %res11:vgpr_32 = V_ADD_F32_e64 0, %res11, 0, %11, 0, 0, implicit $mode, implicit $exec
    %res12:vgpr_32 = V_ADD_F32_e64 0, %res12, 0, %12, 0, 0, implicit $mode, implicit $exec
    %count:sgpr_32 = nuw nsw S_ADD_I32 %count, 1, implicit-def dead $scc
    %vcmp2:sreg_32 = V_CMP_GE_I32_e64 %count, %loop_end, implicit $exec
    %mask2:sgpr_32 = S_OR_B32 %vcmp2, %mask2, implicit-def $scc
    $exec_lo = S_ANDN2_B32_term $exec_lo, %mask2, implicit-def $scc
    S_CBRANCH_EXECNZ %bb.3, implicit $exec
    S_BRANCH %bb.4

  bb.4: ; flow
    successors: %bb.2(0x80000000)

    $exec_lo = S_OR_B32 $exec_lo, %mask2, implicit-def $scc
    S_BRANCH %bb.2

  bb.5: ; exit
    EXP_DONE 0, %res1, %res2, %res3, %res4, -1, 0, 15, implicit $exec
    EXP_DONE 0, %res5, %res6, %res7, %res8, -1, 0, 15, implicit $exec
    EXP_DONE 0, %res9, %res10, %res11, %res12, -1, 0, 15, implicit $exec
    S_ENDPGM 0

...