File: extend-wwm-virt-reg-liveness.mir

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,998,520 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (279 lines) | stat: -rw-r--r-- 11,309 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -start-before=si-lower-sgpr-spills -stop-after=virtregrewriter,1 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

# Tests to check the conservative liveness extension for the wwm registers during SGPR spill lowering.

# Even though the VGPR can be shared for the wwm-operand (writelane/readlane get inserted for the SGPR spills)
# and the regular operand (%0), they get different registers as we conservatively extend the liveness of the
# wwm-operands.
---
# Straight-line case: $sgpr4 is saved to and restored from the SGPR spill slot
# %stack.0 inside the only block, after which the ordinary virtual register %0
# is defined and stored. The autogenerated checks in the body expect the spill
# to be lowered to SI_SPILL_S32_TO_VGPR / SI_RESTORE_S32_FROM_VGPR on $vgpr0,
# %0 to be assigned $vgpr1, and a KILL of $vgpr0 directly before SI_RETURN.
name:            test_single_block
tracksRegLiveness: true
frameInfo:
  maxAlignment:    4
stack:
  # The single 4-byte slot tagged stack-id sgpr-spill; it is the %stack.0
  # operand of the SI_SPILL_S32_SAVE / SI_SPILL_S32_RESTORE pair in the body.
  - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
  # Non-entry function: the body returns through SI_RETURN.
  isEntryFunction: false
  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
  hasSpilledSGPRs: true
body:             |
  bb.0:
    liveins: $sgpr4, $vgpr2_vgpr3
    ; GCN-LABEL: name: test_single_block
    ; GCN: liveins: $sgpr4, $vgpr2_vgpr3
    ; GCN-NEXT: {{  $}}
    ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
    ; GCN-NEXT: S_NOP 0
    ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
    ; GCN-NEXT: KILL killed renamable $vgpr0
    ; GCN-NEXT: SI_RETURN
    SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
    S_NOP 0
    renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
    %0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
    GLOBAL_STORE_DWORD $vgpr2_vgpr3, %0:vgpr_32, 0, 0, implicit $exec
    SI_RETURN
...

# Due to the presence of the wwm-operand in the divergent flow, the regular variable (%0) shouldn't get the same
# register as the one allocated for the wwm-operand in writelane/readlane when the SGPR spill is lowered.

---
# Conditional case: bb.1 defines %0 and branches to bb.3 on S_CBRANCH_EXECZ,
# otherwise falls into bb.2, which holds the $sgpr6 spill/restore pair and
# redefines %0; bb.3 reads %0 through V_READFIRSTLANE_B32. The autogenerated
# checks expect $vgpr0 to be IMPLICIT_DEF'd in bb.0, listed as a live-in of
# bb.1, bb.2 and bb.3, and killed in bb.3 before SI_RETURN, while %0 is
# assigned $vgpr1 in both of its definitions.
name:            test_if_else
tracksRegLiveness: true
frameInfo:
  maxAlignment:    4
stack:
  # The single 4-byte slot tagged stack-id sgpr-spill; it is the %stack.0
  # operand of the SI_SPILL_S32_SAVE / SI_SPILL_S32_RESTORE pair in bb.2.
  - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
  # Non-entry function: the body returns through SI_RETURN.
  isEntryFunction: false
  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
  hasSpilledSGPRs: true
body:             |
  ; GCN-LABEL: name: test_if_else
  ; GCN: bb.0:
  ; GCN-NEXT:   successors: %bb.1(0x80000000)
  ; GCN-NEXT:   liveins: $sgpr6, $sgpr10_sgpr11
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr0 = IMPLICIT_DEF
  ; GCN-NEXT:   S_BRANCH %bb.1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT:   liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   successors: %bb.3(0x80000000)
  ; GCN-NEXT:   liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr0
  ; GCN-NEXT:   S_NOP 0
  ; GCN-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.3:
  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr10_sgpr11
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
  ; GCN-NEXT:   S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
  ; GCN-NEXT:   KILL killed renamable $vgpr0
  ; GCN-NEXT:   SI_RETURN
  bb.0:
    liveins: $sgpr6, $sgpr10_sgpr11
    S_BRANCH %bb.1
  bb.1:
    liveins: $sgpr6, $sgpr10_sgpr11
    %0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
    S_CBRANCH_EXECZ %bb.3, implicit $exec
  bb.2:
    liveins: $sgpr6, $sgpr10_sgpr11
    SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
    S_NOP 0
    renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
    %0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
    S_BRANCH %bb.3
  bb.3:
    liveins: $sgpr10_sgpr11
    $sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
    S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
    SI_RETURN
...

# The wwm-register usage outside the loop should have the interference marked with
# all the regular virtual registers used in the test. The divergent loop index value (%1)
# can actually share the same VGPR as the wwm-operand. But since we extend the liveness of
# the wwm operand, an interference will always exist between them.

---
# Loop case: the $sgpr4 spill/restore pair lives in bb.1, ahead of a loop made
# of bb.3 (compares %1 against 0 and exits to bb.5 on S_CBRANCH_SCC1) and bb.4
# (recomputes %1 through %2 and branches back to bb.3). The autogenerated
# checks expect $vgpr0 to be IMPLICIT_DEF'd in bb.0, listed as a live-in of
# every later block including the loop blocks, and killed in bb.5 before
# SI_RETURN, while %0, %1 and %2 are all assigned $vgpr1.
name:            test_loop
tracksRegLiveness: true
frameInfo:
  maxAlignment:    4
stack:
  # The single 4-byte slot tagged stack-id sgpr-spill; it is the %stack.0
  # operand of the SI_SPILL_S32_SAVE / SI_SPILL_S32_RESTORE pair in bb.1.
  - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
  # Non-entry function: the body returns through SI_RETURN.
  isEntryFunction: false
  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
  hasSpilledSGPRs: true
body:             |
  ; GCN-LABEL: name: test_loop
  ; GCN: bb.0:
  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; GCN-NEXT:   liveins: $sgpr4, $sgpr10_sgpr11
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr0 = IMPLICIT_DEF
  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   successors: %bb.2(0x80000000)
  ; GCN-NEXT:   liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
  ; GCN-NEXT:   S_NOP 0
  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.2
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   successors: %bb.3(0x80000000)
  ; GCN-NEXT:   liveins: $sgpr4, $vgpr0, $vgpr1, $sgpr10_sgpr11
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0
  ; GCN-NEXT:   $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
  ; GCN-NEXT:   S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4
  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 5, implicit $exec
  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.3:
  ; GCN-NEXT:   successors: %bb.5(0x40000000), %bb.4(0x40000000)
  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   $vcc = V_CMP_EQ_U32_e64 0, $vgpr1, implicit $exec
  ; GCN-NEXT:   $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.5, implicit $scc
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.4:
  ; GCN-NEXT:   successors: %bb.3(0x80000000)
  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr6_sgpr7
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr1 = V_SUB_U32_e32 1, killed $vgpr1, implicit $exec
  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.5:
  ; GCN-NEXT:   liveins: $vgpr0, $sgpr6_sgpr7
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   $exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc
  ; GCN-NEXT:   KILL killed renamable $vgpr0
  ; GCN-NEXT:   SI_RETURN
  bb.0:
    liveins: $sgpr4, $sgpr10_sgpr11
    %0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
    S_CBRANCH_EXECZ %bb.2, implicit $exec
  bb.1:
    liveins: $sgpr4, $sgpr10_sgpr11
    SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
    S_NOP 0
    renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
    %0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
    S_BRANCH %bb.2
  bb.2:
    liveins: $sgpr4, $sgpr10_sgpr11
    S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0
    $sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
    S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4
    %1:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    S_CBRANCH_EXECZ %bb.3, implicit $exec
    S_BRANCH %bb.3
  bb.3:
    $vcc = V_CMP_EQ_U32_e64 0, %1:vgpr_32, implicit $exec
    $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
    S_CBRANCH_SCC1 %bb.5, implicit $scc
  bb.4:
    liveins: $sgpr6_sgpr7
    %2:vgpr_32 = V_SUB_U32_e32 1, %1:vgpr_32, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 %2:vgpr_32, implicit $exec
    S_BRANCH %bb.3
  bb.5:
    liveins: $sgpr6_sgpr7
    $exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc
    SI_RETURN
...

# There must be one KILL instruction for the wwm-operand in every return block.
# Due to that, the wwm-register allocated should be different from the ones
# allocated for the regular virtual registers.

---
# Two-exit case: bb.0 branches to bb.2 on S_CBRANCH_EXECZ or falls into bb.1,
# and both bb.1 and bb.2 end in SI_RETURN. Only bb.1 contains the $sgpr4
# spill/restore pair. The autogenerated checks expect $vgpr0 to be
# IMPLICIT_DEF'd in bb.0, listed as a live-in of both returning blocks, and
# killed in each of them before SI_RETURN, while %0 and %1 are both assigned
# $vgpr1.
name:            test_multiple_return_blocks
tracksRegLiveness: true
frameInfo:
  maxAlignment:    4
stack:
  # The single 4-byte slot tagged stack-id sgpr-spill; it is the %stack.0
  # operand of the SI_SPILL_S32_SAVE / SI_SPILL_S32_RESTORE pair in bb.1.
  - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
  # Non-entry function: both exits return through SI_RETURN.
  isEntryFunction: false
  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
  hasSpilledSGPRs: true
body:             |
  ; GCN-LABEL: name: test_multiple_return_blocks
  ; GCN: bb.0:
  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; GCN-NEXT:   liveins: $sgpr4, $vgpr2_vgpr3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr0 = IMPLICIT_DEF
  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
  ; GCN-NEXT:   S_NOP 0
  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
  ; GCN-NEXT:   GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
  ; GCN-NEXT:   KILL killed renamable $vgpr0
  ; GCN-NEXT:   SI_RETURN
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   liveins: $vgpr0, $vgpr2_vgpr3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
  ; GCN-NEXT:   GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
  ; GCN-NEXT:   KILL killed renamable $vgpr0
  ; GCN-NEXT:   SI_RETURN
  bb.0:
    liveins: $sgpr4, $vgpr2_vgpr3
    S_CBRANCH_EXECZ %bb.2, implicit $exec
  bb.1:
    liveins: $sgpr4, $vgpr2_vgpr3
    SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
    S_NOP 0
    renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
    %0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
    GLOBAL_STORE_DWORD $vgpr2_vgpr3, %0:vgpr_32, 0, 0, implicit $exec
    SI_RETURN
  bb.2:
    liveins: $vgpr2_vgpr3
    %1:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
    GLOBAL_STORE_DWORD $vgpr2_vgpr3, %1:vgpr_32, 0, 0, implicit $exec
    SI_RETURN
...