File: llvm.amdgcn.iglp.opt.single.2c.mir

package info (click to toggle)
llvm-toolchain-17 1%3A17.0.6-22
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,799,624 kB
  • sloc: cpp: 6,428,607; ansic: 1,383,196; asm: 793,408; python: 223,504; objc: 75,364; f90: 60,502; lisp: 33,869; pascal: 15,282; sh: 9,684; perl: 7,453; ml: 4,937; awk: 3,523; makefile: 2,889; javascript: 2,149; xml: 888; fortran: 619; cs: 573
file content (217 lines) | stat: -rw-r--r-- 19,623 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s

--- |
  define amdgpu_kernel void @single-wave-phase-2c(i32 addrspace(3)* noalias %in0, i32 addrspace(3)* noalias %in1, i32 addrspace(3)* noalias %in2, i32 addrspace(3)* noalias %in3, i32 addrspace(3)* noalias %in4, i32 addrspace(3)* noalias %in5, i32 addrspace(3)* noalias %in6, i32 addrspace(3)* noalias %in7, i32 addrspace(3)* noalias %in8, i32 addrspace(3)* noalias %in9, i32 addrspace(3)* noalias %in10, i32 addrspace(3)* noalias %in11, i32 addrspace(7)* noalias %in12, i32 addrspace(7)* noalias %in13, i32 addrspace(7)* noalias %in14, i32 addrspace(7)* noalias %in15, i32 addrspace(7)* noalias %in16, i32 addrspace(7)* noalias %in17) #0 { ret void }


  !0 = distinct !{!0}
  !1 = !{!1, !0}
...

---
name:            single-wave-phase-2c
tracksRegLiveness: true
machineFunctionInfo:
  occupancy:       1
body:      |
  ; GCN-LABEL: name: single-wave-phase-2c
  ; GCN: bb.0:
  ; GCN-NEXT:   successors: %bb.1(0x80000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:av_512_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:av_512_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF7:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF16:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   dead [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF28:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF29:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF30:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF31:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF32:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF33:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF2]], 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_1:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 0, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_2:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF2]], 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_3:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 2064, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:areg_512_align2 = COPY [[DEF1]]
  ; GCN-NEXT:   [[V_MFMA_F32_32X32X8F16_mac_e64_:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_1]].sub0_sub1, [[V_MFMA_F32_32X32X8F16_mac_e64_]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_4:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 1024, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF33]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF21]], implicit $exec
  ; GCN-NEXT:   [[V_MFMA_F32_32X32X8F16_mac_e64_1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_1]].sub2_sub3, [[V_MFMA_F32_32X32X8F16_mac_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_5:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 3088, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF22]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF23]], implicit $exec
  ; GCN-NEXT:   [[V_MFMA_F32_32X32X8F16_mac_e64_1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_3]].sub0_sub1, [[V_MFMA_F32_32X32X8F16_mac_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF4]], [[DEF16]], 0, 0, implicit $exec :: (store (s128) into %ir.in6, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF6]], [[DEF7]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   dead [[V_MFMA_F32_32X32X8F16_mac_e64_1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_3]].sub2_sub3, [[V_MFMA_F32_32X32X8F16_mac_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:areg_512_align2 = COPY [[DEF]]
  ; GCN-NEXT:   undef [[DEF17]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF13]], [[DEF12]], [[DEF30]], implicit $exec
  ; GCN-NEXT:   [[DEF17]].sub3:vreg_128_align2 = V_PERM_B32_e64 [[DEF15]], [[DEF14]], [[DEF30]], implicit $exec
  ; GCN-NEXT:   [[DEF17]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF8]], [[DEF9]], [[DEF30]], implicit $exec
  ; GCN-NEXT:   [[DEF17]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF11]], [[DEF10]], [[DEF30]], implicit $exec
  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF5]], [[DEF17]], 0, 0, implicit $exec :: (store (s128) into %ir.in8, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_MFMA_F32_32X32X8F16_mac_e64_1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_4]].sub0_sub1, [[V_MFMA_F32_32X32X8F16_mac_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   undef [[DEF18]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF8]], [[DEF9]], [[DEF31]], implicit $exec
  ; GCN-NEXT:   [[DEF18]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF11]], [[DEF10]], [[DEF31]], implicit $exec
  ; GCN-NEXT:   [[DEF18]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF13]], [[DEF12]], [[DEF31]], implicit $exec
  ; GCN-NEXT:   [[DEF18]].sub3:vreg_128_align2 = V_PERM_B32_e64 [[DEF15]], [[DEF14]], [[DEF31]], implicit $exec
  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF5]], [[DEF18]], 16, 0, implicit $exec :: (store (s128) into %ir.in9, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_MFMA_F32_32X32X8F16_mac_e64_2:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_4]].sub2_sub3, [[V_MFMA_F32_32X32X8F16_mac_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_1]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_2]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in12, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_3]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in13, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[V_MFMA_F32_32X32X8F16_mac_e64_2:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_5]].sub0_sub1, [[V_MFMA_F32_32X32X8F16_mac_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF24]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF25]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF26]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF27]], implicit $exec
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_4]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in14, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_5]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in15, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_6]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in16, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_7]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in17, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   dead [[V_MFMA_F32_32X32X8F16_mac_e64_2:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_5]].sub2_sub3, [[V_MFMA_F32_32X32X8F16_mac_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   IGLP_OPT 1
  ; GCN-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = nsw S_ADD_I32 [[S_ADD_I32_]], -1, implicit-def dead $scc
  ; GCN-NEXT:   S_CMP_LG_U32 [[S_ADD_I32_]], 0, implicit-def $scc
  ; GCN-NEXT:   [[V_ADD_U32_e32_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[V_ADD_U32_e32_8]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[V_ADD_U32_e32_9]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[V_ADD_U32_e32_10]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[V_ADD_U32_e32_11]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 64, [[V_ADD_U32_e32_12]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[V_ADD_U32_e32_13]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[V_ADD_U32_e32_14]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[V_ADD_U32_e32_15]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[V_ADD_U32_e32_16]], implicit $exec
  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit killed $scc
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   S_ENDPGM 0
  bb.0:
  %1076:av_512_align2 = IMPLICIT_DEF
  %1073:av_512_align2 = IMPLICIT_DEF
  %25:vgpr_32 = IMPLICIT_DEF
  %26:vgpr_32 = IMPLICIT_DEF
  %13:vgpr_32 = IMPLICIT_DEF
  %15:vgpr_32 = IMPLICIT_DEF
  %1215:vgpr_32 = IMPLICIT_DEF
  %381:sgpr_128 = IMPLICIT_DEF
  %1225:vgpr_32 = IMPLICIT_DEF
  %1224:vgpr_32 = IMPLICIT_DEF
  %1226:vgpr_32 = IMPLICIT_DEF
  %1227:vgpr_32 = IMPLICIT_DEF
  %1228:vgpr_32 = IMPLICIT_DEF
  %1229:vgpr_32 = IMPLICIT_DEF
  %1230:vgpr_32 = IMPLICIT_DEF
  %1231:vgpr_32 = IMPLICIT_DEF
  %1232:av_128_align2 = IMPLICIT_DEF
  %1091:vreg_128_align2 = IMPLICIT_DEF
  %1067:vreg_128_align2 = IMPLICIT_DEF
  %27:vgpr_32 = IMPLICIT_DEF
  %1216:vgpr_32 = IMPLICIT_DEF
  %1217:vgpr_32 = IMPLICIT_DEF
  %1218:vgpr_32 = IMPLICIT_DEF
  %1219:vgpr_32 = IMPLICIT_DEF
  %1220:vgpr_32 = IMPLICIT_DEF
  %1221:vgpr_32 = IMPLICIT_DEF
  %1222:vgpr_32 = IMPLICIT_DEF
  %1223:vgpr_32 = IMPLICIT_DEF
  %29:sreg_32 = IMPLICIT_DEF
  %1214:sreg_32 = IMPLICIT_DEF
  %419:sreg_32 = IMPLICIT_DEF
  %421:sreg_32 = IMPLICIT_DEF
  %387:sgpr_128 = IMPLICIT_DEF
  %1216:vgpr_32 = IMPLICIT_DEF

  bb.1:
  IGLP_OPT 1
  %489:av_128_align2 = DS_READ_B128_gfx9 %25:vgpr_32, 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
  %494:av_128_align2 = DS_READ_B128_gfx9 %25:vgpr_32, 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
  %499:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 0, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
  %504:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 2064, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
  %527:areg_512_align2 = COPY %1073:av_512_align2
  %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub0_sub1:av_128_align2, %499.sub0_sub1:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub2_sub3:av_128_align2, %499.sub2_sub3:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub0_sub1:av_128_align2, %504.sub0_sub1:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub2_sub3:av_128_align2, %504.sub2_sub3:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %530:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 1024, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
  %535:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 3088, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
  %554:areg_512_align2 = COPY %1076:av_512_align2
  %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub0_sub1:av_128_align2, %530.sub0_sub1:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub2_sub3:av_128_align2, %530.sub2_sub3:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub0_sub1:av_128_align2, %535.sub0_sub1:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub2_sub3:av_128_align2, %535.sub2_sub3:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  DS_WRITE_B128_gfx9 %13:vgpr_32, %1232:av_128_align2, 0, 0, implicit $exec :: (store (s128) into %ir.in6, !alias.scope !0, addrspace 3)
  %1232:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %1215:vgpr_32, %381:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 7)
  %1091.sub0:vreg_128_align2 = V_PERM_B32_e64 %1225:vgpr_32, %1224:vgpr_32, %419:sreg_32, implicit $exec
  %1067.sub0:vreg_128_align2 = V_PERM_B32_e64 %1225:vgpr_32, %1224:vgpr_32, %421:sreg_32, implicit $exec
  %1091.sub1:vreg_128_align2 = V_PERM_B32_e64 %1227:vgpr_32, %1226:vgpr_32, %419:sreg_32, implicit $exec
  %1067.sub1:vreg_128_align2 = V_PERM_B32_e64 %1227:vgpr_32, %1226:vgpr_32, %421:sreg_32, implicit $exec
  %1091.sub2:vreg_128_align2 = V_PERM_B32_e64 %1229:vgpr_32, %1228:vgpr_32, %419:sreg_32, implicit $exec
  %1067.sub2:vreg_128_align2 = V_PERM_B32_e64 %1229:vgpr_32, %1228:vgpr_32, %421:sreg_32, implicit $exec
  %1091.sub3:vreg_128_align2 = V_PERM_B32_e64 %1231:vgpr_32, %1230:vgpr_32, %419:sreg_32, implicit $exec
  %1067.sub3:vreg_128_align2 = V_PERM_B32_e64 %1231:vgpr_32, %1230:vgpr_32, %421:sreg_32, implicit $exec
  DS_WRITE_B128_gfx9 %15:vgpr_32, %1091:vreg_128_align2, 0, 0, implicit $exec :: (store (s128) into %ir.in8, !alias.scope !0, addrspace 3)
  DS_WRITE_B128_gfx9 %15:vgpr_32, %1067:vreg_128_align2, 16, 0, implicit $exec :: (store (s128) into %ir.in9, !alias.scope !0, addrspace 3)
  %572:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1216:vgpr_32, implicit $exec
  %1224:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %572:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 7)
  %573:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1217:vgpr_32, implicit $exec
  %1225:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %573:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 7)
  %574:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1218:vgpr_32, implicit $exec
  %1226:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %574:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in12, !alias.scope !0, addrspace 7)
  %575:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1219:vgpr_32, implicit $exec
  %1227:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %575:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in13, !alias.scope !0, addrspace 7)
  %576:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1220:vgpr_32, implicit $exec
  %1228:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %576:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in14, !alias.scope !0, addrspace 7)
  %577:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1221:vgpr_32, implicit $exec
  %1229:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %577:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in15, !alias.scope !0, addrspace 7)
  %578:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1222:vgpr_32, implicit $exec
  %1230:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %578:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in16, !alias.scope !0, addrspace 7)
  %579:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1223:vgpr_32, implicit $exec
  %1231:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %579:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in17, !alias.scope !0, addrspace 7)
  %1223:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1223:vgpr_32, implicit $exec
  %1222:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1222:vgpr_32, implicit $exec
  %1221:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1221:vgpr_32, implicit $exec
  %1220:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1220:vgpr_32, implicit $exec
  %1219:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1219:vgpr_32, implicit $exec
  %1218:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1218:vgpr_32, implicit $exec
  %1217:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1217:vgpr_32, implicit $exec
  %1216:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1216:vgpr_32, implicit $exec
  %1215:vgpr_32 = V_ADD_U32_e32 64, %1215:vgpr_32, implicit $exec
  %1214:sreg_32 = nsw S_ADD_I32 %1214:sreg_32, -1, implicit-def dead $scc
  S_CMP_LG_U32 %1214:sreg_32, 0, implicit-def $scc
  S_CBRANCH_SCC1 %bb.1, implicit killed $scc

  bb.2:
  S_ENDPGM 0
---