File: partial-regcopy-and-spill-missed-at-regalloc.ll

package info (click to toggle)
llvm-toolchain-14 1%3A14.0.6-12
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,496,180 kB
  • sloc: cpp: 5,593,972; ansic: 986,872; asm: 585,869; python: 184,223; objc: 72,530; lisp: 31,119; f90: 27,793; javascript: 9,780; pascal: 9,762; sh: 9,482; perl: 7,468; ml: 5,432; awk: 3,523; makefile: 2,538; xml: 953; cs: 573; fortran: 567
file content (60 lines) | stat: -rw-r--r-- 4,280 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --stop-after=greedy,1 -verify-machineinstrs < %s | FileCheck -check-prefix=REGALLOC-GFX908 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --stop-after=prologepilog -verify-machineinstrs < %s | FileCheck -check-prefix=PEI-GFX908 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --stop-after=greedy,1 -verify-machineinstrs < %s | FileCheck -check-prefix=REGALLOC-GFX90A %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --stop-after=prologepilog -verify-machineinstrs < %s | FileCheck -check-prefix=PEI-GFX90A %s

; Partial reg copy and spill missed during regalloc handled later at frame lowering.
define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
  ; REGALLOC-GFX908-LABEL: name: partial_copy
  ; REGALLOC-GFX908: bb.0 (%ir-block.0):
  ; REGALLOC-GFX908:  INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def [[VREG_64:%[0-9]+]]
  ; REGALLOC-GFX908:  SI_SPILL_V64_SAVE [[VREG_64]], %stack.0
  ; REGALLOC-GFX908:  [[V_MFMA_I32_4X4X4I8_A128:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64
  ; REGALLOC-GFX908:  [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0
  ; REGALLOC-GFX908:  GLOBAL_STORE_DWORDX2 undef %{{[0-9]+}}:vreg_64, [[SI_SPILL_V64_RESTORE]]
  ; REGALLOC-GFX908:  [[COPY_A128_TO_V128:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_A128]]
  ; REGALLOC-GFX908:  GLOBAL_STORE_DWORDX4 undef %{{[0-9]+}}:vreg_64, [[COPY_A128_TO_V128]]
  ;
  ; PEI-GFX908-LABEL: name: partial_copy
  ; PEI-GFX908: bb.0 (%ir-block.0):
  ; PEI-GFX908:  INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
  ; PEI-GFX908:  BUFFER_STORE_DWORD_OFFSET killed $vgpr0
  ; PEI-GFX908:  $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1
  ; PEI-GFX908:  renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64
  ; PEI-GFX908:  $vgpr0 = BUFFER_LOAD_DWORD_OFFSET
  ; PEI-GFX908:  $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4
  ; PEI-GFX908:  GLOBAL_STORE_DWORDX2 undef renamable ${{.*}}, killed renamable $vgpr0_vgpr1
  ; PEI-GFX908:  renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
  ; PEI-GFX908:  GLOBAL_STORE_DWORDX4 undef renamable ${{.*}}, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
  ;
  ; REGALLOC-GFX90A-LABEL: name: partial_copy
  ; REGALLOC-GFX90A: bb.0 (%ir-block.0):
  ; REGALLOC-GFX90A:  INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def [[VREG_64:%[0-9]+]]
  ; REGALLOC-GFX90A:  SI_SPILL_V64_SAVE [[VREG_64]], %stack.0
  ; REGALLOC-GFX90A:  [[V_MFMA_I32_4X4X4I8_A128:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64
  ; REGALLOC-GFX90A:  [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64_align2 = SI_SPILL_AV64_RESTORE %stack.0
  ; REGALLOC-GFX90A:  GLOBAL_STORE_DWORDX2 undef %{{[0-9]+}}:vreg_64_align2, [[SI_SPILL_AV64_RESTORE]]
  ; REGALLOC-GFX90A:  GLOBAL_STORE_DWORDX4 undef %{{[0-9]+}}:vreg_64_align2, [[V_MFMA_I32_4X4X4I8_A128]]
  ;
  ; PEI-GFX90A-LABEL: name: partial_copy
  ; PEI-GFX90A: bb.0 (%ir-block.0):
  ; PEI-GFX90A:  INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
  ; PEI-GFX90A:  BUFFER_STORE_DWORD_OFFSET killed $vgpr0
  ; PEI-GFX90A:  $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1
  ; PEI-GFX90A:  renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64
  ; PEI-GFX90A:  $vgpr0 = BUFFER_LOAD_DWORD_OFFSET
  ; PEI-GFX90A:  $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4
  ; PEI-GFX90A:  GLOBAL_STORE_DWORDX2 undef renamable ${{.*}}, killed renamable $vgpr0_vgpr1
  ; PEI-GFX90A:  GLOBAL_STORE_DWORDX4 undef renamable ${{.*}}, killed renamable $agpr0_agpr1_agpr2_agpr3
  %v0 = call <4 x i32> asm sideeffect "; def $0", "=v" ()
  %v1 = call <2 x i32> asm sideeffect "; def $0", "=v" ()
  %mai = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %arg, i32 0, i32 0, i32 0)
  store volatile <4 x i32> %v0, <4 x i32> addrspace(1)* undef
  store volatile <2 x i32> %v1, <2 x i32> addrspace(1)* undef
  store volatile <4 x i32> %mai, <4 x i32> addrspace(1)* undef
  ret void
}

declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)

attributes #0 = { nounwind "amdgpu-num-vgpr"="5" }