# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=greedy,1 -stop-after=prologepilog -verify-machineinstrs -verify-regalloc -o - %s | FileCheck --check-prefixes=GCN %s
# The VGPR pair spilled and restored around the callsite is used in the next basic block.
#
# AMDGPU target spill hooks storeRegToStackSlot/loadRegFromStackSlot handle the register spills via
# spill pseudos to insert a single instruction per spill to tackle the limitation during inline spiller
# that incorrectly updates the LiveIntervals in case of a spill lowered into multiple instructions.
# AV spills were handled earlier by converting them into equivalent VGPR spills by adding appropriate copies.
# The multiple instructions (a copy + vgpr spill pseudo) introduced an incorrect liverange that caused a
# crash during RA. It is fixed by introducing AV* spill pseudos to ensure a single instruction per spill and
# the test started compiling successfully.
---
# NOTE(review): indentation restored — YAML requires the nested mapping/sequence
# entries and the content of the 'body: |' literal block to be indented relative
# to their keys; every token below is otherwise unchanged.
name: test_av_spill_cross_bb_usage
tracksRegLiveness: true
stack:
  # Pre-existing 4-byte spill slot (%stack.0); the slot referenced as %stack.1
  # in the checks is created later by the spiller.
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
machineFunctionInfo:
  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
  stackPtrOffsetReg: '$sgpr32'
body: |
  ; GCN-LABEL: name: test_av_spill_cross_bb_usage
  ; GCN: bb.0:
  ; GCN: S_BRANCH %bb.1
  ; GCN: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: liveins: $exec:0x000000000000000F, $sgpr30, $sgpr31, $vgpr0:0x0000000000000003, $vgpr1:0x0000000000000003, $vgpr2:0x0000000000000003, $vgpr3:0x0000000000000003, $vgpr4:0x0000000000000003, $vgpr5:0x0000000000000003, $vgpr6:0x0000000000000003, $vgpr7:0x0000000000000003, $vgpr8:0x0000000000000003, $vgpr9:0x0000000000000003, $vgpr10:0x0000000000000003, $vgpr11:0x0000000000000003, $vgpr40, $sgpr30_sgpr31, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F
  ; GCN-NEXT: {{ $}}
  ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5)
  ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5)
  ; GCN-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu_highregs, implicit-def dead $vgpr0
  ; GCN-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1, addrspace 5)
  ; GCN-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1 + 4, addrspace 5)
  ; GCN: bb.2:
  ; GCN-NEXT: liveins: $vgpr40, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec
  ; GCN-NEXT: FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
  ; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
  ; GCN: S_SETPC_B64_return undef $sgpr30_sgpr31
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30, $sgpr31, $sgpr30_sgpr31
    undef %0.sub1:vreg_64 = COPY $vgpr15
    %0.sub0:vreg_64 = COPY $vgpr14
    undef %1.sub1:vreg_64 = COPY $vgpr13
    %1.sub0:vreg_64 = COPY $vgpr12
    undef %2.sub1:vreg_64 = COPY $vgpr11
    %2.sub0:vreg_64 = COPY $vgpr10
    undef %3.sub1:vreg_64 = COPY $vgpr9
    %3.sub0:vreg_64 = COPY $vgpr8
    undef %4.sub1:vreg_64 = COPY $vgpr7
    %4.sub0:vreg_64 = COPY $vgpr6
    undef %5.sub1:vreg_64 = COPY $vgpr5
    %5.sub0:vreg_64 = COPY $vgpr4
    undef %6.sub1:vreg_64 = COPY $vgpr3
    %6.sub0:vreg_64 = COPY $vgpr2
    undef %7.sub1:vreg_64 = COPY $vgpr1
    %7.sub0:vreg_64 = COPY $vgpr0
    S_CBRANCH_SCC1 %bb.2, implicit undef $scc
    S_BRANCH %bb.1
  bb.1:
    liveins: $vgpr40, $sgpr30, $sgpr31, $sgpr30_sgpr31
    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
    renamable $sgpr16_sgpr17 = IMPLICIT_DEF
    $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
    $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu_highregs, implicit-def dead $vgpr0
    %8:vreg_64 = nofpexcept V_FMA_F64_e64 0, %7, 0, %6, 0, %5, 0, 0, implicit $mode, implicit $exec
    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
    FLAT_STORE_DWORDX2 %4, %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
    FLAT_STORE_DWORDX2 %2, %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
  bb.2:
    liveins: $vgpr40
    %9:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
    FLAT_STORE_DWORDX2 undef %10:vreg_64, %1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
    FLAT_STORE_DWORDX2 %9, %0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
    S_SETPC_B64_return undef $sgpr30_sgpr31
...