1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s
# This previously would produce a bundle that could not be satisfied
# due to using nearly the entire register budget and not considering
# the alignment requirement of large SGPR tuples.
---
name: soft_clause_bundle_out_of_registers
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
waveLimiter: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
occupancy: 6
body: |
; CHECK-LABEL: name: soft_clause_bundle_out_of_registers
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM4:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM5:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM6:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr4_sgpr5, 0, csr_amdgpu_highregs, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr70, implicit-def $sgpr80, implicit-def $sgpr90, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6
%0:sgpr_64 = COPY $sgpr4_sgpr5
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), addrspace 4)
%2:vreg_64 = IMPLICIT_DEF
bb.1:
undef %3.sub0:sreg_64 = S_ADD_U32 %1.sub0, 0, implicit-def $scc
%3.sub1:sreg_64 = S_ADDC_U32 %1.sub1, 0, implicit-def dead $scc, implicit killed $scc
%4:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load (s512), align 4, addrspace 4)
%5:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load (s512), align 4, addrspace 4)
%6:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load (s512), align 4, addrspace 4)
%7:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load (s512), align 4, addrspace 4)
%8:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load (s512), align 4, addrspace 4)
%9:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load (s512), align 4, addrspace 4)
%10:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load (s512), align 4, addrspace 4)
dead $sgpr30_sgpr31 = SI_CALL undef $sgpr4_sgpr5, 0, csr_amdgpu_highregs, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr70, implicit-def $sgpr80, implicit-def $sgpr90, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
dead %11:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub1, 0, 0, implicit $mode, implicit $exec
dead %12:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub2, 0, 0, implicit $mode, implicit $exec
dead %13:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub3, 0, 0, implicit $mode, implicit $exec
dead %14:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub4, 0, 0, implicit $mode, implicit $exec
dead %15:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub5, 0, 0, implicit $mode, implicit $exec
dead %16:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub6, 0, 0, implicit $mode, implicit $exec
dead %17:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub7, 0, 0, implicit $mode, implicit $exec
dead %18:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub8, 0, 0, implicit $mode, implicit $exec
dead %19:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub9, 0, 0, implicit $mode, implicit $exec
dead %20:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub10, 0, 0, implicit $mode, implicit $exec
dead %21:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub11, 0, 0, implicit $mode, implicit $exec
dead %22:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub12, 0, 0, implicit $mode, implicit $exec
dead %23:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub13, 0, 0, implicit $mode, implicit $exec
dead %24:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub14, 0, 0, implicit $mode, implicit $exec
dead %25:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub15, 0, 0, implicit $mode, implicit $exec
dead %26:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub0, 0, 0, implicit $mode, implicit $exec
dead %27:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub1, 0, 0, implicit $mode, implicit $exec
dead %28:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub2, 0, 0, implicit $mode, implicit $exec
dead %29:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub3, 0, 0, implicit $mode, implicit $exec
dead %30:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub4, 0, 0, implicit $mode, implicit $exec
dead %31:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub5, 0, 0, implicit $mode, implicit $exec
dead %32:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub6, 0, 0, implicit $mode, implicit $exec
dead %33:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub7, 0, 0, implicit $mode, implicit $exec
dead %34:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub8, 0, 0, implicit $mode, implicit $exec
dead %35:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub9, 0, 0, implicit $mode, implicit $exec
dead %36:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub10, 0, 0, implicit $mode, implicit $exec
dead %37:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub11, 0, 0, implicit $mode, implicit $exec
dead %38:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub12, 0, 0, implicit $mode, implicit $exec
dead %39:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub13, 0, 0, implicit $mode, implicit $exec
dead %40:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub14, 0, 0, implicit $mode, implicit $exec
dead %41:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub15, 0, 0, implicit $mode, implicit $exec
dead %42:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub0, 0, 0, implicit $mode, implicit $exec
dead %43:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub1, 0, 0, implicit $mode, implicit $exec
dead %44:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub2, 0, 0, implicit $mode, implicit $exec
dead %45:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub3, 0, 0, implicit $mode, implicit $exec
dead %46:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub4, 0, 0, implicit $mode, implicit $exec
dead %47:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub5, 0, 0, implicit $mode, implicit $exec
dead %48:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub8, 0, 0, implicit $mode, implicit $exec
dead %49:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub9, 0, 0, implicit $mode, implicit $exec
dead %50:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub10, 0, 0, implicit $mode, implicit $exec
dead %51:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub11, 0, 0, implicit $mode, implicit $exec
dead %52:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub12, 0, 0, implicit $mode, implicit $exec
dead %53:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub14, 0, 0, implicit $mode, implicit $exec
dead %54:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub15, 0, 0, implicit $mode, implicit $exec
dead %55:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %9.sub10, 0, 0, implicit $mode, implicit $exec
dead %56:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %6.sub0, 0, 0, implicit $mode, implicit $exec
dead %57:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %7.sub10, 0, 0, implicit $mode, implicit $exec
dead %58:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %10.sub0, 0, 0, implicit $mode, implicit $exec
S_CMP_LG_U32 0, 0, implicit-def $scc
S_CBRANCH_SCC1 %bb.1, implicit killed $scc
S_BRANCH %bb.2
bb.2:
S_ENDPGM 0
...
# Case with simple clause which exceeds the pressure limit, though
# didn't hit the register allocator error.
---
name: simple_huge_reg_tuple_clause
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
waveLimiter: false
memoryBound: false
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
occupancy: 10
body: |
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6
; CHECK-LABEL: name: simple_huge_reg_tuple_clause
; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load (s512), align 4, addrspace 4)
; CHECK-NEXT: S_NOP 0, implicit [[S_LOAD_DWORDX16_IMM]]
%0:sreg_64 = COPY $sgpr4_sgpr5
%1:sreg_64 = S_MOV_B64 0
%2:sreg_64 = S_MOV_B64 1
%3:sreg_64 = S_MOV_B64 2
%4:sreg_64 = S_MOV_B64 3
%5:sreg_64 = S_MOV_B64 4
%6:sreg_64 = S_MOV_B64 5
%7:sreg_64 = S_MOV_B64 6
%8:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load (s512), align 4, addrspace 4)
%9:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load (s512), align 4, addrspace 4)
%10:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load (s512), align 4, addrspace 4)
%11:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load (s512), align 4, addrspace 4)
S_NOP 0, implicit %8
S_NOP 0, implicit %9
S_NOP 0, implicit %10
S_NOP 0, implicit %11
S_NOP 0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5
S_NOP 0, implicit %6
S_NOP 0, implicit %7
S_ENDPGM 0
...
|