1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
---
# Test that we don't do silly things when the shader body (bb.1) contains no
# whole wave mode operations.
#
# The only whole-wave construct in the input is the SI_INIT_WHOLE_WAVE in bb.0.
# The expected output replaces it with an S_OR_SAVEEXEC_B32 -1 placed at the
# very top of bb.0, and the S_AND_B32 that read the SI_INIT_WHOLE_WAVE result
# reads the S_OR_SAVEEXEC_B32 result instead.
#
# The instructions in bb.1 and bb.2 are expected to come out unchanged; in
# particular no ENTER_STRICT_WWM / EXIT_STRICT_WWM pair is introduced.
#
name: test_no_wwm
alignment: 1
exposesReturnsTwice: false
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: test_no_wwm
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc
; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 5, [[COPY2]], 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY3]], implicit-def $scc
; CHECK-NEXT: $vgpr8 = COPY [[COPY2]]
; CHECK-NEXT: $sgpr0 = COPY [[COPY]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY4]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
bb.0:
successors: %bb.1, %bb.2
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8
%9:sreg_32 = COPY $sgpr0
undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1
%1.sub1:ccr_sgpr_64 = COPY $sgpr2
%37:vgpr_32 = COPY $vgpr8
%14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec
%16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
%38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc
$exec_lo = S_MOV_B32_term %38:sreg_32
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
%37:vgpr_32 = V_ADD_U32_e64 5, %37:vgpr_32, 0, implicit $exec
bb.2:
$exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc
$vgpr8 = COPY %37:vgpr_32
$sgpr0 = COPY %9:sreg_32
%2:sreg_32 = COPY $sgpr0
SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %2:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
...
---
# Test that we handle whole wave mode (WWM) operations in the shader body
# (bb.1) correctly.
#
# Expected changes, as pinned by the CHECK lines:
#  * bb.0: SI_INIT_WHOLE_WAVE is replaced by an S_OR_SAVEEXEC_B32 -1 placed at
#    the very top of the block, and the S_AND_B32 reads its result.
#  * bb.1: the first V_ADD_U32_e64 (24) stays outside the WWM region. The
#    IMPLICIT_DEF, V_SET_INACTIVE_B32 and V_ADD_U32_e64 (42) that feed the
#    STRICT_WWM pseudo are bracketed by ENTER_STRICT_WWM -1 / EXIT_STRICT_WWM.
#    The IMPLICIT_DEF result becomes dead, because the undef operand of
#    V_SET_INACTIVE_B32 is rewritten to the ENTER_STRICT_WWM result.
#  * bb.1: STRICT_WWM itself becomes a V_MOV_B32_e32 with an early-clobber
#    destination, emitted after EXIT_STRICT_WWM.
#  * bb.2: the instructions are unchanged.
#
name: test_wwm_bb1
alignment: 1
exposesReturnsTwice: false
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: test_wwm_bb1
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY4]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc
; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 24, [[COPY3]], 0, implicit $exec
; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_32_xm0_xexec = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: dead [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
; CHECK-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY3]], 0, 71, undef [[ENTER_STRICT_WWM]], implicit $exec, implicit-def $scc
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 42, [[V_SET_INACTIVE_B32_]], 0, implicit $exec
; CHECK-NEXT: $exec_lo = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
; CHECK-NEXT: early-clobber [[COPY2]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e64_]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY4]], implicit-def $scc
; CHECK-NEXT: $vgpr8 = COPY [[COPY2]]
; CHECK-NEXT: $vgpr9 = COPY [[COPY3]]
; CHECK-NEXT: $sgpr0 = COPY [[COPY]]
; CHECK-NEXT: SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
bb.0:
successors: %bb.1, %bb.2
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9
%9:sreg_32 = COPY $sgpr0
undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1
%1.sub1:ccr_sgpr_64 = COPY $sgpr2
%40:vgpr_32 = COPY $vgpr9
%36:vgpr_32 = COPY $vgpr8
%14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec
%16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
%38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc
$exec_lo = S_MOV_B32_term %38:sreg_32
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
%36:vgpr_32 = V_ADD_U32_e64 24, %36:vgpr_32, 0, implicit $exec
%20:sreg_32_xm0_xexec = IMPLICIT_DEF
%19:vgpr_32 = V_SET_INACTIVE_B32 0, %36:vgpr_32, 0, 71, undef %20, implicit $exec, implicit-def $scc
%18:vgpr_32 = V_ADD_U32_e64 42, %19:vgpr_32, 0, implicit $exec
%40:vgpr_32 = STRICT_WWM %18:vgpr_32, implicit $exec
bb.2:
$exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc
$vgpr8 = COPY %40:vgpr_32
$vgpr9 = COPY %36:vgpr_32
$sgpr0 = COPY %9:sreg_32
SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %9:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...
|