1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-vgpr-index-mode -run-pass=greedy -stress-regalloc=16 -o - %s | FileCheck -check-prefixes=GCN %s
# An interval for a register that was partially defined was split, creating
# a new use (a COPY) which was reached by the undef point. In particular,
# there was a subrange of the new register which was reached by an "undef"
# point. When the code in extendSegmentsToUses verified value numbers between
# the new and the old live ranges, it did not account for this kind of a
# situation and asserted expecting the old value to exist. For a PHI node
# it is legal to have a missing predecessor value as long as the end of
# the predecessor is jointly dominated by the undefs.
#
# A simplified form of this can be illustrated as
#
# bb.1:
# %0:vreg_64 = IMPLICIT_DEF
# ...
# S_CBRANCH_SCC1 %bb.2, implicit $vcc
# S_BRANCH %bb.3
#
# bb.2:
# ; predecessors: %bb.1, %bb.4
# dead %1:vreg_64 = COPY %0:vreg_64 ; This is the point of the inserted split
# ...
# S_BRANCH %bb.5
#
# bb.3:
# ; predecessors: %bb.1
# undef %0.sub0:vreg_64 = COPY %123:sreg_32 ; undef point for %0.sub1
# ...
# S_BRANCH %bb.4
#
# bb.4:
# ; predecessors: %bb.3
# ...
# S_BRANCH %bb.2
#
# This test exposes this scenario, which previously caused an assert.
---
# Machine function used as the regression input. Several wide virtual
# registers in the body (%3, %15, %51) are written in full on some control-flow
# paths and only lane by lane (`undef %N.subK = ...`) on others before being
# read at a later join block.
# NOTE(review): the instruction order and virtual register numbering are
# presumably what reproduces the original failure - avoid renumbering or
# reordering without re-validating the test.
name: _amdgpu_ps_main
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: $sgpr32
# Physical VGPRs that are live on entry, paired with the virtual registers
# they are copied into at the top of bb.0.
liveins:
- { reg: '$vgpr2', virtual-reg: '%0' }
- { reg: '$vgpr3', virtual-reg: '%1' }
- { reg: '$vgpr4', virtual-reg: '%2' }
body: |
; Entry block: copy the three live-in VGPRs into %2, %1 and %0, then branch on
; an undef $scc to bb.2 (otherwise fall through to bb.1).
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $vgpr2, $vgpr3, $vgpr4
%2:vgpr_32 = COPY $vgpr4
%1:vgpr_32 = COPY $vgpr3
%0:vgpr_32 = COPY $vgpr2
S_CBRANCH_SCC0 %bb.2, implicit undef $scc
; First path into bb.5: writes %3.sub0, %3.sub1 and %3.sub2 (sub3 is not
; written on this path).
bb.1:
successors: %bb.5(0x80000000)
undef %3.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
%3.sub1:vreg_128 = COPY %3.sub0
%3.sub2:vreg_128 = COPY %3.sub0
S_BRANCH %bb.5
; Empty dispatch block choosing between bb.3 and bb.4.
bb.2:
successors: %bb.3(0x40000000), %bb.4(0x40000000)
S_CBRANCH_SCC0 %bb.4, implicit undef $scc
; Second path into bb.5: writes only %3.sub0 and %3.sub1.
bb.3:
successors: %bb.5(0x80000000)
undef %3.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
%3.sub1:vreg_128 = COPY %3.sub0
S_BRANCH %bb.5
; Third path into bb.5: the whole of %3 comes from an IMPLICIT_DEF.
bb.4:
successors: %bb.5(0x80000000)
%3:vreg_128 = IMPLICIT_DEF
; Join of the three %3 paths. Initialises %4 (read in bb.24) and either skips
; straight to bb.22 or enters the main body at bb.6.
bb.5:
successors: %bb.6(0x40000000), %bb.22(0x40000000)
%4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
S_CBRANCH_SCC1 %bb.22, implicit undef $scc
S_BRANCH %bb.6
; Sets up %5, %9.sub1, %10.sub0, all eight lanes of %11, the image sample
; result %12 and %14, and gives %15 a full IMPLICIT_DEF. %15 is later
; partially redefined in bb.7, bb.9 and bb.10 and read in bb.23.
bb.6:
successors: %bb.8(0x40000000), %bb.11(0x40000000)
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
dead %6:vgpr_32 = V_MUL_F32_e32 0, undef %7:vgpr_32, implicit $mode, implicit $exec
dead %8:vgpr_32 = V_MUL_F32_e32 0, %2, implicit $mode, implicit $exec
undef %9.sub1:vreg_64 = V_MUL_F32_e32 0, %1, implicit $mode, implicit $exec
undef %10.sub0:vreg_128 = V_MUL_F32_e32 0, %0, implicit $mode, implicit $exec
undef %11.sub0:sgpr_256 = S_MOV_B32 0
%11.sub1:sgpr_256 = COPY %11.sub0
%11.sub2:sgpr_256 = COPY %11.sub0
%11.sub3:sgpr_256 = COPY %11.sub0
%11.sub4:sgpr_256 = COPY %11.sub0
%11.sub5:sgpr_256 = COPY %11.sub0
%11.sub6:sgpr_256 = COPY %11.sub0
%11.sub7:sgpr_256 = COPY %11.sub0
%12:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %9, %11, undef %13:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4)
%14:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
%15:vreg_128 = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.8, implicit undef $scc
S_BRANCH %bb.11
; Reached from bb.11. Rewrites %15.sub0..sub2 and replaces %5 with an
; IMPLICIT_DEF. The FileCheck directive below pins the `undef %15.sub0` def
; in the allocator output for this block.
bb.7:
successors: %bb.13(0x80000000)
; GCN-LABEL: bb.7:
; GCN: undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
%15.sub1:vreg_128 = COPY %15.sub0
%15.sub2:vreg_128 = COPY %15.sub0
%5:vgpr_32 = IMPLICIT_DEF
S_BRANCH %bb.13
; Empty dispatch block choosing between bb.9 and bb.10.
bb.8:
successors: %bb.9(0x40000000), %bb.10(0x40000000)
S_CBRANCH_SCC0 %bb.10, implicit undef $scc
; Writes %15.sub0..sub2 from the constant 0; the `undef %15.sub0` def is
; checked in the output.
bb.9:
successors: %bb.12(0x80000000)
; GCN-LABEL: bb.9:
; GCN: undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
%15.sub1:vreg_128 = COPY %15.sub0
%15.sub2:vreg_128 = COPY %15.sub0
S_BRANCH %bb.12
; Same shape as bb.9 but with the constant 2143289344; also checked.
bb.10:
successors: %bb.12(0x80000000)
; GCN-LABEL: bb.10:
; GCN: undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec
undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec
%15.sub1:vreg_128 = COPY %15.sub0
%15.sub2:vreg_128 = COPY %15.sub0
S_BRANCH %bb.12
; Tests %14 != 0 under the current $exec mask and branches to bb.7 when $vcc
; is non-zero, otherwise to bb.13. Entered from bb.6 (where %14 is -1) and
; from bb.12 (where %14 is 0).
bb.11:
successors: %bb.7(0x40000000), %bb.13(0x40000000)
%16:sreg_64 = V_CMP_NE_U32_e64 0, %14, implicit $exec
%17:sreg_64 = S_AND_B64 $exec, %16, implicit-def dead $scc
$vcc = COPY %17
S_CBRANCH_VCCNZ %bb.7, implicit $vcc
S_BRANCH %bb.13
; Join of bb.9 and bb.10: overwrites %14 with 0 and %5 with -1.
bb.12:
successors: %bb.11(0x80000000)
%14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
S_BRANCH %bb.11
; Long straight-line arithmetic block consuming %10.sub0, %12.sub0 and two
; scalar buffer loads (%20, %26); produces %40. It also derives $vcc from %5
; and branches to bb.15 when $vcc is zero, otherwise falls through to bb.14.
bb.13:
successors: %bb.15(0x40000000), %bb.14(0x40000000)
%18:vgpr_32 = V_MAD_F32_e64 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $mode, implicit $exec
%19:vgpr_32 = V_MAD_F32_e64 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%20:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sgpr_128, 1040, 0 :: (dereferenceable invariant load (s128))
%22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $mode, implicit $exec
%23:vgpr_32 = V_MAD_F32_e64 0, %18, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%24:vgpr_32 = COPY %20.sub3
%25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $mode, implicit $exec
%26:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sgpr_128, 1056, 0 :: (dereferenceable invariant load (s128))
%28:vgpr_32 = V_MAD_F32_e64 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $mode, implicit $exec
%29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $mode, implicit $exec
%30:vgpr_32 = V_RCP_F32_e32 %29, implicit $mode, implicit $exec
%25:vgpr_32 = V_MAC_F32_e32 0, %18, %25, implicit $mode, implicit $exec
%31:vgpr_32 = V_MAD_F32_e64 0, target-flags(amdgpu-gotprel) 0, 0, %12.sub0, 0, %24, 0, 0, implicit $mode, implicit $exec
%32:vgpr_32 = V_ADD_F32_e32 %25, %31, implicit $mode, implicit $exec
%33:vgpr_32 = V_MUL_F32_e32 %22, %30, implicit $mode, implicit $exec
%34:vgpr_32 = V_MUL_F32_e32 %23, %30, implicit $mode, implicit $exec
%35:vgpr_32 = V_MUL_F32_e32 %32, %30, implicit $mode, implicit $exec
%36:vgpr_32 = V_MUL_F32_e32 0, %34, implicit $mode, implicit $exec
%36:vgpr_32 = V_MAC_F32_e32 0, %33, %36, implicit $mode, implicit $exec
%37:vgpr_32 = V_MAD_F32_e64 0, %35, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%38:sreg_64_xexec = V_CMP_NE_U32_e64 0, %5, implicit $exec
%39:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %38, implicit $exec
V_CMP_NE_U32_e32 1, %39, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
%40:vgpr_32 = V_ADD_F32_e32 %36, %37, implicit $mode, implicit $exec
S_CBRANCH_VCCZ %bb.15, implicit $vcc
; Fallthrough from bb.13; only forwards to bb.17.
bb.14:
successors: %bb.17(0x80000000)
S_BRANCH %bb.17
; Builds a lane mask (%46) from two comparisons on %41, saves the current
; $exec in %47, narrows $exec to %47 & %46 and skips to bb.18 if no lanes
; remain active.
bb.15:
successors: %bb.16(0x40000000), %bb.18(0x40000000)
%41:vgpr_32 = V_MAD_F32_e64 0, %40, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%42:sreg_64 = V_CMP_LE_F32_e64 0, 0, 0, %41, 0, implicit $mode, implicit $exec
%43:sreg_64 = V_CMP_GE_F32_e64 0, 1065353216, 0, %41, 0, implicit $mode, implicit $exec
%44:sreg_64 = S_AND_B64 %43, %43, implicit-def dead $scc
%45:sreg_64 = S_AND_B64 %42, %42, implicit-def dead $scc
%46:sreg_64 = S_AND_B64 %45, %44, implicit-def dead $scc
%47:sreg_64 = COPY $exec, implicit-def $exec
%48:sreg_64 = S_AND_B64 %47, %46, implicit-def dead $scc
$exec = S_MOV_B64_term %48
S_CBRANCH_EXECZ %bb.18, implicit $exec
S_BRANCH %bb.16
; Empty block on the path where some lanes stayed active.
bb.16:
successors: %bb.18(0x80000000)
S_BRANCH %bb.18
; Join of bb.14, bb.19 and bb.20. Gives %51 a full IMPLICIT_DEF (sub0 may be
; overwritten in bb.21) and branches on %5 != 0.
bb.17:
successors: %bb.21(0x40000000), %bb.23(0x40000000)
%49:sreg_64 = V_CMP_NE_U32_e64 0, %5, implicit $exec
%50:sreg_64 = S_AND_B64 $exec, %49, implicit-def dead $scc
%51:vreg_128 = IMPLICIT_DEF
$vcc = COPY %50
S_CBRANCH_VCCNZ %bb.21, implicit $vcc
S_BRANCH %bb.23
; ORs the mask saved in %47 back into $exec, then reads %3.sub0, %3.sub1 and
; %3.sub2 - the lanes that were defined differently in bb.1, bb.3 and bb.4.
; Also resets %5 to 0.
bb.18:
successors: %bb.20(0x40000000), %bb.19(0x40000000)
$exec = S_OR_B64 $exec, %47, implicit-def $scc
%52:vgpr_32 = V_MAD_F32_e64 0, %3.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 1, %3.sub0, 0, 0, implicit $mode, implicit $exec
%53:vgpr_32 = V_MUL_F32_e32 -2147483648, %3.sub1, implicit $mode, implicit $exec
%53:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel32-hi) 1065353216, %3.sub2, %53, implicit $mode, implicit $exec
%54:vgpr_32 = V_MUL_F32_e32 %53, %53, implicit $mode, implicit $exec
%54:vgpr_32 = V_MAC_F32_e32 %52, %52, %54, implicit $mode, implicit $exec
%55:vgpr_32 = V_SQRT_F32_e32 %54, implicit $mode, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%56:vgpr_32 = V_MOV_B32_e32 981668463, implicit $exec
%57:sreg_64 = V_CMP_NGT_F32_e64 0, %55, 0, %56, 0, implicit $mode, implicit $exec
%58:sreg_64 = S_AND_B64 $exec, %57, implicit-def dead $scc
$vcc = COPY %58
S_CBRANCH_VCCZ %bb.20, implicit $vcc
; bb.19 and bb.20 are empty blocks that both forward to bb.17.
bb.19:
successors: %bb.17(0x80000000)
S_BRANCH %bb.17
bb.20:
successors: %bb.17(0x80000000)
S_BRANCH %bb.17
; Partially redefines %51: only sub0 is written, from the scalar %59.
bb.21:
successors: %bb.23(0x80000000)
%59:sreg_32 = S_MOV_B32 0
undef %51.sub0:vreg_128 = COPY %59
S_BRANCH %bb.23
; Join of bb.5 (early exit) and bb.23; only forwards to bb.24.
bb.22:
successors: %bb.24(0x80000000)
S_BRANCH %bb.24
; Consumer block: reads %15.sub0, %15.sub1 and %15.sub2 (fully defined in
; bb.6, partially redefined in bb.7/bb.9/bb.10) and %51.sub0 (bb.17/bb.21),
; and writes the result into %4.
bb.23:
successors: %bb.22(0x80000000)
undef %60.sub1:vreg_64 = V_CVT_I32_F32_e32 %1, implicit $mode, implicit $exec
%60.sub0:vreg_64 = V_CVT_I32_F32_e32 %0, implicit $mode, implicit $exec
undef %61.sub0:sgpr_256 = S_MOV_B32 0
%61.sub1:sgpr_256 = COPY %61.sub0
%61.sub2:sgpr_256 = COPY %61.sub0
%61.sub3:sgpr_256 = COPY %61.sub0
%61.sub4:sgpr_256 = COPY %61.sub0
%61.sub5:sgpr_256 = COPY %61.sub0
%61.sub6:sgpr_256 = COPY %61.sub0
%61.sub7:sgpr_256 = COPY %61.sub0
%62:vgpr_32 = V_MOV_B32_e32 1033100696, implicit $exec
%63:vgpr_32 = V_MUL_F32_e32 1060575065, %15.sub1, implicit $mode, implicit $exec
%63:vgpr_32 = V_MAC_F32_e32 1046066128, %15.sub0, %63, implicit $mode, implicit $exec
%64:vgpr_32 = IMAGE_LOAD_V1_V2 %60, %61, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4)
%64:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel) 0, %51.sub0, %64, implicit $mode, implicit $exec
%65:vgpr_32 = V_MUL_F32_e32 0, %64, implicit $mode, implicit $exec
%66:vgpr_32 = V_MUL_F32_e32 0, %65, implicit $mode, implicit $exec
%67:vgpr_32 = V_MAD_F32_e64 0, %66, 0, %62, 0, 0, 0, 0, implicit $mode, implicit $exec
%63:vgpr_32 = V_MAC_F32_e32 %15.sub2, %62, %63, implicit $mode, implicit $exec
%4:vgpr_32 = V_ADD_F32_e32 %63, %67, implicit $mode, implicit $exec
S_BRANCH %bb.22
; Exit block: scales %4 (from bb.5 or bb.23), packs it into %69, exports it
; and ends the program.
bb.24:
%68:vgpr_32 = V_MUL_F32_e32 0, %4, implicit $mode, implicit $exec
%69:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %70:vgpr_32, 0, %68, 0, 0, implicit $mode, implicit $exec
EXP 0, undef %71:vgpr_32, %69, undef %72:vgpr_32, undef %73:vgpr_32, -1, -1, 15, implicit $exec
S_ENDPGM 0
...
|