File: coalescing-with-subregs-in-loop-bug.mir

package info (click to toggle)
llvm-toolchain-14 1%3A14.0.6-12
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,496,180 kB
  • sloc: cpp: 5,593,972; ansic: 986,872; asm: 585,869; python: 184,223; objc: 72,530; lisp: 31,119; f90: 27,793; javascript: 9,780; pascal: 9,762; sh: 9,482; perl: 7,468; ml: 5,432; awk: 3,523; makefile: 2,538; xml: 953; cs: 573; fortran: 567
file content (98 lines) | stat: -rw-r--r-- 3,864 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -run-pass=simple-register-coalescing,rename-independent-subregs %s -o - | FileCheck -check-prefix=GCN %s

# This test is for a bug where the following happens:
#
# Inside the loop, %29.sub2 is used in a V_LSHLREV whose result is then used
# in an LDS read. %29 is a 128 bit value that is linked by copies to
# %45 (from phi elimination), %28 (the value in the loop pre-header),
# %31 (defined and subreg-modified in the loop, and used after the loop)
# and %30:
#
#     %45:vreg_128 = COPY killed %28
# bb.1:
#     %29:vreg_128 = COPY killed %45
#     %39:vgpr_32 = V_LSHLREV_B32_e32 2, %29.sub2, implicit $exec
#     %31:vreg_128 = COPY killed %29
#     %31.sub1:vreg_128 = COPY %34
#     %30:vreg_128 = COPY %31
#     %45:vreg_128 = COPY killed %30
#     S_CBRANCH_EXECNZ %bb.1, implicit $exec
#     S_BRANCH %bb.2
# bb.2:
#     undef %32.sub0:vreg_128 = COPY killed %31.sub0
#
# So this coalesces together into a single 128 bit value whose sub1 is modified
# in the loop, but the sub2 used in the V_LSHLREV is not modified in the loop.
#
# The bug is that the coalesced value has a L00000004 subrange (for sub2) that
# says that it is not live up to the end of the loop block. The symptom is that
# Rename Independent Subregs separates sub2 into its own register and, because
# that register is not considered live around the loop, adds an IMPLICIT_DEF
# for it just before the loop backedge.

# GCN: bb.1:
# GCN: V_LSHLREV_B32_e32 2, [[val:%[0-9][0-9]*]].sub2
# GCN-NOT: [[val]]:vreg_128 = IMPLICIT_DEF

---
# Reduced reproducer: a single function with a pre-header (bb.0), a
# self-looping block (bb.1) and an exit block (bb.2).
#
# bb.0 (pre-header):
#   * Fills all four 32-bit lanes of %28 from S_MOV_B32 constants
#     (sub0 = 0, sub1 = 1, sub2 = 255, sub3 = 2).
#   * Copies %28 into %45, which bb.1 reads on entry.
#   * Also seeds %43 (from an S_MOV_B64 0) and %44 (from an undef 128-bit
#     value); bb.1 reads both on entry.
#
# bb.1 (loop, successors are bb.1 and bb.2):
#   * %29 is copied from %45; %29.sub2 is the operand of V_LSHLREV_B32_e32,
#     whose result %39 is used as an operand of the DS_READ2_B32 instructions
#     (directly, or via V_ADD_CO_U32_e32).
#   * %31 is copied from %29 and then only its sub1 lane is overwritten (with
#     %34); sub0, sub2 and sub3 are not redefined inside the loop.
#   * %31 is copied to %30 and then back into %45 before the branch, which
#     closes the copy chain %45 -> %29 -> %31 -> %30 -> %45 across the
#     backedge.
#   * %1 is the S_OR_B64 of the compare result %18 and the incoming %0; it is
#     removed from $exec with S_ANDN2_B64_term, and S_CBRANCH_EXECNZ branches
#     back to bb.1 while $exec is non-zero.
#
# bb.2 (exit):
#   * ORs %1 back into $exec.
#   * Reads %31.sub0 after the loop into %32.sub0, sets %32.sub2 from a
#     V_MOV_B32 0, and copies %32 to $vgpr0_vgpr1_vgpr2_vgpr3 before
#     S_ENDPGM.
name: _amdgpu_cs_main
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1

    %3:sgpr_32 = S_MOV_B32 0
    undef %19.sub1:vreg_128 = COPY undef %3
    %4:sgpr_32 = S_MOV_B32 1
    %5:sgpr_32 = S_MOV_B32 2
    %11:sreg_32_xm0 = S_MOV_B32 255
    undef %28.sub0:vreg_128 = COPY killed %3
    %28.sub1:vreg_128 = COPY killed %4
    %28.sub2:vreg_128 = COPY killed %11
    %28.sub3:vreg_128 = COPY killed %5
    %2:sreg_64 = S_MOV_B64 0
    %34:sreg_32 = S_MOV_B32 7
    %37:vreg_128 = COPY undef %42:vreg_128
    %43:sreg_64 = COPY killed %2
    %44:vreg_128 = COPY killed %37
    %45:vreg_128 = COPY killed %28

  bb.1:
    successors: %bb.1, %bb.2

    %29:vreg_128 = COPY killed %45
    %36:vreg_128 = COPY killed %44
    %0:sreg_64 = COPY killed %43
    %39:vgpr_32 = V_LSHLREV_B32_e32 2, %29.sub2, implicit $exec
    %41:vgpr_32 = V_ADD_CO_U32_e32 1152, %39, implicit-def dead $vcc, implicit $exec
    $m0 = S_MOV_B32 -1
    %12:vreg_64 = DS_READ2_B32 killed %41, 0, 1, 0, implicit $m0, implicit $exec
    %13:vreg_64 = DS_READ2_B32 %39, -112, -111, 0, implicit $m0, implicit $exec
    %14:vreg_64 = DS_READ2_B32 %39, 0, 1, 0, implicit $m0, implicit $exec
    %40:vgpr_32 = V_ADD_CO_U32_e32 1160, %39, implicit-def dead $vcc, implicit $exec
    %15:vreg_64 = DS_READ2_B32 killed %40, 0, 1, 0, implicit $m0, implicit $exec
    %16:vreg_64 = DS_READ2_B32 %39, -110, -109, 0, implicit $m0, implicit $exec
    %17:vreg_64 = DS_READ2_B32 %39, 2, 3, 0, implicit $m0, implicit $exec
    undef %35.sub1:vreg_128 = COPY undef %34
    %31:vreg_128 = COPY killed %29
    %31.sub1:vreg_128 = COPY %34
    %38:vgpr_32 = V_ADD_CO_U32_e32 1, %36.sub0, implicit-def dead $vcc, implicit $exec
    %18:sreg_64 = V_CMP_LT_I32_e64 5, %38, implicit $exec
    %1:sreg_64 = S_OR_B64 killed %18, killed %0, implicit-def $scc
    %30:vreg_128 = COPY %31
    %43:sreg_64 = COPY %1
    %44:vreg_128 = COPY %35
    %45:vreg_128 = COPY killed %30
    $exec = S_ANDN2_B64_term $exec, %1, implicit-def $scc
    S_CBRANCH_EXECNZ %bb.1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    $exec = S_OR_B64 $exec, killed %1, implicit-def $scc
    %33:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    undef %32.sub0:vreg_128 = COPY killed %31.sub0
    %32.sub2:vreg_128 = COPY %33
    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %32:vreg_128
    S_ENDPGM 0

...