File: si-init-whole-wave.mir

package info (click to toggle)
llvm-toolchain-20 1%3A20.1.6-1~exp1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,111,304 kB
  • sloc: cpp: 7,438,677; ansic: 1,393,822; asm: 1,012,926; python: 241,650; f90: 86,635; objc: 75,479; lisp: 42,144; pascal: 17,286; sh: 10,027; ml: 5,082; perl: 4,730; awk: 3,523; makefile: 3,349; javascript: 2,251; xml: 892; fortran: 672
file content (135 lines) | stat: -rw-r--r-- 6,571 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o -  %s | FileCheck %s

---
# Test that we don't do silly things when there is no whole wave mode in the
# shader (aka bb.1).
#
name:            test_no_wwm
alignment:       1
exposesReturnsTwice: false
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: test_no_wwm
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT:   undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc
  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 5, [[COPY2]], 0, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, [[COPY3]], implicit-def $scc
  ; CHECK-NEXT:   $vgpr8 = COPY [[COPY2]]
  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY]]
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT:   SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY4]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8
    %9:sreg_32 = COPY $sgpr0
    undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1
    %1.sub1:ccr_sgpr_64 = COPY $sgpr2
    %37:vgpr_32 = COPY $vgpr8
    %14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec
    %16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
    %38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc
    $exec_lo = S_MOV_B32_term %38:sreg_32
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %37:vgpr_32 = V_ADD_U32_e64 5, %37:vgpr_32, 0, implicit $exec

  bb.2:
    $exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc
    $vgpr8 = COPY %37:vgpr_32
    $sgpr0 = COPY %9:sreg_32
    %2:sreg_32 = COPY $sgpr0
    SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %2:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
...

---
# Test that we handle WWM in the shader correctly.
#
name:            test_wwm_bb1
alignment:       1
exposesReturnsTwice: false
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: test_wwm_bb1
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT:   undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY4]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc
  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 24, [[COPY3]], 0, implicit $exec
  ; CHECK-NEXT:   [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_32_xm0_xexec = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT:   dead [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
  ; CHECK-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY3]], 0, 71, undef [[ENTER_STRICT_WWM]], implicit $exec, implicit-def $scc
  ; CHECK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 42, [[V_SET_INACTIVE_B32_]], 0, implicit $exec
  ; CHECK-NEXT:   $exec_lo = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
  ; CHECK-NEXT:   early-clobber [[COPY2]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e64_]], implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, [[COPY4]], implicit-def $scc
  ; CHECK-NEXT:   $vgpr8 = COPY [[COPY2]]
  ; CHECK-NEXT:   $vgpr9 = COPY [[COPY3]]
  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY]]
  ; CHECK-NEXT:   SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
  bb.0:
   successors: %bb.1, %bb.2
   liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9
   %9:sreg_32 = COPY $sgpr0
   undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1
   %1.sub1:ccr_sgpr_64 = COPY $sgpr2
   %40:vgpr_32 = COPY $vgpr9
   %36:vgpr_32 = COPY $vgpr8
   %14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec
   %16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
   %38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc
   $exec_lo = S_MOV_B32_term %38:sreg_32
   S_CBRANCH_EXECZ %bb.2, implicit $exec
   S_BRANCH %bb.1

  bb.1:
   %36:vgpr_32 = V_ADD_U32_e64 24, %36:vgpr_32, 0, implicit $exec
   %20:sreg_32_xm0_xexec = IMPLICIT_DEF
   %19:vgpr_32 = V_SET_INACTIVE_B32 0, %36:vgpr_32, 0, 71, undef %20, implicit $exec, implicit-def $scc
   %18:vgpr_32 = V_ADD_U32_e64 42, %19:vgpr_32, 0, implicit $exec
   %40:vgpr_32 = STRICT_WWM %18:vgpr_32, implicit $exec

  bb.2:
   $exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc
   $vgpr8 = COPY %40:vgpr_32
   $vgpr9 = COPY %36:vgpr_32
   $sgpr0 = COPY %9:sreg_32
   SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %9:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...