File: s_or_saveexec_xor_combine.mir

package info (click to toggle)
llvm-toolchain-17 1%3A17.0.6-22
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,799,624 kB
  • sloc: cpp: 6,428,607; ansic: 1,383,196; asm: 793,408; python: 223,504; objc: 75,364; f90: 60,502; lisp: 33,869; pascal: 15,282; sh: 9,684; perl: 7,453; ml: 4,937; awk: 3,523; makefile: 2,889; javascript: 2,149; xml: 888; fortran: 619; cs: 573
file content (127 lines) | stat: -rw-r--r-- 4,499 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck --check-prefixes=GCN,WAVE32 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck --check-prefixes=GCN,WAVE64 %s

---

# After the Optimize exec masking (post-RA) pass, codegen can end up with the following sequence:
# s_or_saveexec_b32 s0, s0
# s_xor_b32 exec_lo, exec_lo, s0
#
# This can be combined into one instruction:
# s_andn2_saveexec_b32 s0, s0

# Ensure the transformation gets applied in the b32 case.
# GCN-LABEL: name: s_or_saveexec_xor_combine_b32
# WAVE32: S_ANDN2_SAVEEXEC_B32
name: s_or_saveexec_xor_combine_b32
tracksRegLiveness: true
# Basic 32-bit pattern: S_OR_SAVEEXEC_B32 defines $sgpr0 and the very next
# instruction is an S_XOR_B32 that writes $exec_lo from $exec_lo and $sgpr0.
# The check above expects the pair to come out as S_ANDN2_SAVEEXEC_B32.
body: |
  bb.0:
    liveins: $sgpr0
    renamable $sgpr0 = S_OR_SAVEEXEC_B32 killed renamable $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec
    $exec_lo = S_XOR_B32 $exec_lo, renamable $sgpr0, implicit-def $scc
...

---

# Ensure the transformation gets applied in the b64 case.
# GCN-LABEL: name: s_or_saveexec_xor_combine_b64
# WAVE64: S_ANDN2_SAVEEXEC_B64
name: s_or_saveexec_xor_combine_b64
tracksRegLiveness: true
# 64-bit variant of the basic pattern: the saved mask lives in the register
# pair $sgpr0_sgpr1 and the S_XOR_B64 operates on the full $exec register.
# The check above expects S_ANDN2_SAVEEXEC_B64 in the wave64 configuration.
body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    renamable $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
    $exec = S_XOR_B64 $exec, renamable $sgpr0_sgpr1, implicit-def $scc
...

---

# Ensure the transformation does get applied even if the operands are swapped.
# GCN-LABEL: name: s_or_saveexec_xor_combine_b32_swap
# WAVE32: S_ANDN2_SAVEEXEC_B32
name: s_or_saveexec_xor_combine_b32_swap
tracksRegLiveness: true
# Same as the basic 32-bit pattern, except the S_XOR_B32 source operands are
# in the opposite order: $sgpr0 comes first and $exec_lo second.
body: |
  bb.0:
    liveins: $sgpr0
    renamable $sgpr0 = S_OR_SAVEEXEC_B32 killed renamable $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec
    $exec_lo = S_XOR_B32 renamable $sgpr0, $exec_lo, implicit-def $scc
...

---

# Ensure the transformation does get applied if source and dest operand for s_or_saveexec are not equal.
# GCN-LABEL: name: s_or_saveexec_xor_combine_b32_inequal_operands
# WAVE32: S_ANDN2_SAVEEXEC_B32
name: s_or_saveexec_xor_combine_b32_inequal_operands
tracksRegLiveness: true
# Here S_OR_SAVEEXEC_B32 reads $sgpr1 but writes $sgpr0, so its source and
# destination registers differ. The following S_XOR_B32 still xors $exec_lo
# with the destination ($sgpr0), so the pair is expected to be combined.
# The check pattern above names the full 32-bit opcode, like the other
# positive 32-bit tests in this file.
body: |
  bb.0:
    liveins: $sgpr0, $sgpr1
    renamable $sgpr0 = S_OR_SAVEEXEC_B32 killed renamable $sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
    $exec_lo = S_XOR_B32 $exec_lo, renamable $sgpr0, implicit-def $scc
...

---

# Ensure the transformation does not get applied if s_xor does not use the destination register of s_or_saveexec as an input operand.
# GCN-LABEL: name: s_or_saveexec_xor_combine_b32_wrong_input
# WAVE32: S_OR_SAVEEXEC
# WAVE32: S_XOR_B32
name: s_or_saveexec_xor_combine_b32_wrong_input
tracksRegLiveness: true
# Negative case: S_OR_SAVEEXEC_B32 defines $sgpr0, but the S_XOR_B32 that
# follows reads $sgpr1 instead. The checks above expect both original
# instructions to remain in the output.
body: |
  bb.0:
    liveins: $sgpr0, $sgpr1
    renamable $sgpr0 = S_OR_SAVEEXEC_B32 killed renamable $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec
    $exec_lo = S_XOR_B32 $exec_lo, renamable $sgpr1, implicit-def $scc
...

---


# Ensure the transformation does not get applied if the instructions don't appear sequentially.
# GCN-LABEL: name: s_or_saveexec_xor_combine_b32_non_sequence
# WAVE32: S_OR_SAVEEXEC
# WAVE32: S_MOV_B32
# WAVE32: S_XOR_B32
name: s_or_saveexec_xor_combine_b32_non_sequence
tracksRegLiveness: true
# Negative case: an S_MOV_B32 copying $sgpr0 into $sgpr1 sits between the
# S_OR_SAVEEXEC_B32 and the S_XOR_B32, and the xor reads the copy ($sgpr1).
# The checks above expect all three instructions to remain, in order.
body: |
  bb.0:
    liveins: $sgpr0, $sgpr1
    renamable $sgpr0 = S_OR_SAVEEXEC_B32 killed renamable $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec
    renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
    $exec_lo = S_XOR_B32 $exec_lo, renamable $sgpr1, implicit-def $scc
...

---

# Don't apply the transformation if the basic block only has a single instruction.

# GCN-LABEL: name: s_or_saveexec_xor_combine_b32_last_inst
# WAVE32: S_OR_SAVEEXEC
name: s_or_saveexec_xor_combine_b32_last_inst
tracksRegLiveness: true
# Edge case: the S_OR_SAVEEXEC_B32 is the only instruction in bb.0, so no
# instruction follows it. The check above expects it to be left in place.
body: |
  bb.0:
    liveins: $sgpr0
    renamable $sgpr0 = S_OR_SAVEEXEC_B32 killed renamable $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec
...

---

# Don't apply the transformation if the basic block ends with an S_OR_SAVEEXEC_B32 instruction.

# GCN-LABEL: name: s_or_saveexec_xor_combine_b32_or_saveexec_terminator
# WAVE32: S_MOV_B32
# WAVE32: S_OR_SAVEEXEC
name: s_or_saveexec_xor_combine_b32_or_saveexec_terminator
tracksRegLiveness: true
# Edge case: bb.0 holds an S_MOV_B32 followed by an S_OR_SAVEEXEC_B32 as the
# final instruction of the block, so nothing follows the save-exec. The
# checks above expect both instructions to remain in the output.
body: |
  bb.0:
    liveins: $sgpr0, $sgpr1
    renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
    renamable $sgpr0 = S_OR_SAVEEXEC_B32 killed renamable $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec