File: tied-op-for-wwm-scratch-reg-spill-restore.mir

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (142 lines) | stat: -rw-r--r-- 7,450 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -run-pass=prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

# The COPY that moves the return value to VGPR0 should not be removed during machine-cp. The spill restore of the same register that follows
# is meant to reload only its inactive lanes. Marking the register itself as the tied-op in the spill reload prevents the undesired optimization.

---
# Case: the WWM-reserved register ($vgpr0) is the same register that SI_RETURN
# uses. The expected output keeps the COPY from $vgpr1 into $vgpr0, and the
# reload of $vgpr0 carries an extra "implicit $vgpr0(tied-def 0)" operand.
name:            wwm_scratch_reg_spill_reload_of_outgoing_reg
tracksRegLiveness: true
machineFunctionInfo:
  # $vgpr0 is also the operand of SI_RETURN at the end of the body.
  wwmReservedRegs: ['$vgpr0']
  isEntryFunction: false
  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
# In the body, the ';'-prefixed lines are the autogenerated FileCheck
# assertions; the unprefixed instructions after them are the input to the
# passes named on the RUN line.
body:             |
  bb.0:
    liveins: $sgpr20, $vgpr1
    ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_reg
    ; GCN: liveins: $sgpr20, $vgpr0, $vgpr1
    ; GCN-NEXT: {{  $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0
    ; GCN-NEXT: $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0
    $vgpr0 = IMPLICIT_DEF
    $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0
    $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0
...

# The reload of vgpr0 requires the tied-op as it is a subreg in the outgoing tuple register vgpr0_vgpr1.
# The vgpr2 doesn't need the tied-op in the reload as it isn't holding any return value.
---
# Case: two WWM-reserved registers, with SI_RETURN using the tuple
# $vgpr0_vgpr1. In the expected output only the reload of $vgpr0 carries the
# "implicit $vgpr0(tied-def 0)" operand; the reload of $vgpr2 has no tied
# operand.
name:            wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg
tracksRegLiveness: true
machineFunctionInfo:
  # $vgpr0 overlaps the returned tuple $vgpr0_vgpr1; $vgpr2 does not.
  wwmReservedRegs: ['$vgpr0', '$vgpr2']
  isEntryFunction: false
  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
# In the body, the ';'-prefixed lines are the autogenerated FileCheck
# assertions; the unprefixed instructions after them are the input to the
# passes named on the RUN line.
body:             |
  bb.0:
    liveins: $sgpr20, $sgpr21, $vgpr1
    ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg
    ; GCN: liveins: $sgpr20, $sgpr21, $vgpr0, $vgpr1, $vgpr2
    ; GCN-NEXT: {{  $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0
    ; GCN-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr21, 0, $vgpr2
    ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
    $vgpr0 = IMPLICIT_DEF
    $vgpr2 = IMPLICIT_DEF
    $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0
    $vgpr2 = V_WRITELANE_B32 killed $sgpr21, 0, $vgpr2
    $vgpr0 = COPY $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0_vgpr1
...

# Tied op not required in the spill reload of vgpr2.

---
# Case: the WWM-reserved register ($vgpr2) is not part of the register used
# by SI_RETURN ($vgpr0_vgpr1). The expected reload of $vgpr2 has no tied
# operand.
name:            wwm_scratch_reg_spill_reload_different_outgoing_reg
tracksRegLiveness: true
machineFunctionInfo:
  # $vgpr2 does not overlap the returned tuple $vgpr0_vgpr1.
  wwmReservedRegs: ['$vgpr2']
  isEntryFunction: false
  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
# In the body, the ';'-prefixed lines are the autogenerated FileCheck
# assertions; the unprefixed instructions after them are the input to the
# passes named on the RUN line.
body:             |
  bb.0:
    liveins: $sgpr20, $vgpr1
    ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_different_outgoing_reg
    ; GCN: liveins: $sgpr20, $vgpr1, $vgpr2
    ; GCN-NEXT: {{  $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr2
    ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
    $vgpr2 = IMPLICIT_DEF
    $vgpr2 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr2
    $vgpr0 = COPY $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0_vgpr1
...

# Tied op not required in the spill reload of vgpr40 which is in the CSR range.
---
# Case: the WWM-reserved register is $vgpr40 and SI_RETURN uses $vgpr0. The
# expected spill/reload of $vgpr40 is wrapped in S_OR_SAVEEXEC_B64 -1 (the
# functions above expect S_XOR_SAVEEXEC_B64 -1), and the reload has no tied
# operand.
name:            wwm_csr_spill_reload
tracksRegLiveness: true
machineFunctionInfo:
  # $vgpr40 does not overlap the returned register $vgpr0.
  wwmReservedRegs: ['$vgpr40']
  isEntryFunction: false
  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
# In the body, the ';'-prefixed lines are the autogenerated FileCheck
# assertions; the unprefixed instructions after them are the input to the
# passes named on the RUN line.
body:             |
  bb.0:
    liveins: $sgpr20, $vgpr1
    ; GCN-LABEL: name: wwm_csr_spill_reload
    ; GCN: liveins: $sgpr20, $vgpr1, $vgpr40
    ; GCN-NEXT: {{  $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr40 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr40
    ; GCN-NEXT: $sgpr20 = V_READLANE_B32 $vgpr40, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0
    $vgpr40 = IMPLICIT_DEF
    $vgpr40 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr40
    $sgpr20 = V_READLANE_B32 $vgpr40, 0, implicit $exec
    $vgpr0 = COPY killed $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0
...