1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
|
//===-- AMDGPURegBankSelect.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Assign register banks to all register operands of G_ instructions using
/// machine uniformity analysis.
/// Sgpr - uniform values and some lane masks
/// Vgpr - divergent values that are not S1
/// Vcc  - divergent S1 values (lane masks)
/// However in some cases G_ instructions with this register bank assignment
/// can't be inst-selected. This is solved in AMDGPURegBankLegalize.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "amdgpu-regbankselect"
using namespace llvm;
using namespace AMDGPU;
namespace {
class AMDGPURegBankSelect : public MachineFunctionPass {
public:
static char ID;
AMDGPURegBankSelect() : MachineFunctionPass(ID) {
initializeAMDGPURegBankSelectPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
return "AMDGPU Register Bank Select";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
AU.addRequired<MachineUniformityAnalysisPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
// This pass assigns register banks to all virtual registers, and we maintain
// this property in subsequent passes
MachineFunctionProperties getSetProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::RegBankSelected);
}
};
} // End anonymous namespace.
// Pass registration boilerplate. The pass is registered under DEBUG_TYPE
// ("amdgpu-regbankselect") and lists as dependencies the same three analyses
// that getAnalysisUsage requests with addRequired<>.
INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE,
"AMDGPU Register Bank Select", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
"AMDGPU Register Bank Select", false, false)
// Storage for the pass ID that the constructor hands to MachineFunctionPass.
char AMDGPURegBankSelect::ID = 0;
// Reference to the same ID object, exposed under the llvm namespace.
char &llvm::AMDGPURegBankSelectID = AMDGPURegBankSelect::ID;
/// Allocates a new AMDGPURegBankSelect pass and returns it through the
/// FunctionPass base-class pointer.
FunctionPass *llvm::createAMDGPURegBankSelectPass() {
  FunctionPass *Pass = new AMDGPURegBankSelect();
  return Pass;
}
namespace {

/// Helper used by AMDGPURegBankSelect::runOnMachineFunction to pick a register
/// bank for a virtual register and to insert the COPYs needed when a register
/// that already carries a register class is defined or used by a G_
/// instruction.
///
/// The class is only used inside this file, so it lives in an anonymous
/// namespace to give it internal linkage.
class RegBankSelectHelper {
  MachineIRBuilder &B;
  MachineRegisterInfo &MRI;
  AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
  const MachineUniformityInfo &MUI;
  const RegisterBank *SgprRB;
  const RegisterBank *VgprRB;
  const RegisterBank *VccRB;

public:
  /// \param B    Builder used to create the COPY instructions; its
  ///             MachineRegisterInfo is the one this helper operates on.
  /// \param ILMA Queried for registers that are S32/S64 lane masks.
  /// \param MUI  Queried for uniformity of registers.
  /// \param RBI  Source of the SGPR, VGPR and VCC register banks.
  RegBankSelectHelper(MachineIRBuilder &B,
                      AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
                      const MachineUniformityInfo &MUI,
                      const RegisterBankInfo &RBI)
      : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI),
        SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
        VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
        VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}

  /// Returns the bank to assign to \p Reg:
  ///  - Sgpr if MUI reports \p Reg as uniform or ILMA reports it as an
  ///    S32/S64 lane mask,
  ///  - Vcc for any remaining register of type s1,
  ///  - Vgpr otherwise.
  const RegisterBank *getRegBankToAssign(Register Reg) const {
    if (MUI.isUniform(Reg) || ILMA.isS32S64LaneMask(Reg))
      return SgprRB;
    if (MRI.getType(Reg) == LLT::scalar(1))
      return VccRB;
    return VgprRB;
  }

  // %rc:RegClass(s32) = G_ ...
  // ...
  // %a = G_ ..., %rc
  // ->
  // %rb:RegBank(s32) = G_ ...
  // %rc:RegClass(s32) = COPY %rb
  // ...
  // %a = G_ ..., %rb
  /// Makes \p MI define a fresh register with bank \p RB instead of the
  /// register in \p DefOP, copies the fresh register back into the original
  /// one right after \p MI, and redirects G_ users of the original register to
  /// the fresh one.
  void reAssignRegBankOnDef(MachineInstr &MI, MachineOperand &DefOP,
                            const RegisterBank *RB) {
    // Register that already has Register class got it during pre-inst selection
    // of another instruction. Maybe cross bank copy was required so we insert a
    // copy that can be removed later. This simplifies post regbanklegalize
    // combiner and avoids need to special case some patterns.
    Register Reg = DefOP.getReg();
    LLT Ty = MRI.getType(Reg);
    Register NewReg = MRI.createVirtualRegister({RB, Ty});
    DefOP.setReg(NewReg);

    // Insert the copy after MI, skipping any PHIs and labels that follow it.
    auto &MBB = *MI.getParent();
    B.setInsertPt(MBB, MBB.SkipPHIsAndLabels(std::next(MI.getIterator())));
    B.buildCopy(Reg, NewReg);

    // The problem was discovered for uniform S1 that was used as both
    // lane mask(vcc) and regular sgpr S1.
    // - lane-mask(vcc) use was by si_if, this use is divergent and requires
    //   non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
    //   sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
    // - the regular sgpr S1(uniform) instruction is now broken since
    //   it uses sreg_64_xexec(S1) which is divergent.
    // Replace virtual registers with register class on generic instructions
    // uses with virtual registers with register bank.
    // The early-increment range is used because the loop body rewrites the
    // operands that the use list is built from.
    for (auto &UseMI : make_early_inc_range(MRI.use_instructions(Reg))) {
      if (!UseMI.isPreISelOpcode())
        continue;
      for (MachineOperand &Op : UseMI.operands()) {
        if (Op.isReg() && Op.getReg() == Reg)
          Op.setReg(NewReg);
      }
    }
  }

  // %a = G_ ..., %rc
  // ->
  // %rb:RegBank(s32) = COPY %rc
  // %a = G_ ..., %rb
  /// Makes \p UseOP read a fresh register with bank \p RB that is a COPY of
  /// the original register. The COPY is placed directly before \p MI, except
  /// when \p MI is a PHI: then it is placed after the instruction defining the
  /// original register (past any PHIs and labels) in that instruction's block.
  void constrainRegBankUse(MachineInstr &MI, MachineOperand &UseOP,
                           const RegisterBank *RB) {
    Register Reg = UseOP.getReg();
    LLT Ty = MRI.getType(Reg);
    Register NewReg = MRI.createVirtualRegister({RB, Ty});
    UseOP.setReg(NewReg);

    if (MI.isPHI()) {
      auto DefMI = MRI.getVRegDef(Reg)->getIterator();
      MachineBasicBlock *DefMBB = DefMI->getParent();
      B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));
    } else {
      B.setInstr(MI);
    }

    B.buildCopy(NewReg, Reg);
  }
};

} // End anonymous namespace.
/// Returns the register held by \p Op when it is a virtual register, and an
/// invalid (default-constructed) Register for non-register operands and for
/// non-virtual registers.
static Register getVReg(MachineOperand &Op) {
  // Operands of COPY and G_SI_CALL can be physical registers.
  if (Op.isReg() && Op.getReg().isVirtual())
    return Op.getReg();
  return Register();
}
/// Assigns a register bank to the virtual registers defined by COPYs and to
/// those defined or used by G_ instructions in \p MF. Returns false without
/// touching the function when instruction selection has already failed, and
/// true otherwise.
bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
  const bool ISelFailed = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::FailedISel);
  if (ISelFailed)
    return false;

  // Build a CSE-aware instruction builder and hook up its observer.
  const TargetPassConfig &PassConfig = getAnalysis<TargetPassConfig>();
  GISelCSEAnalysisWrapper &CSEWrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  GISelCSEInfo &CSEInfo = CSEWrapper.get(PassConfig.getCSEConfig());
  GISelObserverWrapper Observer;
  Observer.addObserver(&CSEInfo);

  CSEMIRBuilder B(MF);
  B.setCSEInfo(&CSEInfo);
  B.setChangeObserver(Observer);

  RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
  RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);

  IntrinsicLaneMaskAnalyzer ILMA(MF);
  MachineUniformityInfo &MUI =
      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
  MachineRegisterInfo &MRI = *B.getMRI();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  RegBankSelectHelper Helper(B, ILMA, MUI, *ST.getRegBankInfo());

  // At this point no virtual register has a register bank yet. Virtual
  // registers in def and use operands of already inst-selected instructions
  // have a register class.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // The def of a COPY may have a register class, a bank, or neither. Only
      // the "neither" case needs a bank assigned here.
      // NOTE(review): for a G_PHI use, constrainRegBankUse inserts a COPY
      // whose def already has a bank into the block of the defining
      // instruction; if this loop visits that block afterwards, the COPY is
      // seen here. Confirm the assert below cannot fire in that situation.
      if (MI.isCopy()) {
        Register DstReg = getVReg(MI.getOperand(0));
        if (DstReg.isValid() && !MRI.getRegClassOrNull(DstReg)) {
          assert(!MRI.getRegBankOrNull(DstReg));
          MRI.setRegBank(DstReg, *Helper.getRegBankToAssign(DstReg));
        }
        continue;
      }

      if (!MI.isPreISelOpcode())
        continue;

      // Every vreg in a def or use operand of a G_ instruction must end up
      // with a register bank. The situations that can occur before this
      // loop are:
      // - (1) vreg without register class or bank in def or use operand
      // - (2) vreg with register class in def operand
      // - (3) vreg, defined by G_ instruction, in use operand
      // - (4) vreg, defined by pre-inst-selected instruction, in use operand
      // The walk over the def operands of G_ instructions covers the first
      // three: (1) is a plain setRegBank, (2) and (3) go through
      // reAssignRegBankOnDef.
      for (MachineOperand &Def : MI.defs()) {
        Register DstReg = getVReg(Def);
        if (!DstReg.isValid())
          continue;

        const RegisterBank *Bank = Helper.getRegBankToAssign(DstReg);
        if (!MRI.getRegClassOrNull(DstReg)) {
          assert(!MRI.getRegBankOrNull(DstReg));
          MRI.setRegBank(DstReg, *Bank);
          continue;
        }

        Helper.reAssignRegBankOnDef(MI, Def, Bank);
      }

      // Pre-inst-selected instructions are never modified by register bank
      // select, so case (4) is resolved by inserting a copy via
      // constrainRegBankUse.
      for (MachineOperand &Use : MI.uses()) {
        Register SrcReg = getVReg(Use);
        if (!SrcReg.isValid())
          continue;

        // Registers without a register class need nothing here.
        const bool HasRegClass = MRI.getRegClassOrNull(SrcReg) != nullptr;
        if (!HasRegClass)
          continue;

        // Skip case (3).
        if (MRI.getVRegDef(SrcReg)->isPreISelOpcode())
          continue;

        // Use with register class defined by pre-inst-selected instruction.
        const RegisterBank *Bank = Helper.getRegBankToAssign(SrcReg);
        Helper.constrainRegBankUse(MI, Use, Bank);
      }
    }
  }

  return true;
}
|