1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
/// undef %0.sub1:sreg_64 = S_MOV_B32 1
/// %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
/// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to be done after register coalescer to allow it to do its job and
/// before actual register allocation to allow rematerialization.
///
/// Right now the pass only handles 64-bit SGPRs with immediate initializers,
/// although the same should be possible with other register classes and
/// instructions if necessary.
///
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
namespace {

/// Machine function pass that runs over the virtual registers of a function
/// and, for each 64-bit SGPR register that has a live interval, tries to
/// replace a pair of S_MOV_B32 immediate moves into its sub0 and sub1 halves
/// with a single S_MOV_B64_IMM_PSEUDO (see processReg).
class GCNPreRAOptimizations : public MachineFunctionPass {
private:
  // Per-function state. All three are assigned at the top of
  // runOnMachineFunction before processReg is called. They start out null so
  // that an accidental early use dereferences a null pointer instead of
  // reading through an indeterminate one.
  const SIInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  LiveIntervals *LIS = nullptr;

  /// Try to merge the two 32-bit immediate definitions of \p Reg into one
  /// 64-bit immediate move.
  /// \returns true if the instructions were rewritten.
  bool processReg(Register Reg);

public:
  static char ID;

  GCNPreRAOptimizations() : MachineFunctionPass(ID) {
    initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
  }

  /// Entry point of the pass.
  /// \returns true if any instruction in \p MF was changed.
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Pre-RA optimizations";
  }

  /// Requires LiveIntervals and declares every analysis as preserved;
  /// processReg updates LiveIntervals by hand for the instructions it
  /// replaces.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.
// Pass initialization boilerplate. The pass is identified by the DEBUG_TYPE
// string and lists LiveIntervals as a pass it depends on, matching the
// addRequired<LiveIntervals>() call in getAnalysisUsage.
INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
"AMDGPU Pre-RA optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
// NOTE(review): the description string below ("Pre-RA optimizations") differs
// from the one passed to INITIALIZE_PASS_BEGIN and returned by getPassName()
// ("AMDGPU Pre-RA optimizations") -- confirm whether this is intentional.
INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations",
false, false)
// Storage for the pass ID that is handed to the MachineFunctionPass
// constructor.
char GCNPreRAOptimizations::ID = 0;
// Reference to the ID above under a name in the llvm namespace, so that code
// which cannot see the class in the anonymous namespace can still name the
// pass.
char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;
/// Factory function for the pass.
/// \returns a pointer to a newly allocated GCNPreRAOptimizations instance.
FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
  FunctionPass *NewPass = new GCNPreRAOptimizations();
  return NewPass;
}
/// Attempt to fold the split initialization of \p Reg into one instruction.
///
/// The rewrite only happens when every definition of \p Reg is a two-operand
/// S_MOV_B32 of an immediate, there is exactly one such definition for sub0
/// and exactly one for sub1, and both live in the same basic block. In that
/// case the two moves are replaced by a single S_MOV_B64_IMM_PSEUDO placed at
/// the position of the earlier move, and the live interval of \p Reg is
/// recomputed.
///
/// \returns true if the two moves were replaced, false if nothing changed.
bool GCNPreRAOptimizations::processReg(Register Reg) {
  MachineInstr *LoMov = nullptr; // Unique definition of Reg.sub0, if seen.
  MachineInstr *HiMov = nullptr; // Unique definition of Reg.sub1, if seen.
  uint64_t Imm64 = 0;            // 64-bit immediate assembled from both halves.

  for (MachineInstr &MI : MRI->def_instructions(Reg)) {
    // Any definition that is not a plain "S_MOV_B32 Reg.subN, imm" blocks the
    // transformation.
    const bool IsPlainImmMov =
        MI.getOpcode() == AMDGPU::S_MOV_B32 &&
        MI.getOperand(0).getReg() == Reg && MI.getOperand(1).isImm() &&
        MI.getNumOperands() == 2;
    if (!IsPlainImmMov)
      return false;

    const unsigned SubIdx = MI.getOperand(0).getSubReg();
    if (SubIdx == AMDGPU::sub0) {
      // A second write to the low half means the value is not a single
      // constant.
      if (LoMov)
        return false;
      LoMov = &MI;
      // Only the low 32 bits of the immediate go into the low half.
      Imm64 |= MI.getOperand(1).getImm() & 0xffffffff;
    } else if (SubIdx == AMDGPU::sub1) {
      // Likewise for the high half.
      if (HiMov)
        return false;
      HiMov = &MI;
      Imm64 |= static_cast<uint64_t>(MI.getOperand(1).getImm()) << 32;
    } else {
      // Full-register or any other subregister definition: not handled.
      return false;
    }
  }

  // Both halves must be defined, and within the same basic block.
  if (!LoMov || !HiMov)
    return false;
  if (LoMov->getParent() != HiMov->getParent())
    return false;

  LLVM_DEBUG(dbgs() << "Combining:\n " << *LoMov << " " << *HiMov
                    << " =>\n");

  // Order the two moves by slot index so the replacement is inserted at the
  // position of whichever one comes first.
  MachineInstr *First = LoMov;
  MachineInstr *Second = HiMov;
  if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*HiMov),
                                LIS->getInstructionIndex(*LoMov)))
    std::swap(First, Second);

  // Drop the old instructions from the slot index maps before erasing them.
  LIS->RemoveMachineInstrFromMaps(*First);
  LIS->RemoveMachineInstrFromMaps(*Second);

  MachineInstr *MovB64 =
      BuildMI(*First->getParent(), *First, First->getDebugLoc(),
              TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
          .addImm(Imm64);

  First->eraseFromParent();
  Second->eraseFromParent();

  // Index the new instruction and rebuild the live interval of Reg from
  // scratch, since its definitions have changed.
  LIS->InsertMachineInstrInMaps(*MovB64);
  LIS->removeInterval(Reg);
  LIS->createAndComputeVirtRegInterval(Reg);

  LLVM_DEBUG(dbgs() << " " << *MovB64);
  return true;
}
/// Run the optimization over every virtual register of \p MF.
///
/// Caches the instruction info, register info and LiveIntervals for the
/// function, then calls processReg on each virtual register that has a live
/// interval and whose register class is a 64-bit SGPR class.
///
/// \returns true if at least one register was rewritten; false if the
/// function was skipped or nothing changed.
bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  TII = Subtarget.getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();
  const SIRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  bool MadeChange = false;
  // The register count is read once, before the loop starts.
  const unsigned NumVRegs = MRI->getNumVirtRegs();
  for (unsigned Idx = 0; Idx != NumVRegs; ++Idx) {
    const Register VReg = Register::index2VirtReg(Idx);

    // Registers without a live interval are not considered.
    if (!LIS->hasInterval(VReg))
      continue;

    // Only 64-bit scalar register classes are candidates.
    const TargetRegisterClass *RC = MRI->getRegClass(VReg);
    const bool Is64BitSGPR =
        RC->MC->getSizeInBits() == 64 && RegInfo->isSGPRClass(RC);
    if (Is64BitSGPR && processReg(VReg))
      MadeChange = true;
  }

  return MadeChange;
}
|