1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
|
//===- GCNCreateVOPD.cpp - Create VOPD Instructions ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Combine pairs of VALU instructions into VOPD instructions.
/// This only works in wave32 mode.
/// VOPD has register requirements; a pair is not combined if those
/// requirements are not met.
/// The shouldCombineVOPD mutator in the postRA machine scheduler places
/// candidate instructions for VOPD back-to-back.
///
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "GCNVOPDUtils.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include <utility>
// Tag for this pass; it is also passed to INITIALIZE_PASS at the bottom of the
// file as the pass argument. NOTE(review): LLVM_DEBUG and STATISTIC presumably
// pick this macro up implicitly — confirm against the LLVM headers.
#define DEBUG_TYPE "gcn-create-vopd"
// Counter incremented once for every VOPD instruction built by doReplace().
STATISTIC(NumVOPDCreated, "Number of VOPD Insts Created.");
using namespace llvm;
namespace {
class GCNCreateVOPD : public MachineFunctionPass {
private:
class VOPDCombineInfo {
public:
VOPDCombineInfo() {}
VOPDCombineInfo(MachineInstr *First, MachineInstr *Second)
: FirstMI(First), SecondMI(Second) {}
MachineInstr *FirstMI;
MachineInstr *SecondMI;
};
public:
static char ID;
const GCNSubtarget *ST = nullptr;
GCNCreateVOPD() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override {
return "GCN Create VOPD Instructions";
}
bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
auto *FirstMI = CI.FirstMI;
auto *SecondMI = CI.SecondMI;
unsigned Opc1 = FirstMI->getOpcode();
unsigned Opc2 = SecondMI->getOpcode();
int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
AMDGPU::getVOPDOpcode(Opc2));
assert(NewOpcode != -1 &&
"Should have previously determined this as a possible VOPD\n");
auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
FirstMI->getDebugLoc(), SII->get(NewOpcode))
.setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
namespace VOPD = AMDGPU::VOPD;
MachineInstr *MI[] = {FirstMI, SecondMI};
auto InstInfo =
AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
for (auto CompIdx : VOPD::COMPONENTS) {
auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
}
for (auto CompIdx : VOPD::COMPONENTS) {
auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
}
}
for (auto CompIdx : VOPD::COMPONENTS)
VOPDInst.copyImplicitOps(*MI[CompIdx]);
LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
<< *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
for (auto CompIdx : VOPD::COMPONENTS)
MI[CompIdx]->eraseFromParent();
++NumVOPDCreated;
return true;
}
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()))
return false;
ST = &MF.getSubtarget<GCNSubtarget>();
if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
return false;
LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
const SIInstrInfo *SII = ST->getInstrInfo();
bool Changed = false;
SmallVector<VOPDCombineInfo> ReplaceCandidates;
for (auto &MBB : MF) {
auto MII = MBB.begin(), E = MBB.end();
while (MII != E) {
auto *FirstMI = &*MII;
MII = next_nodbg(MII, MBB.end());
if (MII == MBB.end())
break;
if (FirstMI->isDebugInstr())
continue;
auto *SecondMI = &*MII;
unsigned Opc = FirstMI->getOpcode();
unsigned Opc2 = SecondMI->getOpcode();
llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
VOPDCombineInfo CI;
if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
CI = VOPDCombineInfo(FirstMI, SecondMI);
else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
CI = VOPDCombineInfo(SecondMI, FirstMI);
else
continue;
// checkVOPDRegConstraints cares about program order, but doReplace
// cares about X-Y order in the constituted VOPD
if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
ReplaceCandidates.push_back(CI);
++MII;
}
}
}
for (auto &CI : ReplaceCandidates) {
Changed |= doReplace(SII, CI);
}
return Changed;
}
};
} // namespace
// Storage for the pass identifier that the GCNCreateVOPD constructor hands to
// MachineFunctionPass. NOTE(review): presumably only its address matters, not
// the value 0 — confirm against the LLVM pass infrastructure.
char GCNCreateVOPD::ID = 0;
// Reference to the pass ID declared in the llvm namespace, so the ID remains
// nameable even though the pass class lives in an anonymous namespace.
char &llvm::GCNCreateVOPDID = GCNCreateVOPD::ID;
// Registers the pass with DEBUG_TYPE ("gcn-create-vopd") as its argument and
// "GCN Create VOPD Instructions" as its name. NOTE(review): the two trailing
// flags are presumably INITIALIZE_PASS's "CFG only" and "is analysis"
// parameters — confirm against PassSupport.h.
INITIALIZE_PASS(GCNCreateVOPD, DEBUG_TYPE, "GCN Create VOPD Instructions",
false, false)
|