1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access failure
/// The required code is also added to tie this init code to the result of the
/// img instruction
///
//===----------------------------------------------------------------------===//
//
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-img-init"
using namespace llvm;
namespace {
class SIAddIMGInit : public MachineFunctionPass {
public:
static char ID;
public:
SIAddIMGInit() : MachineFunctionPass(ID) {
initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
} // End anonymous namespace.
INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
char SIAddIMGInit::ID = 0;
char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *RI = ST.getRegisterInfo();
bool Changed = false;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
++BI) {
MachineBasicBlock &MBB = *BI;
MachineBasicBlock::iterator I, Next;
for (I = MBB.begin(); I != MBB.end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
auto Opcode = MI.getOpcode();
if (TII->isMIMG(Opcode) && !MI.mayStore()) {
MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
// Check for instructions that don't have tfe or lwe fields
// There shouldn't be any at this point.
assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
unsigned TFEVal = TFE->getImm();
unsigned LWEVal = LWE->getImm();
unsigned D16Val = D16 ? D16->getImm() : 0;
if (TFEVal || LWEVal) {
// At least one of TFE or LWE are non-zero
// We have to insert a suitable initialization of the result value and
// tie this to the dest of the image instruction.
const DebugLoc &DL = MI.getDebugLoc();
int DstIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
// Calculate which dword we have to initialize to 0.
MachineOperand *MO_Dmask =
TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
// check that dmask operand is found.
assert(MO_Dmask && "Expected dmask operand in instruction");
unsigned dmask = MO_Dmask->getImm();
// Determine the number of active lanes taking into account the
// Gather4 special case
unsigned ActiveLanes =
TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
// Subreg indices are counted from 1
// When D16 then we want next whole VGPR after write data.
static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
bool Packed = !ST.hasUnpackedD16VMem();
unsigned InitIdx =
D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
// Abandon attempt if the dst size isn't large enough
// - this is in fact an error but this is picked up elsewhere and
// reported correctly.
uint32_t DstSize =
RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
if (DstSize < InitIdx)
continue;
// Create a register for the intialization value.
unsigned PrevDst =
MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
unsigned NewDst = 0; // Final initialized value will be in here
// If PRTStrictNull feature is enabled (the default) then initialize
// all the result registers to 0, otherwise just the error indication
// register (VGPRn+1)
unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
if (DstSize == 1) {
// In this case we can just initialize the result directly
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
.addImm(0);
NewDst = PrevDst;
} else {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
for (; SizeLeft; SizeLeft--, CurrIdx++) {
NewDst =
MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
// Initialize dword
unsigned SubReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
.addImm(0);
// Insert into the super-reg
BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
.addReg(PrevDst)
.addReg(SubReg)
.addImm(CurrIdx);
PrevDst = NewDst;
}
}
// Add as an implicit operand
MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
// Tie the just added implicit operand to the dst
MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
Changed = true;
}
}
}
}
return Changed;
}
|