//===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
namespace {
static inline bool isNullConstantOrUndef(SDValue V) {
return V.isUndef() || isNullConstant(V);
}
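// Extracts the value of a constant element into Out: the sign-extended
// integer value, or the raw bit pattern for an FP constant. Undef reads as 0.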
static inline bool getConstantValue(SDValue N, uint32_t &Out) {
// This is only used for packed vectors, where using 0 for undef should
// always be good.
if (N.isUndef()) {
Out = 0;
return true;
}
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
Out = C->getAPIntValue().getSExtValue();
return true;
}
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
return true;
}
return false;
}
// TODO: Handle undef as zero
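// Folds a two-element BUILD_VECTOR of constants into a single S_MOV_B32 whose
// low and high halves hold the two (optionally negated) 16-bit values.
// For example, constant operands 1 and 2 pack to K = 0x00020001.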
static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
bool Negate = false) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
uint32_t LHSVal, RHSVal;
if (getConstantValue(N->getOperand(0), LHSVal) &&
getConstantValue(N->getOperand(1), RHSVal)) {
SDLoc SL(N);
uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
: (LHSVal & 0xffff) | (RHSVal << 16);
return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
DAG.getTargetConstant(K, SL, MVT::i32));
}
return nullptr;
}
static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
return packConstantV2I16(N, DAG, true);
}
} // namespace
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const GCNSubtarget *Subtarget;
// Default FP mode for the current function.
SIModeRegisterDefaults Mode;
bool EnableLateStructurizeCFG;
// Instructions that will be lowered with a final instruction that zeros the
// high result bits.
bool fp16SrcZerosHighBits(unsigned Opc) const;
public:
static char ID;
AMDGPUDAGToDAGISel() = delete;
explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel);
~AMDGPUDAGToDAGISel() override = default;
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool matchLoadD16FromBuildVector(SDNode *N) const;
bool runOnMachineFunction(MachineFunction &MF) override;
void PreprocessISelDAG() override;
void Select(SDNode *N) override;
StringRef getPassName() const override;
void PostprocessISelDAG() override;
protected:
void SelectBuildVector(SDNode *N, unsigned RegClassID);
private:
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
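// Inline-immediate checks: a value the hardware can encode directly in the
// instruction does not need a separate literal constant.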
bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
bool isNegInlineImmediate(const SDNode *N) const {
return isInlineImmediate(N, true);
}
bool isInlineImmediate16(int64_t Imm) const {
return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
}
bool isInlineImmediate32(int64_t Imm) const {
return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
}
bool isInlineImmediate64(int64_t Imm) const {
return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
}
bool isInlineImmediate(const APFloat &Imm) const {
return Subtarget->getInstrInfo()->isInlineConstant(Imm);
}
bool isVGPRImm(const SDNode *N) const;
bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
// Returns true if the ISD::AND node `N` masks a shift amount without changing
// its low `ShAmtBits` bits, so the mask is unneeded for a shift that only
// reads `ShAmtBits` bits of its amount.
bool isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const;
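// Example for isUnneededShiftMask: a 32-bit shift reads only the low 5 bits of
// its amount, so with ShAmtBits == 5 an (Amt & 31) mask can be looked through.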
bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
SDValue &RHS) const;
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
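// Helpers that glue a CopyToReg of M0 (e.g. the LDS initialization some DS
// operations require) onto the chain of N before it is selected.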
SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
SDNode *glueCopyToM0LDSInit(SDNode *N) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
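// DS (LDS/GDS) and flat-scratch offset legality checks, plus base/offset
// operand selection for the single- and dual-offset DS forms.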
bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
unsigned Size) const;
bool isFlatScratchBaseLegal(
SDValue Base, uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1, unsigned Size) const;
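// MUBUF (buffer) addressing modes: the generic form, addr64, and the
// private/scratch offen and offset variants.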
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset) const;
bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &ImmOffset) const;
bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
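// FLAT, global, and scratch addressing: immediate-offset folding and the
// SADDR/VADDR/SVS operand forms.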
bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, uint64_t FlatVariant) const;
bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset) const;
bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset) const;
bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset) const;
bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &VOffset, SDValue &Offset) const;
bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &Offset) const;
bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,
uint64_t ImmOffset) const;
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &SAddr, SDValue &Offset) const;
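// SMRD/SMEM (scalar memory) addressing: immediate and SGPR offsets, including
// the buffer-intrinsic variants.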
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
SDValue *Offset, bool Imm32Only = false,
bool IsBuffer = false) const;
SDValue Expand32BitAddress(SDValue Addr) const;
bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
SDValue *Offset, bool Imm32Only = false,
bool IsBuffer = false) const;
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
SDValue *Offset, bool Imm32Only = false) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
SDValue &Offset) const;
bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;
bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
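// Source-modifier selection (neg/abs, clamp, omod, op_sel) for VOP3, VOP3P,
// VINTERP and mad-mix operands.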
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
bool IsCanonicalizing = true,
bool AllowAbs = true) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3ModsNonCanonicalizing(SDValue In, SDValue &Src,
SDValue &SrcMods) const;
bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVINTERPModsImpl(SDValue In, SDValue &Src, SDValue &SrcMods,
bool OpSel) const;
bool SelectVINTERPMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVINTERPModsHi(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
SDValue &Omod) const;
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
unsigned &Mods) const;
bool SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
SDValue &SrcMods) const;
bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
SDValue getHi16Elt(SDValue In) const;
SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
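// Hand-written selection for nodes dispatched from Select(): carry arithmetic,
// BFE patterns, branches, and target intrinsics.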
void SelectADD_SUB_I64(SDNode *N);
void SelectAddcSubb(SDNode *N);
void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
void SelectMAD_64_32(SDNode *N);
void SelectMUL_LOHI(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
uint32_t Width);
void SelectS_BFEFromShifts(SDNode *N);
void SelectS_BFE(SDNode *N);
bool isCBranchSCC(const SDNode *N) const;
void SelectBRCOND(SDNode *N);
void SelectFMAD_FMA(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
void SelectINTRINSIC_WO_CHAIN(SDNode *N);
void SelectINTRINSIC_VOID(SDNode *N);
protected:
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H