1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
|
//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific to the
/// NVPTX target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#include "NVPTXTargetMachine.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>
namespace llvm {
// NVPTX-specific TTI implementation, layered on BasicTTIImplBase via CRTP.
// Queries not overridden in this class are inherited from the base class.
class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
  using BaseT = BasicTTIImplBase<NVPTXTTIImpl>;
  using TTI = TargetTransformInfo;

  // Grants the CRTP base access to the private members declared below.
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  // Private accessors for the cached subtarget and target-lowering objects.
  const NVPTXSubtarget *getST() const { return ST; }
  const NVPTXTargetLowering *getTLI() const { return TLI; }

public:
  // Caches the subtarget taken from TM and that subtarget's target lowering;
  // the data layout handed to the base comes from F's parent module.
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}

  // Branch divergence is reported unconditionally; F is not consulted.
  bool hasBranchDivergence(const Function *F = nullptr) { return true; }

  // Declared here; the definition is not part of this header.
  bool isSourceOfDivergence(const Value *V);

  // The generic address space is the flat one.
  unsigned getFlatAddressSpace() const {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }

  // Every address space except shared, local and param may carry a
  // non-undef global initializer.
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    const bool IsExcludedAS = AS == AddressSpace::ADDRESS_SPACE_SHARED ||
                              AS == AddressSpace::ADDRESS_SPACE_LOCAL ||
                              AS == AddressSpace::ADDRESS_SPACE_PARAM;
    return !IsExcludedAS;
  }

  // Declared here; the definition is not part of this header.
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  // A chain of loads may be vectorized when its alignment is no smaller than
  // the size of the resulting access. AddrSpace is not consulted.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return Alignment >= ChainSizeInBytes;
  }

  // Store chains follow exactly the same rule as load chains.
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }

  // NVPTX exposes an unbounded number of registers of every kind, whereas the
  // real hardware does not. Reporting 1 is a conservative choice: it is just
  // enough to switch the vectorizers on while keeping register-count-driven
  // heuristics off.
  // FIXME: Return a more reasonable number, while keeping an eye on
  // LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(bool Vector) const { return 1; }

  // <2 x half> is the only vector type that should be formed, so the register
  // width is reported as 32 bits regardless of the register kind asked about.
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }
  unsigned getMinVectorRegisterBitWidth() const { return 32; }

  // Inlining is never blocked on account of target-cpu / -features attributes
  // introduced by newer LLVM/Clang versions: PTX has no incompatible
  // functions, and ptxas will throw errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return true;
  }

  // Calls are particularly expensive on NVPTX, so the inlining cost threshold
  // is scaled up by a factor of 11.
  unsigned getInliningThresholdMultiplier() const { return 11; }

  // Declared here; the definition is not part of this header.
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  // Declared here; the definition is not part of this header.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  // Declared here; the definition is not part of this header.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  // Returns true only when I is a load or a store and AddrSpace is one in
  // which volatile accesses are supported.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
    // Volatile loads/stores exist only for the shared and global address
    // spaces, or for the generic address space that maps onto them.
    const bool IsSupportedAS = AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
                               AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
                               AddrSpace == llvm::ADDRESS_SPACE_SHARED;
    if (!IsSupportedAS)
      return false;

    // The opcode is inspected only after the address space has been accepted.
    const unsigned Opc = I->getOpcode();
    return Opc == Instruction::Load || Opc == Instruction::Store;
  }
};
} // end namespace llvm
#endif
|