/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#pragma once

#include "AdaptorCommon/ImplicitArgs.hpp"
#include "Compiler/Optimizer/OpenCLPasses/KernelArgs/KernelArgs.hpp"
#include "Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp"
#include "Compiler/MetaDataUtilsWrapper.h"
#include "common/LLVMWarningsPush.hpp"
#include <llvm/Pass.h>
#include <llvm/IR/InstVisitor.h>
#include <llvm/IR/Instruction.h>
#include <llvm/Analysis/AssumptionCache.h>
#include "common/LLVMWarningsPop.hpp"
#include "Probe/Assertion.h"
namespace IGC {
// Too many stateful promotions will overwhelm the surface state
// cache (32 entries per HDC), which significantly impacts
// performance. Simply disable stateful promotion after 32 args.
constexpr uint maxPromotionCount = 32;

enum class TargetAddressing { BINDFUL, BINDLESS };
class StatelessToStateful : public llvm::ModulePass, public llvm::InstVisitor<StatelessToStateful> {
public:
  typedef llvm::DenseMap<const KernelArg *, int> ArgInfoMap;

  static char ID;

  StatelessToStateful();
  StatelessToStateful(TargetAddressing addressing);
  ~StatelessToStateful() {}

  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MetaDataUtilsWrapper>();
    AU.addRequired<llvm::AssumptionCacheTracker>();
    AU.addRequired<CodeGenContextWrapper>();
  }

  virtual llvm::StringRef getPassName() const override { return "StatelessToStateful"; }

  virtual bool runOnModule(llvm::Module &M) override;

  void visitLoadInst(llvm::LoadInst &I);
  void visitStoreInst(llvm::StoreInst &I);
  void visitCallInst(llvm::CallInst &I);
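
  // Typical use (a sketch assuming the usual LLVM legacy pass-manager flow; not copied
  // from this repo):
  //   llvm::legacy::PassManager PM;
  //   PM.add(new StatelessToStateful(TargetAddressing::BINDFUL));
  //   PM.run(M);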
private:
  struct InstructionInfo {
    InstructionInfo(llvm::Instruction *I, llvm::Value *ptr, llvm::Value *offset)
        : statelessInst(I), ptr(ptr), offset(offset) {}
    InstructionInfo() = delete;

    void setStatefulAddrspace(unsigned addrspace) { statefulAddrSpace = addrspace; }
    unsigned getStatefulAddrSpace() {
      IGC_ASSERT(statefulAddrSpace);
      return *statefulAddrSpace;
    }
    void setBaseArgIndex(unsigned index) { baseArgIndex = index; }
    unsigned getBaseArgIndex() { return baseArgIndex; }

    llvm::Instruction *const statelessInst;
    llvm::Value *const ptr;
    llvm::Value *const offset;

  private:
    unsigned baseArgIndex = 0;
    std::optional<unsigned> statefulAddrSpace;
  };
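
  // How an entry is typically built during analysis (illustrative only; names such as
  // LI, basePtr, byteOffset, and argNo are hypothetical):
  //   InstructionInfo II(&LI, basePtr, byteOffset);
  //   II.setBaseArgIndex(argNo);
  //   m_promotionMap[argNo].push_back(II);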

  void handleFunction(llvm::Function &F);
  void setModuleUsesBindless();
  bool getModuleUsesBindless();
  void findPromotableInstructions();
  void addToPromotionMap(llvm::Instruction &I, llvm::Value *Ptr);
  void promote();
  void promoteInstruction(InstructionInfo &InstInfo);
  void promoteLoad(InstructionInfo &InstInfo);
  void promoteStore(InstructionInfo &InstInfo);
  void promoteIntrinsic(InstructionInfo &InstInfo);

  bool doPromoteUntypedAtomics(const llvm::GenISAIntrinsic::ID intrinID, const llvm::GenIntrinsicInst *Inst);
  bool isUntypedAtomic(const llvm::GenISAIntrinsic::ID intrinID);
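  // "Untyped atomics" here means the raw GenISA atomic intrinsics (e.g. GenISA.intatomicrawA64);
  // the authoritative list lives in GenISAIntrinsics, this is only an illustration.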

  // The LLVM InstCombine pass replaces multiple loads whose single user is one phi instruction
  // with a phi on the addresses followed by a single load. This prevents StatelessToStateful
  // from making those loads stateful.
  // The following functions revert that InstCombine transformation.
  bool hoistLoad();
  bool canWriteToMemoryTill(llvm::Instruction *Till);
  bool isItSafeToHoistLoad(llvm::LoadInst *LI, llvm::PHINode *Phi);
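
  // A sketch of the pattern hoistLoad() undoes (illustrative IR; block and value names are
  // hypothetical):
  //
  //   ; After InstCombine: a phi over the addresses feeding one load.
  //   merge:
  //     %addr = phi ptr addrspace(1) [ %addrA, %bbA ], [ %addrB, %bbB ]
  //     %val = load i32, ptr addrspace(1) %addr
  //
  //   ; Hoisted form: one load per predecessor, then a phi over the loaded values, so each
  //   ; load keeps a base that traces back to a kernel argument and can be promoted.
  //   bbA:
  //     %valA = load i32, ptr addrspace(1) %addrA
  //   bbB:
  //     %valB = load i32, ptr addrspace(1) %addrB
  //   merge:
  //     %val = phi i32 [ %valA, %bbA ], [ %valB, %bbB ]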

  // pointerIsPositiveOffsetFromKernelArgument - check whether V can be traced back to a kernel
  // argument and has a positive offset from that argument.
  // ignoreSyncBuffer - when set to true, return false directly if V comes from the implicit
  // kernel argument "sync buffer". The sync buffer must be accessed statelessly in the ZEBinary
  // path, so it cannot be promoted.
  bool pointerIsPositiveOffsetFromKernelArgument(llvm::Function *F, llvm::Value *V, llvm::Value *&offset,
                                                 unsigned int &argNumber, bool ignoreSyncBuffer);
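
  // Example (illustrative IR; %dst, %gid, and %v are hypothetical): given kernel argument %dst,
  //
  //   %gep = getelementptr inbounds i32, ptr addrspace(1) %dst, i64 %gid
  //   store i32 %v, ptr addrspace(1) %gep
  //
  // the pointer traces back to %dst with byte offset 4 * %gid, which is provably non-negative
  // when %gid is known non-negative (e.g. through an llvm.assume seen via the AssumptionCache).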

  // Check whether the given pointer value can be traced back to any kernel argument.
  // Returns the kernel argument if found, otherwise nullptr.
  const KernelArg *getKernelArgFromPtr(const llvm::PointerType &ptrType, llvm::Value *pVal);

  // Check whether the given pointer can be traced back to any kernel argument.
  bool pointerIsFromKernelArgument(llvm::Value &ptr);

  bool getOffsetFromGEP(llvm::Function *F, const llvm::SmallVector<llvm::GetElementPtrInst *, 4> &GEPs,
                        uint32_t argNumber, bool isImplicitArg, llvm::Value *&offset);
  llvm::Argument *getBufferOffsetArg(llvm::Function *F, uint32_t ArgNumber);
  void setPointerSizeTo32bit(int32_t AddrSpace, llvm::Module *M);

  // Encode uavIndex in the addrspace. Note that uavIndex is not always the same as the BTI.
  // Read-only images are qualified as SRV resources and have a separate index space.
  // Writable images and buffers are qualified as UAV resources and likewise have a separate
  // index space. So if a kernel contains a read-only image and a global buffer, both will
  // have `0` encoded in their addrspace. The actual BTI is computed from the BTLayout in
  // EmitVISAPass.
  unsigned encodeBindfulAddrspace(unsigned uavIndex);
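
  // For instance, with a hypothetical kernel
  //   kernel void k(read_only image2d_t img, global int *buf);
  // img would get SRV index 0 and buf UAV index 0, so both encode 0 here; EmitVISAPass later
  // maps each index space to distinct binding-table entries via BTLayout.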

  void updateArgInfo(const KernelArg *KA, bool IsPositive);
  void finalizeArgInitialValue(llvm::Function *F);

  const KernelArg *getKernelArg(llvm::Value *Arg) {
    IGC_ASSERT_MESSAGE(m_pKernelArgs, "Should initialize it before use!");
    for (const KernelArg &arg : *m_pKernelArgs) {
      if (arg.getArg() == Arg) {
        return &arg;
      }
    }
    return nullptr;
  }

  const KernelArg *getBufferOffsetKernelArg(const KernelArg *KA) {
    IGC_ASSERT_MESSAGE(m_pKernelArgs, "KernelArgs: should initialize it before use!");
    int argno = KA->getAssociatedArgNo();
    for (const KernelArg &arg : *m_pKernelArgs) {
      if (arg.getArgType() == KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET && arg.getAssociatedArgNo() == argno) {
        return &arg;
      }
    }
    return nullptr;
  }

  // When true, the runtime can generate the surface with the buffer's original base (creation base).
  bool m_hasBufferOffsetArg = false;

  // When m_hasBufferOffsetArg is true, the optional buffer offset can be on or off,
  // which is indicated by this boolean flag.
  bool m_hasOptionalBufferOffsetArg = false;

  // When true, every message of the form ptrArg + offset has offset >= 0.
  bool m_hasPositivePointerOffset = false;

  // Handle non-GEP pointers.
  // For historical reasons (probably non-DW-aligned args), non-GEP pointers aren't handled.
  // If this field is true, non-GEP pointers shall be handled.
  const bool m_supportNonGEPPtr = false;
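
  // How the pieces combine (an assumption-level sketch, not a spec): for a promoted access,
  // the stateful address is roughly surface[bufferOffsetArg + offsetFromGEP], where the
  // implicit buffer-offset argument compensates for the surface being created at the buffer's
  // creation base, keeping the final stateful offset non-negative.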

  llvm::AssumptionCacheTracker *m_ACT = nullptr;
  llvm::AssumptionCache *getAC(llvm::Function *F) {
    return (m_ACT != nullptr ? &m_ACT->getAssumptionCache(*F) : nullptr);
  }

  TargetAddressing m_targetAddressing;
  OpenCLProgramContext *m_ctx = nullptr;
  ImplicitArgs *m_pImplicitArgs = nullptr;
  KernelArgs *m_pKernelArgs = nullptr;
  ArgInfoMap m_argsInfo;
  bool m_changed = false;
  llvm::Function *m_F = nullptr;
  llvm::Module *m_Module = nullptr;

  // Map argument index to a vector of instructions that should be promoted to stateful.
  std::map<unsigned int, std::vector<InstructionInfo>> m_promotionMap;
};
} // namespace IGC