1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
|
//===-- AMDGPUAlwaysInlinePass.cpp - Force inlining of functions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
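/// This pass marks functions as always_inline when they (transitively) use
/// LDS or region address space globals, or when function calls are disabled,
/// and replaces uses of function aliases with the aliased function. With
/// -amdgpu-stress-function-calls it instead marks functions as noinline.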
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
namespace {
// Hidden testing knob: when set, functions that are not required to be inlined
// are marked noinline instead of always_inline. Off by default.
static cl::opt<bool>
    StressCalls("amdgpu-stress-function-calls",
                cl::desc("Force all functions to be noinline"),
                cl::init(false), cl::Hidden);
/// Legacy pass manager module pass; the actual work is done in runOnModule.
class AMDGPUAlwaysInline : public ModulePass {
public:
  /// Pass identifier handed to the ModulePass base class.
  static char ID;

  /// \param GlobalOpt stored and forwarded to the implementation when the pass
  /// runs.
  AMDGPUAlwaysInline(bool GlobalOpt = false)
      : ModulePass(ID), GlobalOpt(GlobalOpt) {}

  bool runOnModule(Module &M) override;

  /// Declares that every analysis is preserved by this pass.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }

private:
  bool GlobalOpt;
};
} // End anonymous namespace
// Register the legacy pass under the argument name "amdgpu-always-inline" with
// the human-readable description "AMDGPU Inline All Functions".
// NOTE(review): the two trailing `false` arguments are presumably the macro's
// CFG-only and is-analysis flags -- confirm against llvm/PassSupport.h.
INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
"AMDGPU Inline All Functions", false, false)
// Definition of the static pass identifier declared in the class.
char AMDGPUAlwaysInline::ID = 0;
/// Walk the transitive users of \p GV. Every non-entry function that contains
/// an instruction user has its noinline attribute dropped, is recorded in
/// \p FuncsToAlwaysInline, and is itself queued so that its own users (its
/// callers) are processed the same way.
static void
recursivelyVisitUsers(GlobalValue &GV,
                      SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
  SmallPtrSet<const Value *, 8> Seen;
  SmallVector<User *, 16> Worklist(GV.users());

  while (!Worklist.empty()) {
    User *Cur = Worklist.pop_back_val();

    // Each user is processed at most once.
    const bool FirstVisit = Seen.insert(Cur).second;
    if (!FirstVisit)
      continue;

    auto *Inst = dyn_cast<Instruction>(Cur);
    if (!Inst) {
      // Not an instruction (e.g. a constant expression or a function that was
      // queued below): keep following its users.
      append_range(Worklist, Cur->users());
      continue;
    }

    // An instruction's own users are not followed; only the function that
    // contains it matters, because that function's callers must be inlined.
    Function *Parent = Inst->getParent()->getParent();
    if (AMDGPU::isEntryFunctionCC(Parent->getCallingConv()))
      continue;

    // FIXME: This is a horrible hack. noinline should always be respected,
    // letting the error surface when the situation can't be handled.
    //
    // Unfortunately, clang adds noinline to all functions at -O0, so it has to
    // be overridden here until that's fixed.
    Parent->removeFnAttr(Attribute::NoInline);

    FuncsToAlwaysInline.insert(Parent);
    Worklist.push_back(Parent);
  }
}
/// Shared implementation of the always-inline pass.
///
/// Performs three steps on \p M:
///  1. Replaces uses of function aliases with the aliased function (on amdgcn
///     only for aliases with internal linkage) and, when \p GlobalOpt is set,
///     erases those aliases.
///  2. Collects every non-entry function that transitively uses a region
///     address space global, or a local (LDS) address space global when
///     module LDS lowering is disabled, so it can be force-inlined.
///  3. If function calls are disabled, marks all used, defined functions
///     always_inline; if -amdgpu-stress-function-calls is set, marks the ones
///     not required to be inlined as noinline instead.
///
/// \returns true if the module was modified, including the alias rewriting in
/// step 1.
static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
  bool Changed = false;
  SmallVector<GlobalAlias *, 4> AliasesToRemove;

  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;
  Triple TT(M.getTargetTriple());

  for (GlobalAlias &A : M.aliases()) {
    if (Function *F = dyn_cast<Function>(A.getAliasee())) {
      if (TT.getArch() == Triple::amdgcn &&
          A.getLinkage() != GlobalValue::InternalLinkage)
        continue;
      // Rewriting the alias' uses mutates the IR; remember that so the caller
      // is told about it even if no attribute ends up being added below.
      Changed |= !A.use_empty();
      A.replaceAllUsesWith(F);
      AliasesToRemove.push_back(&A);
    }

    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
  }

  if (GlobalOpt) {
    for (GlobalAlias *A : AliasesToRemove)
      A->eraseFromParent();
    Changed |= !AliasesToRemove.empty();
  }

  // Always force inlining of any function that uses an LDS global address. This
  // is something of a workaround because we don't have a way of supporting LDS
  // objects defined in functions. LDS is always allocated by a kernel, and it
  // is difficult to manage LDS usage if a function may be used by multiple
  // kernels.
  //
  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
  // should only appear when IPO passes manage to move LDS defined in a kernel
  // into a single user function.
  for (GlobalVariable &GV : M.globals()) {
    // Region address space globals are always handled; LDS globals only when
    // module LDS lowering is disabled.
    unsigned AS = GV.getAddressSpace();
    if ((AS == AMDGPUAS::REGION_ADDRESS) ||
        (AS == AMDGPUAS::LOCAL_ADDRESS &&
         (!AMDGPUTargetMachine::EnableLowerModuleLDS)))
      recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
    // Functions already carrying the opposite attribute are left alone.
    auto IncompatAttr
      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

    for (Function &F : M) {
      if (!F.isDeclaration() && !F.use_empty() &&
          !F.hasFnAttribute(IncompatAttr)) {
        if (StressCalls) {
          // Functions that must be inlined (LDS users) keep always_inline.
          if (!FuncsToAlwaysInline.count(&F))
            FuncsToNoInline.insert(&F);
        } else
          FuncsToAlwaysInline.insert(&F);
      }
    }
  }

  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  return Changed || !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}
/// Legacy pass manager entry point: runs the shared implementation on \p M
/// with the GlobalOpt setting given at construction and returns its result.
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  const bool ModuleChanged = alwaysInlineImpl(M, GlobalOpt);
  return ModuleChanged;
}
/// Factory for the legacy pass: returns a newly heap-allocated
/// AMDGPUAlwaysInline constructed with \p GlobalOpt.
// NOTE(review): the caller (presumably a legacy pass manager) is expected to
// take ownership of the returned pointer -- confirm at the call sites.
ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
return new AMDGPUAlwaysInline(GlobalOpt);
}
/// New pass manager entry point: runs the shared implementation on \p M.
///
/// The implementation can add or remove function attributes and rewrite or
/// erase aliases, so analyses are only reported as preserved when it says the
/// module was left untouched.
PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,
                                              ModuleAnalysisManager &AM) {
  const bool Changed = alwaysInlineImpl(M, GlobalOpt);
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
|