//===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file implements a pass that rewrites undef incoming operands for
// certain PHIs in a structurized CFG. The pass only works on IR that has gone
// through the StructurizeCFG pass, and an additional limitation means it can
// only run after SIAnnotateControlFlow.
//
// To achieve optimal code generation for AMDGPU, we assume that uniformity
// analysis reports a PHI in the join block of a divergent branch as uniform if
// it has one unique uniform incoming value plus additional undefined/poisoned
// incoming values. That is to say, the later compiler pipeline must ensure
// such a PHI always returns a uniform value and works correctly. Below are two
// typical patterns in a structured CFG that need to be taken care of. (In both
// patterns, block %if terminates with a divergent branch.)
//
// Pattern A: Block with undefined incoming value dominates defined predecessor
// %if
// | \
// | %then
// | /
// %endif: %phi = phi [%undef, %if], [%uniform, %then]
//
// Pattern B: Block with defined incoming value dominates undefined predecessor
// %if
// | \
// | %then
// | /
// %endif: %phi = phi [%uniform, %if], [%undef, %then]
//
// For pattern A, because %phi is reported as uniform, the later pipeline needs
// to make sure it is handled correctly. The backend usually allocates a scalar
// register for it, and if any thread in a wave takes the %then path, the
// scalar register will get the %uniform value.
//
// For pattern B, this pass replaces the undef operand with the unique defined
// value, so the scalar register allocated for such a PHI gets the correct
// liveness. Without this transformation, the scalar register may be overwritten
// in the %then block.
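//
// As a rough illustration (the IR names below are made up for this comment,
// not taken from an actual test): a pattern-B PHI such as
//   %phi = phi i32 [ %uniform, %if ], [ undef, %then ]
// has all of its uses replaced with %uniform and is then erased, so the value
// simply stays live in its scalar register across the divergent region.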
//
// Limitation note:
// If the join block of divergent threads is a loop header, the pass cannot
// handle it correctly right now. In the case below, the undef in %phi should
// also be rewritten. Currently we depend on SIAnnotateControlFlow to split the
// %header block so that a separate join block is created, after which the
// undef can be rewritten correctly.
// %if
// | \
// | %then
// | /
// -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
// | |
// \---
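//
// After SIAnnotateControlFlow splits %header, the intended shape is roughly
// the following (a sketch only; block and value names are illustrative):
//   %flow:   %phi.flow = phi [%uniform, %if], [%undef, %then]   ; new join block
//   %header: %phi = phi [%phi.flow, %flow], [%uniform2, %header]
// The PHI in %flow then matches pattern B and its undef can be rewritten.
//
//===----------------------------------------------------------------------===//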
#include "AMDGPU.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"
namespace {
class AMDGPURewriteUndefForPHI : public FunctionPass {
public:
static char ID;
AMDGPURewriteUndefForPHI() : FunctionPass(ID) {
initializeAMDGPURewriteUndefForPHIPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
StringRef getPassName() const override {
return "AMDGPU Rewrite Undef for PHI";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<UniformityInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<UniformityInfoWrapperPass>();
AU.setPreservesCFG();
}
};
} // end anonymous namespace
char AMDGPURewriteUndefForPHI::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
"Rewrite undef for PHI", false, false)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
"Rewrite undef for PHI", false, false)
static bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) {
bool Changed = false;
SmallVector<PHINode *> ToBeDeleted;
for (auto &BB : F) {
for (auto &PHI : BB.phis()) {
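      // Only PHIs that uniformity analysis already reports as uniform are
      // candidates; divergent PHIs are left untouched.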
if (UA.isDivergent(&PHI))
continue;
      // The unique incoming value for the PHI node, excluding undef/poison.
Value *UniqueDefinedIncoming = nullptr;
      // The divergent block with a defined incoming value that dominates all
      // other blocks with the same incoming value.
BasicBlock *DominateBB = nullptr;
// Predecessors with undefined incoming value (excluding loop backedge).
SmallVector<BasicBlock *> Undefs;
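      // Classify the incoming values: remember the unique defined value and
      // the incoming block that dominates all other blocks providing it, and
      // collect the predecessors that feed in undef/poison.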
for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
Value *Incoming = PHI.getIncomingValue(i);
BasicBlock *IncomingBB = PHI.getIncomingBlock(i);
if (Incoming == &PHI)
continue;
if (isa<UndefValue>(Incoming)) {
// Undef from loop backedge will not be replaced.
if (!DT->dominates(&BB, IncomingBB))
Undefs.push_back(IncomingBB);
continue;
}
if (!UniqueDefinedIncoming) {
UniqueDefinedIncoming = Incoming;
DominateBB = IncomingBB;
} else if (Incoming == UniqueDefinedIncoming) {
// Update DominateBB if necessary.
if (DT->dominates(IncomingBB, DominateBB))
DominateBB = IncomingBB;
} else {
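          // A second distinct defined incoming value: this PHI is not a
          // pattern-B candidate, so give up on it.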
UniqueDefinedIncoming = nullptr;
break;
}
}
      // We only need to replace the undef for a PHI that merges
      // defined/undefined values from divergent threads.
      // TODO: We should still be able to replace the undef value if the unique
      // value is a Constant.
if (!UniqueDefinedIncoming || Undefs.empty() ||
!UA.isDivergent(DominateBB->getTerminator()))
continue;
// We only replace the undef when DominateBB truly dominates all the
// other predecessors with undefined incoming value. Make sure DominateBB
// dominates BB so that UniqueDefinedIncoming is available in BB and
// afterwards.
if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
return DT->dominates(DominateBB, UD);
})) {
PHI.replaceAllUsesWith(UniqueDefinedIncoming);
ToBeDeleted.push_back(&PHI);
Changed = true;
}
}
}
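  // Erase the rewritten PHIs only after the traversal so the phi iterators
  // above stay valid.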
for (auto *PHI : ToBeDeleted)
PHI->eraseFromParent();
return Changed;
}
bool AMDGPURewriteUndefForPHI::runOnFunction(Function &F) {
UniformityInfo &UA =
getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return rewritePHIs(F, UA, DT);
}
FunctionPass *llvm::createAMDGPURewriteUndefForPHIPass() {
return new AMDGPURewriteUndefForPHI();
}