1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
|
/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2022 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
//
/// GenXVectorDecomposer
/// --------------------
///
/// GenXVectorDecomposer is not a pass; instead it is a class that is called by
/// the GenXPostLegalization pass to perform vector decomposition.
///
/// For a vector written by wrregion and read by rdregion, it finds the way that
/// the vector can be divided into parts, with each part a range of one or more
/// GRFs, such that no rdregion or wrregion crosses a part boundary. Then it
/// decomposes the vector into those parts. A rdregion/wrregion that reads/writes
/// a whole part can be removed completely; a rdregion/wrregion that reads/writes
/// only some of the part is replaced to read/write just the applicable part.
///
/// In fact it does all this for a web of vectors linked by wrregion, phi nodes
/// and bitcasts.
///
/// The idea is that having lots of small vectors instead of one big vector
/// reduces register fragmentation in the finalizer's register allocator.
///
/// There is an option -limit-genx-vector-decomposer=N to aid debugging the code
/// changes made by the vector decomposer.
///
//===----------------------------------------------------------------------===//
#pragma once
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include <map>
#include <set>
// Forward declarations of LLVM types. In the declarations visible in this
// header each of them is only used through a pointer, so a declaration is
// sufficient here.
namespace llvm {
class Constant;
class Instruction;
class PHINode;
class Type;
class Use;
class GenXSubtarget;
} // namespace llvm
namespace vc {
// Forward declaration: this header only uses Region through a pointer
// (see VectorDecomposer::getPartIndex).
class Region;
// VectorDecomposer : decomposes vectors in a function
//
// As far as this header shows, the intended use is: queue candidate wrregions
// with addStartWrRegion(), call run(), and call clear() to drop the stored
// state. Everything except the small inline helpers is defined out of line.
class VectorDecomposer {
  // Local shorthands for the LLVM types named in the declarations below.
  using Constant = llvm::Constant;
  using DataLayout = llvm::DataLayout;
  using Instruction = llvm::Instruction;
  using PHINode = llvm::PHINode;
  using Type = llvm::Type;
  using Twine = llvm::Twine;
  using Use = llvm::Use;
  using Value = llvm::Value;
  using VectorType = llvm::VectorType;
  using GenXSubtarget = llvm::GenXSubtarget;

  // Data layout pointer; null by default. Presumably assigned from run()'s
  // ArgDL -- the assignment is not visible in this header.
  const DataLayout *DL = nullptr;
  // Wrregions queued by addStartWrRegion().
  llvm::SmallVector<Instruction *, 16> StartWrRegions;
  // Emptied by clear() but not by clearOne().
  std::set<Instruction *> Seen;
  // Emptied by clearOne(), i.e. once per web.
  llvm::SmallVector<Instruction *, 16> Web;
  // Emptied by clear() but not by clearOne(). Presumably instructions queued
  // for removal -- confirm against removeDeadCode()/eraseInst().
  llvm::SmallVector<Instruction *, 16> ToDelete;
  // NOTE(review): neither clear() nor clearOne() resets these two fields, so
  // any reset must happen in the out-of-line code.
  bool NotDecomposing = false;
  Instruction *NotDecomposingReportInst = nullptr;
  // Per-web state, emptied by clearOne(). Presumably the size and the start
  // offset of each part -- confirm against the out-of-line definitions.
  llvm::SmallVector<unsigned, 8> Decomposition;
  llvm::SmallVector<unsigned, 8> Offsets;
  // Per-web state, emptied by clearOne(): a list of values for each phi node.
  std::map<PHINode *, llvm::SmallVector<Value *, 8>> PhiParts;
  // Per-web state, emptied by clearOne().
  llvm::SmallVector<Instruction *, 8> NewInsts;
  // Counter that clear() puts back to zero.
  unsigned DecomposedCount = 0;

public:
  // clear : clear anything stored
  //
  // Drops the per-web state (via clearOne()), the queued start wrregions, the
  // Seen set and the ToDelete list, and zeroes DecomposedCount. DL,
  // NotDecomposing and NotDecomposingReportInst are left as they are.
  void clear() {
    DecomposedCount = 0;
    ToDelete.clear();
    Seen.clear();
    StartWrRegions.clear();
    clearOne();
  }

  // addStartWrRegion : add a wrregion with undef input to the list
  void addStartWrRegion(Instruction *Inst) { StartWrRegions.push_back(Inst); }

  // run : run the vector decomposer on the stored StartWrRegions
  //
  // Defined out of line; the meaning of the returned flag is not visible
  // here (presumably "something was changed" -- confirm).
  bool run(const DataLayout &ArgDL);

private:
  // clearOne : clear from processing one web
  //
  // Empties only the containers that hold per-web state.
  void clearOne() {
    NewInsts.clear();
    PhiParts.clear();
    Offsets.clear();
    Decomposition.clear();
    Web.clear();
  }

  // Everything below is defined out of line. The groups follow the function
  // names only; see the definitions for the actual contracts.

  // Handling of one start wrregion and of its web.
  bool processStartWrRegion(Instruction *Inst);
  bool determineDecomposition(Instruction *Inst);
  void addToWeb(Value *V, Instruction *User = nullptr);
  void adjustDecomposition(Instruction *Inst);
  void setNotDecomposing(Instruction *Inst, const char *Text);

  // Rewriting of the web, one function per kind of instruction.
  void decompose();
  void decomposeTree(Use *U, const llvm::SmallVectorImpl<Value *> *PartsIn);
  void decomposePhiIncoming(PHINode *Phi, unsigned OperandNum,
                            const llvm::SmallVectorImpl<Value *> *PartsIn);
  void decomposeRdRegion(Instruction *RdRegion,
                         const llvm::SmallVectorImpl<Value *> *PartsIn);
  void decomposeWrRegion(Instruction *WrRegion,
                         llvm::SmallVectorImpl<Value *> *Parts);
  void decomposeBitCast(Instruction *Inst,
                        llvm::SmallVectorImpl<Value *> *Parts);

  // Queries about a single part, addressed by PartIndex.
  unsigned getPartIndex(Region *R);
  unsigned getPartOffset(unsigned PartIndex);
  unsigned getPartNumBytes(Type *WholeTy, unsigned PartIndex);
  unsigned getPartNumElements(Type *WholeTy, unsigned PartIndex);
  VectorType *getPartType(Type *WholeTy, unsigned PartIndex);
  Constant *getConstantPart(Constant *Whole, unsigned PartIndex);

  // Clean-up and diagnostics.
  void removeDeadCode();
  void eraseInst(Instruction *Inst);
  void emitWarning(Instruction *Inst, const Twine &Msg);
};
// SelectDecomposer : decomposes predicate computation sequences for select
// to reduce flag register pressure.
//
// As far as this header shows, the intended use is: construct with a
// subtarget, queue selects with addStartSelect(), then call run().
class SelectDecomposer {
  // Local shorthands for the LLVM types named in the declarations below.
  using GenXSubtarget = llvm::GenXSubtarget;
  using Instruction = llvm::Instruction;
  using Value = llvm::Value;

  // Subtarget handed to the constructor; stored as given, not owned.
  const GenXSubtarget *ST;
  // Raised by setNotDecomposing(), lowered again by clear().
  bool NotDecomposing = false;
  // Selects queued by addStartSelect(). Note that clear() leaves this list
  // alone.
  llvm::SmallVector<Instruction *, 8> StartSelects;
  // The members below are all emptied by clear().
  llvm::SmallVector<Instruction *, 16> Web;
  // Element count of each part, as returned by getPartNumElements().
  llvm::SmallVector<unsigned, 8> Decomposition;
  // Offset of each part, as returned by getPartOffset().
  llvm::SmallVector<unsigned, 8> Offsets;
  std::set<Instruction *> Seen;
  // Maps each decomposed instruction to its corresponding part values.
  llvm::SmallDenseMap<Value *, llvm::SmallVector<Value *, 8>> DMap;

public:
  // Remembers the subtarget pointer ST for later use.
  explicit SelectDecomposer(const GenXSubtarget *ST) : ST{ST} {}

  // addStartSelect : append a select instruction to the list of start selects
  void addStartSelect(Instruction *Inst) { StartSelects.push_back(Inst); }

  // run : defined out of line; the meaning of the returned flag is not
  // visible here (presumably "something was changed" -- confirm).
  bool run();

private:
  // clear : reset the working state
  //
  // Lowers NotDecomposing and empties Web, Decomposition, Offsets, Seen and
  // DMap. ST and StartSelects are not touched.
  void clear() {
    DMap.clear();
    Seen.clear();
    Offsets.clear();
    Decomposition.clear();
    Web.clear();
    NotDecomposing = false;
  }

  // Defined out of line.
  bool processStartSelect(Instruction *Inst);
  bool determineDecomposition(Instruction *Inst);

  // setNotDecomposing : raise the NotDecomposing flag
  void setNotDecomposing() { NotDecomposing = true; }

  // Defined out of line.
  void addToWeb(Value *V);
  void decompose(Instruction *Inst);
  void decomposeSelect(Instruction *Inst);
  void decomposeBinOp(Instruction *Inst);
  void decomposeCmp(Instruction *Inst);

  // getPartOffset : offset recorded for part PartIndex
  //
  // PartIndex must be a valid index into Offsets.
  unsigned getPartOffset(unsigned PartIndex) const {
    const unsigned PartOffset = Offsets[PartIndex];
    return PartOffset;
  }

  // getPartNumElements : number of elements recorded for part PartIndex
  //
  // PartIndex must be a valid index into Decomposition.
  unsigned getPartNumElements(unsigned PartIndex) const {
    const unsigned NumElements = Decomposition[PartIndex];
    return NumElements;
  }

  // Defined out of line.
  Value *getPart(Value *Whole, unsigned PartIndex, Instruction *Inst) const;
};
} // namespace vc
|