File: GenXSLMResolution.cpp

package info (click to toggle)
intel-graphics-compiler 1.0.17791.18-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 102,312 kB
  • sloc: cpp: 935,343; lisp: 286,143; ansic: 16,196; python: 3,279; yacc: 2,487; lex: 1,642; pascal: 300; sh: 174; makefile: 27
file content (288 lines) | stat: -rw-r--r-- 9,964 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
/*========================== begin_copyright_notice ============================

Copyright (C) 2023 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

//
/// GenXSLMResolution
/// ---------------------------
///
/// GenXSLMResolution is a module pass which performs the following:
/// a. Lower llvm.genx.slm.init intrinsic.
/// b. Replace all SLM variables and the first SLM kernel argument uses
/// with offsets to SLM buffer.
///
/// To properly assign an offset to SLM variable the pass does the following:
/// 1. Build list of functions which are invoked from kernel by traversing
/// the call graph.
/// 2. Check which SLM variables have uses in functions from the function
/// group.
/// 3. Sort variables in descending alignment order and assign offsets to
/// them.
/// 4. Update total SLM size for a function group Head (kernel).
///
/// It is possible to avoid traversing the call graph and simplify this pass by
/// making it a FunctionGroup pass. However, that would take away the
/// opportunity to run InstCombine afterwards, which is required to fold
/// constants once the SLM variable uses have been replaced.
///
/// **IR Restriction** After this pass SLM size in kernel metadata should not be
/// updated anymore.
//
//===----------------------------------------------------------------------===//

#include "GenX.h"
#include "GenXUtil.h"

#include "vc/Support/GenXDiagnostic.h"
#include "vc/Utils/GenX/GlobalVariable.h"
#include "vc/Utils/GenX/KernelInfo.h"
#include "vc/Utils/General/Types.h"

#include "llvmWrapper/Analysis/CallGraph.h"
#include "llvmWrapper/IR/Value.h"
#include "llvmWrapper/Support/Alignment.h"

#include <llvm/IR/Constants.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/InstIterator.h>
#include <llvm/IR/Module.h>
#include <llvm/InitializePasses.h>
#include <llvm/Pass.h>

using namespace llvm;

namespace {

class GenXSLMResolution : public ModulePass {
  CallGraph *CG = nullptr;
  const DataLayout *DL = nullptr;

public:
  static char ID;
  explicit GenXSLMResolution() : ModulePass(ID) {}
  StringRef getPassName() const override { return "GenX SLM Resolution"; }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<CallGraphWrapperPass>();
    AU.setPreservesCFG();
  }
  bool runOnModule(Module &M) override;

private:
  IGCLLVM::Align getSLMArgAlign(const Argument &A) const;
  IGCLLVM::Align getGlobalVarAlign(const GlobalVariable &GV) const;
  Constant *allocateOnSLM(const GlobalVariable &GV, unsigned &SLMSize) const;
  Constant *getNextOffset(IGCLLVM::Align Alignment, LLVMContext &Ctx,
                          unsigned &SLMSize) const;
  void
  replaceSLMVariablesWithOffsets(SmallVectorImpl<GlobalVariable *> &Workload,
                                 SmallPtrSetImpl<Function *> &FunctionSet,
                                 unsigned &SLMSize) const;
  bool runForKernel(Function &Head, Module &M,
                    ArrayRef<GlobalVariable *> SLMVars);
};

} // end namespace

// Out-of-class definition of the static pass identifier that the constructor
// hands to ModulePass.
char GenXSLMResolution::ID = 0;
namespace llvm {
// Declaration of the pass initializer called from createGenXSLMResolution();
// presumably its definition is generated by the INITIALIZE_PASS_* macros
// below.
void initializeGenXSLMResolutionPass(PassRegistry &);
} // end namespace llvm

// Pass registration. CallGraphWrapperPass is listed as a dependency, which
// matches the analysis requested in getAnalysisUsage().
INITIALIZE_PASS_BEGIN(GenXSLMResolution, "GenXSLMResolution",
                      "GenXSLMResolution", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(GenXSLMResolution, "GenXSLMResolution", "GenXSLMResolution",
                    false, false)

// Factory for the pass: initializes it in the global pass registry and
// returns a newly allocated instance.
ModulePass *llvm::createGenXSLMResolution() {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeGenXSLMResolutionPass(Registry);
  return new GenXSLMResolution();
}

// Transfers the SLM amount requested by an SLM init call into the metadata of
// the enclosing kernel.
//
// \p I is the SLM init call. Its first operand must be a constant integer and
// the call must be located directly in a kernel; otherwise a fatal error is
// reported (vc::fatal is assumed not to return). The kernel SLM size is only
// ever increased here: the metadata is updated when the requested amount
// exceeds the recorded one. The instruction itself is not erased by this
// function.
static void lowerSlmInit(Instruction &I) {
  LLVMContext &Ctx = I.getContext();
  Function *Kernel = I.getFunction();
  if (!vc::isKernel(Kernel))
    vc::fatal(Ctx, "GenXSLMResolution",
              "SLM init call is supported only in kernels", &I);

  auto *SizeOperand = dyn_cast<ConstantInt>(I.getOperand(0));
  if (!SizeOperand)
    vc::fatal(Ctx, "GenXSLMResolution",
              "Cannot reserve non-constant amount of SLM", &I);

  const unsigned Requested = SizeOperand->getZExtValue();
  vc::KernelMetadata MD{Kernel};
  if (MD.getSLMSize() < Requested)
    MD.updateSLMSizeMD(Requested);
}

static bool isUserInList(const User &U,
                         const SmallPtrSetImpl<Function *> &FunctionSet) {
  if (auto *I = dyn_cast<Instruction>(&U)) {
    auto *F = I->getFunction();
    return FunctionSet.count(F);
  }
  IGC_ASSERT_MESSAGE(isa<Constant>(&U), "unexpected SLM variable user");
  // For constant user continue recursively traversing until instruction
  // is met.
  return llvm::any_of(U.users(), [&FunctionSet](const User *U) {
    return isUserInList(*U, FunctionSet);
  });
}

static bool isBelongToKernel(const GlobalVariable &GV,
                             const SmallPtrSetImpl<Function *> &FunctionSet) {
  return llvm::any_of(GV.users(), [&FunctionSet](const User *U) {
    return isUserInList(*U, FunctionSet);
  });
}

// Collects \p Head together with every function definition reachable from it
// through the edges of \p CG.
//
// The walk is iterative and uses an explicit stack. Call-graph nodes without
// an associated function and functions that are only declarations are
// skipped. The returned set always contains \p Head.
static SmallPtrSet<Function *, 8> traverseCallGraph(Function &Head,
                                                    CallGraph &CG) {
  SmallPtrSet<Function *, 8> Visited = {&Head};
  SmallVector<Function *, 8> Stack = {&Head};
  while (!Stack.empty()) {
    auto *F = Stack.pop_back_val();
    CallGraphNode &N = *CG[F];
    // Bind by const reference: a call record carries a value handle, so
    // copying it for every edge is needless work.
    for (const IGCLLVM::CallRecord &CE : N) {
      auto *Child = CE.second->getFunction();
      if (!Child || Child->isDeclaration())
        continue;
      // Push a function only the first time it is seen so that cycles in the
      // call graph terminate.
      if (Visited.insert(Child).second)
        Stack.push_back(Child);
    }
  }
  return Visited;
}

// Gathers the global variables of \p M that are placed in the local (SLM)
// address space and are accepted by vc::isRealGlobalVariable.
//
// A matching variable that does not have local linkage is reported through
// vc::diagnose and left out of the result.
static SmallVector<GlobalVariable *, 4> collectSLMVariables(Module &M) {
  SmallVector<GlobalVariable *, 4> Result;
  for (GlobalVariable &Var : M.globals()) {
    const bool IsSLMVariable =
        (Var.getAddressSpace() == vc::AddrSpace::Local) &&
        vc::isRealGlobalVariable(Var);
    if (!IsSLMVariable)
      continue;
    if (Var.hasLocalLinkage()) {
      Result.push_back(&Var);
      continue;
    }
    vc::diagnose(Var.getContext(), "GenXSLMResolution",
                 "SLM variables must have local linkage", &Var);
  }
  return Result;
}

// Returns the ABI alignment of the type that the pointer argument \p A points
// to, according to the module data layout.
IGCLLVM::Align GenXSLMResolution::getSLMArgAlign(const Argument &A) const {
  return IGCLLVM::getABITypeAlign(*DL,
                                  IGCLLVM::getNonOpaquePtrEltTy(A.getType()));
}

// Returns the alignment to use for \p GV: the alignment set on the variable
// when it is non-zero, otherwise the ABI alignment of its value type.
IGCLLVM::Align
GenXSLMResolution::getGlobalVarAlign(const GlobalVariable &GV) const {
  if (GV.getAlignment() == 0)
    return IGCLLVM::getABITypeAlign(*DL, GV.getValueType());
  return IGCLLVM::getAlign(GV);
}

// Rounds \p SLMSize up to \p Alignment (updating it in place) and returns the
// resulting offset as an i32 constant created in \p Ctx.
//
// A zero offset is never returned: genx::SlmNullProtection is used instead.
// NOTE(review): in that case SLMSize itself stays zero, so the returned
// offset and the running size differ - presumably intentional, confirm
// against the definition of SlmNullProtection.
Constant *GenXSLMResolution::getNextOffset(IGCLLVM::Align Alignment,
                                           LLVMContext &Ctx,
                                           unsigned &SLMSize) const {
  SLMSize = IGCLLVM::alignTo(SLMSize, Alignment);
  unsigned OffsetValue = SLMSize;
  if (OffsetValue == 0)
    OffsetValue = genx::SlmNullProtection;
  return ConstantInt::get(Type::getInt32Ty(Ctx), OffsetValue);
}

// Places \p GV at the next suitably aligned SLM offset and returns that
// offset converted to the pointer type of \p GV.
//
// \p SLMSize is first aligned for the variable and then increased by the
// store size of its value type.
Constant *GenXSLMResolution::allocateOnSLM(const GlobalVariable &GV,
                                           unsigned &SLMSize) const {
  PointerType *PtrTy = GV.getType();
  Constant *Offset =
      getNextOffset(getGlobalVarAlign(GV), GV.getContext(), SLMSize);
  SLMSize += DL->getTypeStoreSize(GV.getValueType());
  return ConstantExpr::getIntToPtr(Offset, PtrTy);
}

// Assigns SLM offsets to the variables in \p Workload and rewrites their uses.
//
// \p Workload is sorted in place by alignment, largest first; the sort is
// stable, so variables with equal alignment keep their relative order. Each
// variable then gets an offset from allocateOnSLM, and only those uses whose
// user belongs to a function from \p FunctionSet are replaced with it.
// \p SLMSize is advanced past every allocated variable.
void GenXSLMResolution::replaceSLMVariablesWithOffsets(
    SmallVectorImpl<GlobalVariable *> &Workload,
    SmallPtrSetImpl<Function *> &FunctionSet, unsigned &SLMSize) const {
  llvm::stable_sort(Workload, [this](const auto *lhs, const auto *rhs) {
    return getGlobalVarAlign(*lhs) > getGlobalVarAlign(*rhs);
  });
  for (auto *GV : Workload) {
    auto *Offset = allocateOnSLM(*GV, SLMSize);
    // The predicate only calls a free function, so it does not capture
    // `this`.
    IGCLLVM::replaceUsesWithIf(GV, Offset, [&FunctionSet](const Use &U) {
      return isUserInList(*U.getUser(), FunctionSet);
    });
  }
}

// Resolves SLM usage for the kernel \p Head.
//
// SLM variables from \p SLMVars that are used by the kernel or by functions
// reachable from it are replaced with offsets, and the kernel SLM size
// metadata is updated accordingly. After that, the first kernel argument that
// is a pointer to the local address space (if any) is replaced with the next
// aligned offset. Returns true if the IR was changed.
bool GenXSLMResolution::runForKernel(Function &Head, Module &M,
                                     ArrayRef<GlobalVariable *> SLMVars) {
  vc::KernelMetadata KM{&Head};
  unsigned SLMSize = KM.getSLMSize();
  bool Changed = false;

  if (!SLMVars.empty()) {
    // Functions invoked (directly or indirectly) from Head, Head included.
    auto Reachable = traverseCallGraph(Head, *CG);

    // Keep only the SLM variables that have users in those functions.
    SmallVector<GlobalVariable *, 4> UsedVars;
    for (auto *Var : SLMVars)
      if (isBelongToKernel(*Var, Reachable))
        UsedVars.push_back(Var);

    const bool HasUsedVars = !UsedVars.empty();
    if (HasUsedVars) {
      replaceSLMVariablesWithOffsets(UsedVars, Reachable, SLMSize);
      KM.updateSLMSizeMD(SLMSize);
      Changed = true;
    }
  }

  // The first SLM kernel argument can be replaced with an offset. This makes
  // the pointer non-zero and gives some performance thanks to constant
  // folding later on.
  auto IsSLMPointer = [](Argument &A) {
    auto *PtrTy = dyn_cast<PointerType>(A.getType());
    return PtrTy && (vc::getAddrSpace(PtrTy) == vc::AddrSpace::Local);
  };
  auto *Arg = llvm::find_if(Head.args(), IsSLMPointer);
  if (Arg != Head.arg_end()) {
    // NOTE(review): the padding added to SLMSize while aligning for the
    // argument is not written back to the kernel metadata here - confirm this
    // is intended.
    auto *Offset =
        getNextOffset(getSLMArgAlign(*Arg), Head.getContext(), SLMSize);
    Arg->replaceAllUsesWith(ConstantExpr::getIntToPtr(Offset, Arg->getType()));
    Changed = true;
  }
  return Changed;
}

// Pass entry point.
//
// First every SLM init intrinsic call in the module is lowered into kernel
// metadata and erased. Then the SLM variables of the module are collected
// once and each kernel is processed with them. Returns true if anything in
// the module was changed.
bool GenXSLMResolution::runOnModule(Module &M) {
  DL = &M.getDataLayout();
  CG = &getAnalysis<CallGraphWrapperPass>().getCallGraph();

  // Erasure is deferred until the scan is over so that the instruction
  // iteration is not disturbed.
  SmallVector<Instruction *, 4> LoweredInits;
  for (Function &F : M.functions()) {
    for (Instruction &Inst : instructions(F)) {
      if (GenXIntrinsic::getGenXIntrinsicID(&Inst) !=
          GenXIntrinsic::genx_slm_init)
        continue;
      lowerSlmInit(Inst);
      LoweredInits.push_back(&Inst);
    }
  }
  bool Changed = !LoweredInits.empty();
  for (Instruction *Init : LoweredInits)
    Init->eraseFromParent();

  auto SLMVars = collectSLMVariables(M);
  for (Function &F : M.functions()) {
    if (!vc::isKernel(&F))
      continue;
    if (runForKernel(F, M, SLMVars))
      Changed = true;
  }
  return Changed;
}