File: StatelessToStateful.hpp

package info (click to toggle)
intel-graphics-compiler2 2.16.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 106,644 kB
  • sloc: cpp: 805,640; lisp: 287,672; ansic: 16,414; python: 3,952; yacc: 2,588; lex: 1,666; pascal: 313; sh: 186; makefile: 35
file content (188 lines) | stat: -rw-r--r-- 7,482 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#pragma once

#include "AdaptorCommon/ImplicitArgs.hpp"
#include "Compiler/Optimizer/OpenCLPasses/KernelArgs/KernelArgs.hpp"
#include "Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp"
#include "Compiler/MetaDataUtilsWrapper.h"
#include "common/LLVMWarningsPush.hpp"
#include <llvm/Pass.h>
#include <llvm/IR/InstVisitor.h>
#include <llvm/IR/Instruction.h>
#include <llvm/Analysis/AssumptionCache.h>
#include "common/LLVMWarningsPop.hpp"
#include "Probe/Assertion.h"

#include <map>
#include <optional>
#include <vector>

namespace IGC {
// Too many stateful promotions will overwhelm the surface state
// cache (32 entries per HDC), which significantly impacts
// performance. Stateful promotion is simply disabled after 32 args.
// Declared `inline` so that every translation unit including this header
// shares a single definition of the constant.
inline constexpr uint maxPromotionCount = 32;

// Addressing mode handed to the StatelessToStateful constructor; it selects
// which kind of stateful access the pass targets.
enum class TargetAddressing {
  BINDFUL,
  BINDLESS,
};

// Module pass that promotes stateless memory accesses, whose pointer can be
// traced back to a kernel argument, to stateful accesses. The pass visits
// load, store and call instructions, records the promotable ones per kernel
// argument (see m_promotionMap) and then rewrites them. The addressing mode
// of the promoted accesses is selected through TargetAddressing.
class StatelessToStateful : public llvm::ModulePass, public llvm::InstVisitor<StatelessToStateful> {
public:
  // Per-kernel-argument bookkeeping, keyed by the KernelArg descriptor.
  using ArgInfoMap = llvm::DenseMap<const KernelArg *, int>;

  static char ID;

  StatelessToStateful();
  StatelessToStateful(TargetAddressing addressing);

  ~StatelessToStateful() override = default;

  // Declares the analyses this pass depends on. The pass keeps the CFG
  // intact and requires the metadata utilities, the assumption cache tracker
  // and the code generation context.
  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MetaDataUtilsWrapper>();
    AU.addRequired<llvm::AssumptionCacheTracker>();
    AU.addRequired<CodeGenContextWrapper>();
  }

  virtual llvm::StringRef getPassName() const override { return "StatelessToStateful"; }

  virtual bool runOnModule(llvm::Module &M) override;

  // InstVisitor hooks for the instruction kinds this pass inspects.
  void visitLoadInst(llvm::LoadInst &I);
  void visitStoreInst(llvm::StoreInst &I);
  void visitCallInst(llvm::CallInst &I);

private:
  // Describes one stateless instruction that was selected for promotion:
  // the instruction itself, the pointer it accesses and the offset value
  // computed for that pointer.
  struct InstructionInfo {
    InstructionInfo(llvm::Instruction *I, llvm::Value *ptr, llvm::Value *offset)
        : statelessInst(I), ptr(ptr), offset(offset) {}
    InstructionInfo() = delete;

    void setStatefulAddrspace(unsigned addrspace) { statefulAddrSpace = addrspace; }

    // Returns the stateful address space. It must have been set through
    // setStatefulAddrspace() beforehand; this is only checked by assertion.
    unsigned getStatefulAddrSpace() const {
      IGC_ASSERT(statefulAddrSpace.has_value());
      return *statefulAddrSpace;
    }

    void setBaseArgIndex(unsigned index) { baseArgIndex = index; }
    unsigned getBaseArgIndex() const { return baseArgIndex; }

    llvm::Instruction *const statelessInst;
    llvm::Value *const ptr;
    llvm::Value *const offset;

  private:
    unsigned baseArgIndex = 0;
    // Empty until setStatefulAddrspace() is called.
    std::optional<unsigned> statefulAddrSpace;
  };

  void handleFunction(llvm::Function &F);

  void setModuleUsesBindless();
  bool getModuleUsesBindless();

  void findPromotableInstructions();
  void addToPromotionMap(llvm::Instruction &I, llvm::Value *Ptr);

  void promote();
  void promoteInstruction(InstructionInfo &InstInfo);
  void promoteLoad(InstructionInfo &InstInfo);
  void promoteStore(InstructionInfo &InstInfo);
  void promoteIntrinsic(InstructionInfo &InstInfo);

  bool doPromoteUntypedAtomics(const llvm::GenISAIntrinsic::ID intrinID, const llvm::GenIntrinsicInst *Inst);
  bool isUntypedAtomic(const llvm::GenISAIntrinsic::ID intrinID);

  // The LLVM InstCombine pass replaces multiple loads that have a single phi instruction as their user
  // with a phi on the addresses followed by a single load. This prevents StatelessToStateful from making
  // the loads stateful.
  // These functions aim to revert that change made by InstCombine.
  bool hoistLoad();
  bool canWriteToMemoryTill(llvm::Instruction *Till);
  bool isItSafeToHoistLoad(llvm::LoadInst *LI, llvm::PHINode *Phi);

  // pointerIsPositiveOffsetFromKernelArgument - check if V can be traced back to a kernel argument and
  // has a positive offset from that argument.
  // ignoreSyncBuffer - when set to true, return false directly if V is from the implicit kernel
  // argument "sync buffer". The sync buffer must be a stateless access in the ZEBinary path, so it
  // cannot be promoted.
  bool pointerIsPositiveOffsetFromKernelArgument(llvm::Function *F, llvm::Value *V, llvm::Value *&offset,
                                                 unsigned int &argNumber, bool ignoreSyncBuffer);

  // Check if the given pointer value can be traced back to any kernel argument.
  // Returns the kernel argument if found, otherwise returns nullptr.
  const KernelArg *getKernelArgFromPtr(const llvm::PointerType &ptrType, llvm::Value *pVal);

  // Check if the given pointer can be traced back to any kernel argument.
  bool pointerIsFromKernelArgument(llvm::Value &ptr);

  bool getOffsetFromGEP(llvm::Function *F, const llvm::SmallVector<llvm::GetElementPtrInst *, 4> &GEPs,
                        uint32_t argNumber, bool isImplicitArg, llvm::Value *&offset);
  llvm::Argument *getBufferOffsetArg(llvm::Function *F, uint32_t ArgNumber);
  void setPointerSizeTo32bit(int32_t AddrSpace, llvm::Module *M);

  // Encode uavIndex in addrspace. Note that uavIndex is not always the same as BTI.
  // Read-only images are qualified as SRV resources and have a separate index space.
  // Writeable images and buffers are qualified as UAV resources and also have a
  // separate index space. So if there is a read_only image and a global buffer in the kernel,
  // they will both have `0` encoded in addrspace. The actual BTI will be computed based
  // on BTLayout in EmitVISAPass.
  unsigned encodeBindfulAddrspace(unsigned uavIndex);

  void updateArgInfo(const KernelArg *KA, bool IsPositive);
  void finalizeArgInitialValue(llvm::Function *F);

  // Looks up the KernelArg descriptor whose underlying argument is `Arg`.
  // Returns nullptr when no descriptor matches, or when the kernel argument
  // list has not been initialized (which additionally trips the assertion).
  const KernelArg *getKernelArg(llvm::Value *Arg) const {
    IGC_ASSERT_MESSAGE(m_pKernelArgs, "Should initialize it before use!");
    if (m_pKernelArgs == nullptr) {
      return nullptr;
    }
    for (const KernelArg &arg : *m_pKernelArgs) {
      if (arg.getArg() == Arg) {
        return &arg;
      }
    }
    return nullptr;
  }

  // Finds the implicit buffer-offset kernel argument associated with the
  // same argument number as `KA`. Returns nullptr when there is none, when
  // `KA` is null, or when the kernel argument list has not been initialized.
  const KernelArg *getBufferOffsetKernelArg(const KernelArg *KA) const {
    IGC_ASSERT_MESSAGE(m_pKernelArgs, "KernelArgs: should initialize it before use!");
    if (m_pKernelArgs == nullptr || KA == nullptr) {
      return nullptr;
    }
    const int argno = KA->getAssociatedArgNo();
    for (const KernelArg &arg : *m_pKernelArgs) {
      if (arg.getArgType() == KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET && arg.getAssociatedArgNo() == argno) {
        return &arg;
      }
    }
    return nullptr;
  }

  // When true, runtime can generate surface with buffer's original base (creation base)
  bool m_hasBufferOffsetArg = false;

  // When m_hasBufferOffsetArg is true, the optional buffer offset
  // can be on or off, which is indicated by this boolean flag.
  bool m_hasOptionalBufferOffsetArg = false;

  // When true, every message of the form ptrArg + offset will have offset >= 0.
  bool m_hasPositivePointerOffset = false;

  // Handle non-gep pointer
  //   For historic reasons (probably non-DW aligned arg), non-gep ptr isn't handled.
  //   If this field is true, non-gep ptr shall be handled.
  const bool m_supportNonGEPPtr = false;

  llvm::AssumptionCacheTracker *m_ACT = nullptr;

  // Returns the assumption cache of F, or nullptr when no tracker is set.
  llvm::AssumptionCache *getAC(llvm::Function *F) {
    if (m_ACT == nullptr) {
      return nullptr;
    }
    return &m_ACT->getAssumptionCache(*F);
  }

  TargetAddressing m_targetAddressing;
  OpenCLProgramContext *m_ctx = nullptr;
  ImplicitArgs *m_pImplicitArgs = nullptr;
  KernelArgs *m_pKernelArgs = nullptr;
  ArgInfoMap m_argsInfo;
  bool m_changed = false;
  llvm::Function *m_F = nullptr;
  llvm::Module *m_Module = nullptr;

  // Map argument index to a vector of instructions that should be promoted to stateful.
  std::map<unsigned int, std::vector<InstructionInfo>> m_promotionMap;
};

} // namespace IGC