1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2021 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#pragma once
#include "common/LLVMWarningsPush.hpp"
#include <llvm/ADT/SetVector.h>
#include <llvm/Pass.h>
#include <llvmWrapper/IR/IRBuilder.h>
#include <llvm/IR/Dominators.h>
#include <llvm/Analysis/PostDominators.h>
#include "common/LLVMWarningsPop.hpp"
#include "Compiler/CodeGenPublic.h"
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
// Declared at global scope (outside namespace IGC) and only declared here;
// the definition is not part of this header. The ThreadCombining constructor
// calls it with the process-wide llvm::PassRegistry.
// NOTE(review): presumably the registration hook produced by the pass
// registration macros in the matching .cpp - confirm there.
void initializeThreadCombiningPass(llvm::PassRegistry&);
namespace IGC
{
/// Selects one axis of the thread-group size. Used as the `dimension`
/// argument of ThreadCombining::GetthreadGroupSize and
/// ThreadCombining::SetthreadGroupSize.
///
/// The numeric values are written out explicitly; they are the same values
/// the enumerators receive by default (0, 1, 2).
enum dim
{
    ThreadGroupSize_X = 0, ///< X axis
    ThreadGroupSize_Y = 1, ///< Y axis
    ThreadGroupSize_Z = 2  ///< Z axis
};
class ThreadCombining : public llvm::ModulePass
{
public:
ThreadCombining()
: ModulePass(ID)
, m_kernel(nullptr)
, m_SLMUsed(false)
{
initializeThreadCombiningPass(*llvm::PassRegistry::getPassRegistry());
}
~ThreadCombining() {}
virtual llvm::StringRef getPassName() const override
{
return "ThreadCombining";
}
bool runOnModule(llvm::Module& M) override;
virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override
{
AU.addRequired<llvm::DominatorTreeWrapperPass>();
AU.addRequired<llvm::PostDominatorTreeWrapperPass>();
AU.addRequired<CodeGenContextWrapper>();
AU.addRequired<MetaDataUtilsWrapper>();
}
llvm::Function* m_kernel;
bool m_SLMUsed;
std::vector<llvm::Instruction*> m_barriers;
llvm::SetVector<llvm::Instruction*> m_aliveAcrossBarrier;
std::set<llvm::Instruction*> m_instructionsToMove;
std::map<llvm::Instruction*, std::set<llvm::Instruction*>> m_LiveRegistersPerBarrier;
static char ID;
bool isBarrier(llvm::Instruction& I) const;
bool isSLMUsed(llvm::Instruction* I) const;
unsigned int GetthreadGroupSize(llvm::Module& M, dim dimension);
void SetthreadGroupSize(llvm::Module& M, llvm::Constant* size, dim dimension);
void remapThreads(llvm::Module& M,
unsigned int newSizeX,
unsigned int newSizeY,
unsigned int threadGroupSize_X,
unsigned int threadGroupSize_Y,
IGCLLVM::IRBuilder<>& builder);
void CreateLoopKernel(
llvm::Module& M,
unsigned int newSizeX,
unsigned int newSizeY,
unsigned int threadGroupSize_X,
unsigned int threadGroupSize_Y,
llvm::Function* newFunc,
IGCLLVM::IRBuilder<>& builder);
void CreateNewKernel(llvm::Module& M,
IGCLLVM::IRBuilder<>& builder,
llvm::Function* newFunc);
bool canDoOptimization(llvm::Function* m_kernel, llvm::Module& M);
void PreAnalysis(llvm::Function* m_kernel, llvm::Module& M, std::vector<llvm::Instruction*>& barriers);
void FindRegistersAliveAcrossBarriers(llvm::Function* m_kernel, llvm::Module& M);
};
}
|