File: ThreadCombining.hpp

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (97 lines) | stat: -rw-r--r-- 3,170 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#pragma once

#include "common/LLVMWarningsPush.hpp"
#include <llvm/ADT/SetVector.h>
#include <llvm/Pass.h>
#include <llvmWrapper/IR/IRBuilder.h>
#include <llvm/IR/Dominators.h>
#include <llvm/Analysis/PostDominators.h>
#include "common/LLVMWarningsPop.hpp"

#include "Compiler/CodeGenPublic.h"
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"

// Pass-initialization hook for the ThreadCombining pass.
// Only declared in this header; the definition lives in another translation
// unit (presumably produced by LLVM's pass-registration macros - confirm).
// The ThreadCombining constructor calls it with the global llvm::PassRegistry.
void initializeThreadCombiningPass(llvm::PassRegistry&);

namespace IGC
{
    /// Selects one component of the thread-group size.
    ///
    /// Used as the `dimension` argument of ThreadCombining::GetthreadGroupSize
    /// and ThreadCombining::SetthreadGroupSize. The numeric values are spelled
    /// out explicitly; they are the same values the enumerators receive by
    /// default (0, 1, 2).
    enum dim
    {
        ThreadGroupSize_X = 0,
        ThreadGroupSize_Y = 1,
        ThreadGroupSize_Z = 2
    };

    class ThreadCombining : public llvm::ModulePass
    {
    public:
        ThreadCombining()
            : ModulePass(ID)
            , m_kernel(nullptr)
            , m_SLMUsed(false)
        {
            initializeThreadCombiningPass(*llvm::PassRegistry::getPassRegistry());
        }

        ~ThreadCombining() {}

        virtual llvm::StringRef getPassName() const override
        {
            return "ThreadCombining";
        }

        bool runOnModule(llvm::Module& M) override;

        virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override
        {
            AU.addRequired<llvm::DominatorTreeWrapperPass>();
            AU.addRequired<llvm::PostDominatorTreeWrapperPass>();
            AU.addRequired<CodeGenContextWrapper>();
            AU.addRequired<MetaDataUtilsWrapper>();
        }

        llvm::Function* m_kernel;
        bool m_SLMUsed;
        std::vector<llvm::Instruction*> m_barriers;
        llvm::SetVector<llvm::Instruction*> m_aliveAcrossBarrier;
        std::set<llvm::Instruction*> m_instructionsToMove;
        std::map<llvm::Instruction*, std::set<llvm::Instruction*>> m_LiveRegistersPerBarrier;
        static char ID;

        bool isBarrier(llvm::Instruction& I) const;
        bool isSLMUsed(llvm::Instruction* I) const;
        unsigned int GetthreadGroupSize(llvm::Module& M, dim dimension);
        void SetthreadGroupSize(llvm::Module& M, llvm::Constant* size, dim dimension);
        void remapThreads(llvm::Module& M,
            unsigned int newSizeX,
            unsigned int newSizeY,
            unsigned int threadGroupSize_X,
            unsigned int threadGroupSize_Y,
            IGCLLVM::IRBuilder<>& builder);

        void CreateLoopKernel(
            llvm::Module& M,
            unsigned int newSizeX,
            unsigned int newSizeY,
            unsigned int threadGroupSize_X,
            unsigned int threadGroupSize_Y,
            llvm::Function* newFunc,
            IGCLLVM::IRBuilder<>& builder);

        void CreateNewKernel(llvm::Module& M,
            IGCLLVM::IRBuilder<>& builder,
            llvm::Function* newFunc);

        bool canDoOptimization(llvm::Function* m_kernel, llvm::Module& M);
        void PreAnalysis(llvm::Function* m_kernel, llvm::Module& M, std::vector<llvm::Instruction*>& barriers);
        void FindRegistersAliveAcrossBarriers(llvm::Function* m_kernel, llvm::Module& M);
    };
}