File: EstimateFunctionSize.h

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (124 lines) | stat: -rw-r--r-- 5,046 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#pragma once

#include "common/LLVMWarningsPush.hpp"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Pass.h"
#include <llvm/IR/InstVisitor.h>
#include <llvm/ADT/StringRef.h>
#include "common/LLVMWarningsPop.hpp"
#include <cstddef>
#include "Probe/Assertion.h"
#include <deque>
#include <unordered_map>

namespace IGC {

    /// \brief Estimate function size after complete inlining.
    ///
    /// This pass visits the call graph and estimates the number of llvm IR
    /// instructions after complete inlining.
    ///
    /// The class is both a module pass and an instruction visitor over itself
    /// (CRTP), so visitCallInst() below is the hook the visitor invokes for
    /// call instructions. All non-inline members are defined out of line.
    class EstimateFunctionSize : public llvm::ModulePass, public llvm::InstVisitor<EstimateFunctionSize>
    {
    public:
        /// \brief Pass identification member (LLVM legacy pass convention).
        static char ID;

        /// \brief Selects the granularity of the analysis.
        /// NOTE(review): the exact difference between the two levels is
        /// decided in the implementation file - confirm there.
        enum AnalysisLevel {
            AL_Module,
            AL_Kernel
        };

        /// \brief Construct the pass; the analysis level defaults to AL_Module.
        explicit EstimateFunctionSize(AnalysisLevel = AL_Module);
        ~EstimateFunctionSize();

        /// \brief Human-readable pass name.
        llvm::StringRef getPassName() const override { return "Estimate Function Sizes"; }
        void getAnalysisUsage(llvm::AnalysisUsage& AU) const override;
        bool runOnModule(llvm::Module& M) override;

        /// \brief Return the estimated maximal function size after complete inlining.
        std::size_t getMaxExpandedSize() const;

        /// \brief Return the estimated function size after complete inlining.
        std::size_t getExpandedSize(const llvm::Function* F) const;

        /// \brief Query about \p F's call count.
        /// NOTE(review): presumably true when F has exactly one call site - confirm.
        bool onlyCalledOnce(const llvm::Function* F);

        /// \brief Return the recursion flag recorded by the analysis.
        bool hasRecursion() const { return HasRecursion; }

        /// \brief Return the subroutine-enable flag recorded by the analysis.
        bool shouldEnableSubroutine() const { return EnableSubroutine; }

        /// \brief Query whether \p F is a trimmed function.
        /// NOTE(review): "trimmed" presumably means selected not to be
        /// inlined by the size-reduction heuristics - confirm.
        bool isTrimmedFunction(llvm::Function* F);

        /// \brief InstVisitor hook invoked for each visited call instruction.
        void visitCallInst(llvm::CallInst& CI);

        /// \brief Query whether \p F has been assigned as a stack call.
        bool isStackCallAssigned(llvm::Function* F);

    private:
        void analyze();
        void checkSubroutine();
        void clear();
        void reduceKernelSize();

        /// \brief Return the associated opaque data.
        ///
        /// \tparam T Type the stored void* is cast to; the caller is
        ///           responsible for naming the type that was stored.
        /// \param F  Function whose entry in ECG is requested.
        /// \return   The stored pointer cast to T*, or nullptr if \p F has
        ///           no entry (which also trips the assertion when
        ///           assertions are enabled).
        template <typename T> T* get(llvm::Function* F) {
            // Use a single find() rather than count() followed by
            // operator[]: operator[] would insert a null entry for an
            // unknown function whenever the assertion is compiled out, and
            // the pair performed two hash lookups.
            auto Entry = ECG.find(F);
            IGC_ASSERT(Entry != ECG.end());
            return Entry != ECG.end() ? static_cast<T*>(Entry->second) : nullptr;
        }

        // Helpers for the size-reduction heuristics; all defined out of line.
        // The void* elements in the containers below are opaque here.
        // NOTE(review): presumably the same per-function records stored in
        // ECG - confirm.
        void initializeTopologicalVisit(llvm::Function* root, std::unordered_map<void*, uint32_t>& FunctionsInKernel, std::deque<void*>& BottomUpQueue, bool ignoreStackCallBoundary);
        uint32_t updateExpandedUnitSize(llvm::Function* F, bool ignoreStackCallBoundary);
        uint32_t bottomUpHeuristic(llvm::Function* F, uint32_t& stackCall_cnt);
        void partitionKernel();
        void reduceCompilationUnitSize();
        void trimCompilationUnit(llvm::SmallVector<void*, 64>& unitHeads, uint32_t threshold, bool ignoreStackCallBoundary);
        uint32_t getMaxUnitSize();
        void getFunctionsToTrim(llvm::Function* root, llvm::SmallVector<void*, 64>& functions_to_trim, bool ignoreStackCallBoundary, uint32_t& func_cnt);

        /// \brief The module being analyzed.
        llvm::Module* M;

        /// \brief The analysis level to be performed.
        AnalysisLevel AL;

        /// Scratch flag related to implicit-argument detection.
        /// NOTE(review): presumably set while visiting calls via
        /// matchImplicitArg() - confirm.
        bool tmpHasImplicitArg;
        bool matchImplicitArg(llvm::CallInst& CI);

        // Names of builtin functions, held as per-instance constants.
        // NOTE(review): presumably compared against callee names in
        // matchImplicitArg() - confirm.
        const llvm::StringRef GET_LOCAL_ID_X = "__builtin_IB_get_local_id_x";
        const llvm::StringRef GET_LOCAL_ID_Y = "__builtin_IB_get_local_id_y";
        const llvm::StringRef GET_LOCAL_ID_Z = "__builtin_IB_get_local_id_z";
        const llvm::StringRef GET_GROUP_ID = "__builtin_IB_get_group_id";
        const llvm::StringRef GET_LOCAL_THREAD_ID = "__builtin_IB_get_local_thread_id";
        const llvm::StringRef GET_GLOBAL_SIZE = "__builtin_IB_get_global_size";
        const llvm::StringRef GET_LOCAL_SIZE = "__builtin_IB_get_local_size";
        const llvm::StringRef GET_GLOBAL_OFFSET = "__builtin_IB_get_global_offset";
        const llvm::StringRef GET_WORK_DIM = "__builtin_IB_get_work_dim";
        const llvm::StringRef GET_NUM_GROUPS = "__builtin_IB_get_num_groups";
        const llvm::StringRef GET_ENQUEUED_LOCAL_SIZE = "__builtin_IB_get_enqueued_local_size";
        const llvm::StringRef GET_STAGE_IN_GRID_ORIGIN = "__builtin_IB_get_stage_in_grid_origin";
        const llvm::StringRef GET_STAGE_IN_GRID_SIZE = "__builtin_IB_get_stage_in_grid_size";
        const llvm::StringRef GET_SYNC_BUFFER = "__builtin_IB_get_sync_buffer";

        /// Flags exposed through hasRecursion() / shouldEnableSubroutine().
        bool HasRecursion;
        bool EnableSubroutine;

        /// Internal data structure for the analysis which is approximately an
        /// extended call graph. Maps each function to an opaque record that
        /// get<T>() casts back to its concrete type.
        llvm::SmallDenseMap<llvm::Function*, void*> ECG;
        // Kernel entries
        llvm::SmallVector<void*, 64> kernelEntries;
        // Functions that are assigned stackcalls
        llvm::SmallVector<void*, 64> stackCallFuncs;
        // NOTE(review): presumably functions whose address is taken - confirm.
        llvm::SmallVector<void*, 64> addressTakenFuncs;
    };

    /// \brief Factory for the EstimateFunctionSize pass, taking no arguments.
    /// NOTE(review): defined outside this header; presumably constructs the
    /// pass with its default analysis level - confirm against the definition.
    llvm::ModulePass* createEstimateFunctionSizePass();
    /// \brief Factory overload that takes the analysis level explicitly.
    llvm::ModulePass* createEstimateFunctionSizePass(EstimateFunctionSize::AnalysisLevel);

} // namespace IGC