File: LowerGEPForPrivMem.hpp

package info (click to toggle)
intel-graphics-compiler 1.0.17791.18-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 102,312 kB
  • sloc: cpp: 935,343; lisp: 286,143; ansic: 16,196; python: 3,279; yacc: 2,487; lex: 1,642; pascal: 300; sh: 174; makefile: 27
file content (178 lines) | stat: -rw-r--r-- 7,041 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2024 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#pragma once
#include "common/LLVMWarningsPush.hpp"
#include <llvm/IR/InstVisitor.h>
#include <llvm/IR/Instructions.h>
#include <llvm/Pass.h>
#include "common/LLVMWarningsPop.hpp"

#include <vector>
#include <memory>

namespace IGC
{
    /// Status codes describing whether a private array can be promoted to
    /// registers and, if not, which check rejected it.
    /// NOTE(review): the per-value remarks below are inferred from the
    /// enumerator names only - confirm against the pass implementation.
    enum StatusPrivArr2Reg
    {
        OK                  = 0, // no objection recorded
        IsDynamicAlloca     = 1, // presumably: the alloca size is not a compile-time constant
        CannotUseSOALayout  = 2, // presumably: the SOA layout analysis rejected the alloca
        IsNotNativeType     = 3, // presumably: the element type is not natively supported
        OutOfAllocSizeLimit = 4, // presumably: the allocation exceeds a size limit
        OutOfMaxGRFPressure = 5  // presumably: promotion would exceed the register-pressure budget
    };

    /// Creates the function pass that tries to promote an array in private
    /// memory to an indexable vector. The pass uses register pressure to make
    /// sure the promotion won't cause spilling.
    ///
    /// Returns a pointer to the newly created llvm::FunctionPass.
    /// NOTE(review): presumably the caller (e.g. a pass manager) takes
    /// ownership of the returned pass - confirm against the call sites.
    llvm::FunctionPass* createPromotePrivateArrayToReg();

    /// Plain record holding the outcome of the "structure of arrays" (SOA)
    /// layout analysis for a single variable.
    struct SOALayoutInfo
    {
        /// Signifies if SOA layout can be applied to this variable.
        bool canUseSOALayout;
        /// The base type that all memory instructions in the use graph operate
        /// upon. Can be a compound type as long as no instruction in the chain
        /// reads from/stores into the underlying smaller strides of the
        /// primitive type.
        llvm::Type* baseType;
        /// Signifies if all memory instructions that operate on a
        /// compound-typed variable are vector-typed.
        bool allUsesAreVector;
        /// Partition size, in bytes, for the new algorithm.
        uint32_t SOAPartitionBytes;

        /// Default state: SOA layout not applicable, no base type, and a
        /// partition size of 4 bytes.
        SOALayoutInfo() : canUseSOALayout(false), baseType(nullptr),
                          allUsesAreVector(false), SOAPartitionBytes(4) {}

        /// Builds a fully specified result.
        ///
        /// @param canUseSOALayout   whether SOA layout can be applied.
        /// @param baseType          base type the memory instructions operate on.
        /// @param allUsesAreVector  whether every memory use is vector-typed.
        /// @param Size              partition size in bytes.
        ///
        /// Every argument is stored as given; in particular 'allUsesAreVector'
        /// must be forwarded to the member rather than being reset to false.
        SOALayoutInfo(bool canUseSOALayout, llvm::Type* baseType,
                      bool allUsesAreVector, uint32_t Size) :
            canUseSOALayout(canUseSOALayout), baseType(baseType),
            allUsesAreVector(allUsesAreVector), SOAPartitionBytes(Size) {}

        /// Copy construction takes a const reference so that const objects
        /// can be copied as well.
        SOALayoutInfo(const SOALayoutInfo&) = default;
        ~SOALayoutInfo() = default;
    };

    /// Conceptually, this class is quite similar to 'llvm::PtrUseVisitor', but
    /// looks to ensure that actual boolean values are returned by 'visit()'
    /// calls. Through this depth-first traversal of the alloca's use-graph, we
    /// aim to determine if "structure of arrays" layout can be applied to the
    /// initialized variable.
    ///
    /// TODO: Consider generalizing the traversal logic as a parent interface
    /// (e.g. 'InstUseVisitorBase') so that it could be re-used across IGC.
    /// Long-term, we could also propose this general use-based visitor to the
    /// LLVM community as an extension/replacement for 'PtrUseVisitor'.
    ///
    /// TODO: Given that PrivateMemoryResolution also relies on SOA layout
    /// evaluation, it would be better to factor this out into a separate
    /// analysis pass.
    class SOALayoutChecker : public llvm::InstVisitor<SOALayoutChecker, bool>
    {
    public:
        // Lets the CRTP visitor base reach the private visit*() handlers.
        friend llvm::InstVisitor<SOALayoutChecker, bool>;

        // A checker is always bound to an alloca, so default construction
        // and copying are both disabled.
        SOALayoutChecker() = delete;
        SOALayoutChecker(SOALayoutChecker&) = delete;

        // isOCL is for testing, it will be removed once testing is done.
        SOALayoutChecker(llvm::AllocaInst& allocaToCheck, bool isOCL);
        ~SOALayoutChecker() = default;

        // Returns the SOA layout information for the alloca being checked.
        // NOTE(review): the name and the pInfo member suggest the result is
        // gathered once and then reused - confirm in the implementation.
        SOALayoutInfo getOrGatherInfo();

        // For the new algo: it is selected whenever newAlgoControl is above 1,
        // or when newAlgoControl is exactly 1 and 'baseTy' is a struct type.
        bool useNewAlgo(llvm::Type* baseTy) const {
            if (newAlgoControl > 1)
            {
                return true;
            }
            return newAlgoControl == 1 && baseTy->isStructTy();
        }

    private:
        llvm::AllocaInst& allocaRef;
        const llvm::DataLayout* pDL;
        std::unique_ptr<SOALayoutInfo> pInfo;

        // ===== fields for new algo =====
        // todo: combine the new and old together
        //
        // SOAPartitionBytes : chunk size used to split a buffer into a
        //   sequence of chunks. It should be a power of 2 and at least 4;
        //   it is chosen as the larger of 4 and the size of any scalar
        //   element type.
        uint32_t SOAPartitionBytes = 4;
        // newAlgoControl
        //   old algo : array of DW[xn]
        //   new algo : array of DW[xn], array of QW[xn],
        //              array of structs.
        //    0 : new algorithm disabled
        //    1 : new algorithm enabled for array of simple struct
        //    2 : new algorithm enabled for array of simple struct,
        //        array of dw[xn], array of qw[xn]
        //        (not splitting vector, intended to replace the old algo)
        //    3 : same as 2, plus array of more complicated structs.
        int newAlgoControl = 0;
        uint32_t selectPartitionSize(llvm::Type* Ty);
        // Returns true if the struct can be transposed.
        bool checkStruct(llvm::StructType* StTy);
        // ===== end of fields for new algo =====

        bool isVectorSOA = true;
        llvm::Instruction* parentLevelInst = nullptr;

        /// Visits the direct users of the instruction and stops the traversal
        /// as soon as any of the visits returns false.
        ///
        /// TODO: Consider a worklist-based implementation instead. If achieved, it
        /// would make sense to rename the method into `enqueueUsers', in adherence with
        /// the convention set by llvm::PtrUseVisitor.
        bool checkUsers(llvm::Instruction& I);

        /// Fallback handler: the analysis can only reason about memory
        /// instructions, so any instruction without a dedicated handler
        /// yields 'false'.
        bool visitInstruction(llvm::Instruction&) { return false; }

        // Dedicated handlers for the instruction kinds the analysis handles.
        bool visitBitCastInst(llvm::BitCastInst&);
        bool visitGetElementPtrInst(llvm::GetElementPtrInst&);
        bool visitIntrinsicInst(llvm::IntrinsicInst&);
        bool visitLoadInst(llvm::LoadInst&);
        bool visitStoreInst(llvm::StoreInst&);
    };

    /// Abstract helper that walks the uses of an alloca; subclasses supply
    /// the handling of loads, stores and lifetime markers through the pure
    /// virtual hooks below.
    class TransposeHelper
    {
    public:
        /// @param DL           data layout, kept by reference (must outlive
        ///                     this object).
        /// @param vectorIndex  stored in m_vectorIndex.
        ///                     NOTE(review): presumably selects vector-typed
        ///                     indexing - confirm in the implementation.
        ///
        /// Members are initialized in declaration order (m_DL, then
        /// m_vectorIndex); the initializer list is written in the same order.
        TransposeHelper(const llvm::DataLayout& DL, bool vectorIndex)
            : m_DL(DL)
            , m_vectorIndex(vectorIndex)
        {}
        /// Virtual because this is a polymorphic base: destroying a derived
        /// helper through a TransposeHelper pointer must run the derived
        /// destructor.
        virtual ~TransposeHelper() = default;

        void HandleAllocaSources(
            llvm::Instruction* v,
            llvm::Value* idx);
        void handleGEPInst(
            llvm::GetElementPtrInst* pGEP,
            llvm::Value* idx);
        // Temporary, this is to replace handleGEPInst
        void handleGEPInstNew(
            llvm::GetElementPtrInst* pGEP,
            llvm::Value* idx);
        /// Hook implemented by subclasses for a load, given its scalarized index.
        virtual void handleLoadInst(
            llvm::LoadInst* pLoad,
            llvm::Value* pScalarizedIdx) = 0;
        /// Hook implemented by subclasses for a store, given its scalarized index.
        virtual void handleStoreInst(
            llvm::StoreInst* pStore,
            llvm::Value* pScalarizedIdx) = 0;
        /// Hook implemented by subclasses for a lifetime marker intrinsic.
        virtual void handleLifetimeMark(llvm::IntrinsicInst* inst) = 0;
        // Selects handleGEPInstNew; the default implementation returns false.
        virtual bool useNewAlgo() { return false; }
        void EraseDeadCode();
    protected:
        const llvm::DataLayout& m_DL;
        // NOTE(review): presumably the GEPs collected here are erased by
        // EraseDeadCode() - confirm in the implementation.
        std::vector<llvm::Instruction*> m_toBeRemovedGEP;
    private:
        bool m_vectorIndex;
        std::pair<unsigned int, llvm::Type*> getArrSizeAndEltType(llvm::Type* T);
    };
}