File: ShaderCodeGen.hpp

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (817 lines) | stat: -rw-r--r-- 31,833 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#pragma once

#include "common/LLVMUtils.h"
#include "Compiler/CISACodeGen/DebugInfoData.hpp"
#include "Compiler/CISACodeGen/CVariable.hpp"
#include "Compiler/CISACodeGen/PushAnalysis.hpp"
#include "Compiler/CISACodeGen/helper.h"
#include "Compiler/CISACodeGen/CISACodeGen.h"
#include "Compiler/CISACodeGen/CISABuilder.hpp"
#include "Compiler/CISACodeGen/LiveVars.hpp"
#include "Compiler/CISACodeGen/WIAnalysis.hpp"
#include "Compiler/CISACodeGen/CoalescingEngine.hpp"
#include "Compiler/CodeGenPublic.h"
#include "Compiler/MetaDataApi/MetaDataApi.h"
// Needed for SConstantGatherEntry
#include "usc_gen7.h"
#include "common/Types.hpp"
#include "common/LLVMWarningsPush.hpp"
#include <llvm/ADT/DenseMap.h>
#include <llvm/ADT/MapVector.h>
#include "common/LLVMWarningsPop.hpp"
#include "common/debug/Dump.hpp"
#include <algorithm>
#include <array>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "Probe/Assertion.h"

// Forward declarations of LLVM IR classes, so this header can name them by
// pointer or reference without requiring their full definitions here.
namespace llvm
{
    class Value;
    class PHINode;
    class Function;
    class BasicBlock;
}

namespace IGC
{
// Forward declarations of IGC types referenced by the declarations below.
class DeSSA;
class CoalescingEngine;
class GenXFunctionGroupAnalysis;
class VariableReuseAnalysis;

struct PushInfo;

// Helper Function
// Free helper functions (defined elsewhere).
// NOTE(review): the second parameter of GetType is a CodeGenContext* even
// though it is named pDataLayout.
VISA_Type GetType(llvm::Type* pType, CodeGenContext* pDataLayout);
// Returns a 64-bit immediate for the given value; presumably Const must be a
// constant - confirm against the definition.
uint64_t GetImmediateVal(llvm::Value* Const);
// Returns the preferred alignment for Val, given the WIAnalysis results and
// the code-gen context.
e_alignment GetPreferredAlignment(llvm::Value* Val, WIAnalysis* WIA, CodeGenContext* pContext);

// Declared ahead of CShader, which names it as a friend and stores a pointer
// to it.
class CShaderProgram;

///--------------------------------------------------------------------------------------------------------
/// Code-generation state for one shader variant. CShaderProgram hands out
/// CShader instances per SIMDMode / ShaderDispatchMode (see
/// CShaderProgram::GetOrCreateShader), and is a friend of this class.
class CShader
{
public:
    friend class CShaderProgram;

    /// Read-only wrapper around the 32-bit extract mask of a vector value.
    /// When no mask is available (m_hasEM is false) every bit position is
    /// reported as set.
    class ExtractMaskWrapper
    {
        // To enable ExtractMask of any vector size. Currently, only vector
        // whose size is no larger than 32 has its extractMask calculated.
    private:
        uint32_t m_EM;     // 32 bit extractMask;
        bool     m_hasEM;  // If true, m_EM is valid; otherwise, not valid.
    public:
        ExtractMaskWrapper(CShader* pS, llvm::Value* VecVal);

        // Not default-constructible and not copyable.
        ExtractMaskWrapper() = delete;
        ExtractMaskWrapper(const ExtractMaskWrapper&) = delete;
        ExtractMaskWrapper& operator=(const ExtractMaskWrapper&) = delete;

        /// Tests bit \p b of the mask.
        /// \param b bit position, from 0 to 31.
        /// \returns true if the bit is set, or if no mask is available.
        bool isSet(uint32_t b) const
        {
            if (!m_hasEM) {
                return true;
            }
            IGC_ASSERT(b < 32);
            // Shift an unsigned one: a signed `1 << 31` would shift into the
            // sign bit of int.
            return ((uint32_t{1} << b) & m_EM) != 0;
        }

        /// Raw mask value; only meaningful when hasEM() is non-zero.
        uint32_t getEM() const { return m_EM; }
        /// Non-zero when the mask is valid.
        /// NOTE(review): returns uint16_t although the flag is a bool; the
        /// return type is kept unchanged for existing callers.
        uint16_t hasEM() const { return m_hasEM; }
    };

    // Construction / destruction. The function and owning program are passed
    // in; the definitions live outside this header.
    CShader(llvm::Function*, CShaderProgram* pProgram);
    virtual ~CShader();
    void        Destroy();
    virtual void InitEncoder(SIMDMode simdMode, bool canAbortOnSpill, ShaderDispatchMode shaderMode = ShaderDispatchMode::NOT_APPLICABLE);

    // Overridable hooks. The ones with an inline body here do nothing in the
    // base class (beyond marking their argument as unused).
    virtual void PreCompile() {}
    virtual void PreCompileFunction(llvm::Function& F) { IGC_UNUSED(F); }
    virtual void ParseShaderSpecificOpcode(llvm::Instruction* inst) { IGC_UNUSED(inst); }
    virtual void AllocatePayload() {}
    virtual void AddPrologue() {}
    virtual void PreAnalysisPass();
    virtual void ExtractGlobalVariables() {}
    void         EOTURBWrite();
    void         EOTRenderTarget(CVariable* r1, bool isPerCoarse);
    CVariable* URBFence();
    void         EOTGateway(CVariable* payload = nullptr);
    virtual void AddEpilogue(llvm::ReturnInst* ret);

    // Base-class stub: asserts and returns nullptr. Derived classes that use
    // a URB output handle are expected to override it.
    virtual CVariable* GetURBOutputHandle()
    {
        IGC_ASSERT_MESSAGE(0, "Should be overridden in a derived class!");
        return nullptr;
    }
    // Base-class stub: ignores pVertexIndex, asserts and returns nullptr.
    virtual CVariable* GetURBInputHandle(CVariable* pVertexIndex)
    {
        IGC_UNUSED(pVertexIndex);
        IGC_ASSERT_MESSAGE(0, "Should be overridden in a derived class!");
        return nullptr;
    }

    // Base-class stubs: each asserts and returns nullptr; shader types that
    // provide these pointers/IDs are expected to override them.
    virtual CVariable* GetGlobalBufferPtr() { IGC_ASSERT(0); return nullptr; }
    virtual CVariable* GetLocalBufferPtr() { IGC_ASSERT(0); return nullptr; }
    virtual CVariable* GetStackID() { IGC_ASSERT(0); return nullptr; }
    virtual CVariable* GetInlinedDataPtr() { IGC_ASSERT(0); return nullptr; }
    // passNOSInlineData: if true, HW will pass one GRF NOS of inlinedata to
    // payload (compute only right now). The base class returns false.

    virtual bool passNOSInlineData() { return false; }
    // The remaining queries default to false / 0 in the base class.
    virtual bool loadThreadPayload() { return false; }
    virtual unsigned getAnnotatedNumThreads() { return 0; }
    virtual bool IsRegularGRFRequested() { return false; }
    virtual bool IsLargeGRFRequested() { return false; }
    // Base class: ignores F and reports no read-write image.
    virtual bool hasReadWriteImage(llvm::Function& F)
    {
        IGC_UNUSED(F);
        return false;
    }
    // Base class: ignores EP and F and defers to CompileSIMDSizeInCommon.
    virtual bool CompileSIMDSize(SIMDMode simdMode, EmitPass& EP, llvm::Function& F)
    {
        IGC_UNUSED(F);
        IGC_UNUSED(EP);
        return CompileSIMDSizeInCommon(simdMode);
    }
    // Returns the backing variable for ccTuple; by its name it is created on
    // first request - confirm against the definition. baseType defaults to
    // ISA_TYPE_UD.
    CVariable* LazyCreateCCTupleBackingVariable(
        CoalescingEngine::CCTuple* ccTuple,
        VISA_Type baseType = ISA_TYPE_UD);
    // Returns the CVariable associated with an LLVM value.
    CVariable* GetSymbol(llvm::Value* value, bool fromConstantPool = false);
    // Payload/setup bookkeeping; presumably these fill the setup vectors
    // declared further down in this class - confirm against the definitions.
    void        AddSetup(uint index, CVariable* var);
    bool        AppendPayloadSetup(CVariable* var);
    void        AddPatchTempSetup(CVariable* var);
    void        AddPatchConstantSetup(uint index, CVariable* var);

    // TODO: simplify calls to GetNewVariable to these shorter and more
    // expressive cases where possible.
    //
    // CVariable* GetNewVector(VISA_Type type, const CName &name) {
    //     return GetNewVariable(numLanes(m_SIMDSize), type, EALIGN_GRF, false, name);
    // }
    // CVariable* GetNewUniform(VISA_Type type, const CName &name) {
    //    grep a GetNewVariable(1, .. true) and see what B and W use
    //     return GetNewVariable(1, type, alignOf_TODO(type), true, name);
    // }

    /// Convenience overload: forwards to the full GetNewVariable with
    /// uniform == false and a single instance.
    CVariable* GetNewVariable(
        uint16_t nbElement,
        VISA_Type type,
        e_alignment align,
        const CName &name)
    {
        const bool isUniform = false;
        const uint16_t instanceCount = 1;
        return GetNewVariable(nbElement, type, align, isUniform, instanceCount, name);
    }
    /// Convenience overload: forwards to the full GetNewVariable with the
    /// given uniformity and a single instance.
    CVariable* GetNewVariable(
        uint16_t nbElement,
        VISA_Type type,
        e_alignment align,
        UniformArgWrap uniform,
        const CName &name)
    {
        const uint16_t instanceCount = 1;
        return GetNewVariable(nbElement, type, align, uniform, instanceCount, name);
    }
    // Full form of GetNewVariable; the shorter inline overloads forward here
    // with numberInstance == 1.
    CVariable* GetNewVariable(
        uint16_t nbElement,
        VISA_Type type,
        e_alignment align,
        UniformArgWrap uniform,
        uint16_t numberInstance,
        const CName &name);
    // Creates a new variable from an existing one; presumably it copies the
    // properties of `from` - confirm against the definition.
    CVariable* GetNewVariable(const CVariable* from);
    CVariable* GetNewAddressVariable(
        uint16_t nbElement,
        VISA_Type type,
        UniformArgWrap uniform,
        bool vectorUniform,
        const CName &name);
    // preferredAlign defaults to EALIGN_AUTO.
    CVariable* GetNewVector(llvm::Value* val, e_alignment preferredAlign = EALIGN_AUTO);
    // Alias creation: two overloads, the second taking an explicit `uniform`
    // flag.
    CVariable* GetNewAlias(CVariable* var, VISA_Type type, uint16_t offset, uint16_t numElements);
    CVariable* GetNewAlias(CVariable* var, VISA_Type type, uint16_t offset, uint16_t numElements, bool uniform);

    // If BaseVar's type matches V's, return BaseVar; otherwise, create a new
    // alias CVariable to BaseVar. The newly-created alias CVariable's size
    // should be the same as BaseVar's size (used for creating alias for values
    // in the same DeSSA's congruent class).
    CVariable* createAliasIfNeeded(llvm::Value* V, CVariable* BaseVar);
    // Creates an alias of a variable that selects one slice ("half") of it,
    // to allow cross-lane operations in SIMD32.
    CVariable* GetVarHalf(CVariable* var, unsigned int half);

    // Variable copy helpers. The sub-variable indices default to 0.
    void        CopyVariable(CVariable* dst, CVariable* src, uint dstSubVar = 0, uint srcSubVar = 0);
    void        PackAndCopyVariable(CVariable* dst, CVariable* src, uint subVar = 0);
    void        CopyVariableRaw(CVariable* dst, CVariable* src);
    CVariable*  CopyVariableRaw(CVariable* src, bool singleInstance = true);
    bool        IsValueUsed(llvm::Value* value);
    CVariable*  GetGlobalCVar(llvm::Value* value);
    // The element-count queries below also report a mask through `mask`.
    uint        GetNbElementAndMask(llvm::Value* value, uint32_t& mask);
    void        CreatePayload(uint regCount, uint idxOffset, CVariable*& payload, llvm::Instruction* inst, uint paramOffset, uint8_t hfFactor);
    uint        GetNbVectorElementAndMask(llvm::Value* value, uint32_t& mask);
    uint16_t    AdjustExtractIndex(llvm::Value* value, uint16_t elemIndex);
    // Dependency / uniformity queries; presumably backed by the WIAnalysis
    // installed through SetUniformHelper - confirm against the definitions.
    WIBaseClass::WIDependancy GetDependency(llvm::Value* v) const;
    void        SetDependency(llvm::Value* v, WIBaseClass::WIDependancy dep);
    bool        GetIsUniform(llvm::Value* v) const;
    bool        InsideDivergentCF(const llvm::Instruction* inst) const;
    bool        InsideWorkgroupDivergentCF(const llvm::Instruction* inst) const;
    CEncoder& GetEncoder();
    // Accessors for special variables; most have a matching m_* member
    // declared later in this class (e.g. m_R0, m_NULL, m_SP).
    CVariable* GetR0();
    CVariable* GetNULL();
    CVariable* GetTSC();
    CVariable* GetSR0();
    CVariable* GetCR0();
    CVariable* GetCE0();
    CVariable* GetDBG();
    CVariable* GetMSG0();
    CVariable* GetHWTID();
    CVariable* GetSP();
    CVariable* GetFP();
    CVariable* GetPrevFP();
    CVariable* GetARGV();
    CVariable* GetRETV();
    CVariable* GetPrivateBase();
    CVariable* GetImplArgBufPtr();
    CVariable* GetLocalIdBufPtr();
    // Save a struct-return pointer, and later fetch-and-reset it (see
    // m_SavedSRetPtr).
    void SaveSRet(CVariable* sretPtr);
    CVariable* GetAndResetSRet();

    // True once the stack / frame pointer variable is non-null.
    bool hasSP() const { return m_SP != nullptr; }
    bool hasFP() const { return m_FP != nullptr; }

    void InitializeStackVariables();
    void SaveStackState();
    void RestoreStackState();

    void InitializeScratchSurfaceStateAddress();

    // `src` is passed by reference to a pointer, so the callee may replace
    // the variable.
    void RemoveBitRange(CVariable*& src, unsigned removebit, unsigned range);

    void        AllocateInput(CVariable* var, uint offset, uint instance = 0, bool forceLiveOut = false);
    void        AllocateOutput(CVariable* var, uint offset, uint instance = 0);
    // Immediate / constant / undef variable factories.
    CVariable* ImmToVariable(uint64_t immediate, VISA_Type type, bool isCodePatchCandidate = false);
    CVariable* GetConstant(llvm::Constant* C, CVariable* dstVar = nullptr);
    CVariable* GetScalarConstant(llvm::Value* c);
    CVariable* GetUndef(VISA_Type type);
    llvm::Constant* findCommonConstant(llvm::Constant* C, uint elts, uint currentEmitElts, bool& allSame);
    // Overridable mapping of a value to a global mapping value / variable.
    virtual unsigned int GetGlobalMappingValue(llvm::Value* c);
    virtual CVariable* GetGlobalMapping(llvm::Value* c);
    CVariable* BitCast(CVariable* var, VISA_Type newType);
    // Presumably fills m_argListCache - confirm against the definition.
    void        CacheArgumentsList();
    virtual void MapPushedInputs();
    void        CreateGatherMap();
    void        CreateConstantBufferOutput(SKernelProgram* pKernelProgram);
    void        CreateFunctionSymbol(llvm::Function* pFunc);
    void        CreateGlobalSymbol(llvm::GlobalVariable* pGlobal);
    CVariable*  GetStructVariable(llvm::Value* v, bool forceVectorInit = false);

    void        CreateImplicitArgs();
    void        CreateAliasVars();
    uint        GetNumSBlocks() { return m_numBlocks; }

    // Setters that install the analyses and helpers this shader works with.
    // Each stores the given pointer in the corresponding member, except
    // SetPushInfoHelper, which copies *PI (PI must therefore be non-null).
    void        SetUniformHelper(WIAnalysis* WI) { m_WI = WI; }
    void        SetDeSSAHelper(DeSSA* deSSA) { m_deSSA = deSSA; }
    void        SetCoalescingEngineHelper(CoalescingEngine* ce) { m_coalescingEngine = ce; }
    void        SetCodeGenHelper(CodeGenPatternMatch* CG) { m_CG = CG; }
    void        SetPushInfoHelper(PushInfo* PI) { pushInfo = *PI; }
    void        SetDominatorTreeHelper(llvm::DominatorTree* DT) { m_DT = DT; }
    void        SetDataLayout(const llvm::DataLayout* DL) { m_DL = DL; }
    void        SetFunctionGroupAnalysis(GenXFunctionGroupAnalysis* FGA) { m_FGA = FGA; }
    void        SetVariableReuseAnalysis(VariableReuseAnalysis* VRA) { m_VRA = VRA; }
    void        SetMetaDataUtils(IGC::IGCMD::MetaDataUtils* pMdUtils) { m_pMdUtils = pMdUtils; }
    void        SetScratchSpaceSize(uint size) { m_ScratchSpaceSize = size; }
    IGCMD::MetaDataUtils* GetMetaDataUtils() { return m_pMdUtils; }

    // Base class: ignores emitPass.
    virtual  void SetShaderSpecificHelper(EmitPass* emitPass) { IGC_UNUSED(emitPass); }

    // Constant allocation; `offset` is an in/out parameter.
    void        AllocateConstants(uint& offset);
    void        AllocateSimplePushConstants(uint& offset);
    void        AllocateNOSConstants(uint& offset);
    void        AllocateConstants3DShader(uint& offset);
    // The shader type is read from the code-gen context.
    ShaderType  GetShaderType() const { return GetContext()->type; }
    bool        IsPatchablePS();

    // Barrier bookkeeping: a barrier is considered present when
    // m_BarrierNumber is greater than zero. SetHasBarrier only raises the
    // count from 0 to 1 and never lowers an already-set count.
    bool        GetHasBarrier() const { return m_BarrierNumber > 0; }
    void        SetHasBarrier() { if (m_BarrierNumber == 0) m_BarrierNumber = 1; }
    void        SetBarrierNumber(int BarrierNumber) { m_BarrierNumber = BarrierNumber; }
    int         GetBarrierNumber() const { return m_BarrierNumber; }

    // The result is returned through pVar.
    void        GetSimdOffsetBase(CVariable*& pVar, bool dup = false);
    /// Returns a simd8 register filled with values [24, 20, 16, 12, 8, 4, 0]
    /// that are used to index subregisters of a GRF when counting offsets in bytes.
    /// Used e.g. for indirect addressing via a0 register.
    /// NOTE(review): only seven values are listed for a simd8 register (28
    /// appears to be missing), and the list matches a 4-byte type size only -
    /// confirm against the definition.
    CVariable* GetPerLaneOffsetsReg(uint typeSizeInBytes);

    void        GetPayloadElementSymbols(llvm::Value* inst, CVariable* payload[], int vecWidth);

    CodeGenContext* GetContext() const { return m_ctx; }

    SProgramOutput* ProgramOutput();

    bool CanTreatAsAlias(llvm::ExtractElementInst* inst);
    bool CanTreatScalarSourceAsAlias(llvm::InsertElementInst*);

    bool HasBecomeNoop(llvm::Instruction* inst);

    // If V is not in any congruent class, not aliased to any other
    // variables, not payload-coalesced, then this function returns
    // true.
    // NOTE(review): given the function name, the condition described above
    // looks like the case where the result should be false - confirm against
    // the definition.
    bool IsCoalesced(llvm::Value* V);

    bool VMECoalescePattern(llvm::GenIntrinsicInst*);

    bool isUnpacked(llvm::Value* value);

    /// Return true if we are sure that all lanes are active at the beginning of the thread.
    /// The base class returns false.
    virtual bool HasFullDispatchMask() { return false; }
    bool needsEntryFence() const;

    std::pair<bool, unsigned> getExtractMask(Value *V) const {
        auto It = extractMasks.find(V);
        if (It == extractMasks.end())
            return std::make_pair(false, 0);
        return std::make_pair(true, It->second);
    }

    // Function this shader is compiled from; CShaderProgram::clearBeforeRetry
    // resets it to nullptr.
    llvm::Function* entry = nullptr;
    const CBTILayout* m_pBtiLayout = nullptr;
    const CPlatform* m_Platform = nullptr;
    const CDriverInfo* m_DriverInfo = nullptr;

    ModuleMetaData* m_ModuleMetadata = nullptr;

    /// Dispatch size is the number of logical threads running in one hardware thread
    SIMDMode m_dispatchSize;
    /// Dispatch mode of this shader (a ShaderDispatchMode value).
    ShaderDispatchMode m_ShaderDispatchMode;
    /// SIMD size: the default emit size for this shader. This is the default size for variables as well
    /// as the default execution size for each instruction. encoder may override it explicitly
    /// via CEncoder::SetSIMDSize
    SIMDMode m_SIMDSize;
    uint8_t m_numberInstance = 0;
    // Copy of the push info installed through SetPushInfoHelper.
    PushInfo pushInfo;
    // NOTE(review): the two flags below have no in-class initializer;
    // presumably the constructor sets them - confirm.
    bool isInputsPulled; //true if any input is pulled, false otherwise
    bool isMessageTargetDataCacheDataPort;
    uint m_sendStallCycle = 0;
    uint m_staticCycle = 0;
    uint m_loopNestedStallCycle = 0;
    uint m_loopNestedCycle= 0;
    unsigned m_spillSize = 0;
    float m_spillCost = 0;          // num weighted spill inst / total inst

    std::vector<llvm::Value*> m_argListCache;

    /// The size in byte used by igc (non-spill space). And this
    /// is the value passed to VISA so that VISA's spill, if any,
    /// will go after this space.
    uint m_ScratchSpaceSize = 0;

    CVariable* m_ScratchSurfaceAddress = nullptr;

    ShaderStats* m_shaderStats = nullptr;

    // Number of binding table entries per cache line.
    static constexpr DWORD cBTEntriesPerCacheLine = 32;
    // Max BTI value that can increase binding table count.
    // SampleEngine:    Binding Table Index is set to 252 specifies the bindless surface offset.
    // DataPort:        The special entry 255 is used to reference Stateless A32 or A64 address model,
    //                  and the special entry 254 is used to reference the SLM address model.
    //                  The special entry 252 is used to reference bindless resource operation.
    static constexpr DWORD MAX_BINDING_TABLE_INDEX = 251;
    // Bit 5 of the message extended descriptor.
    static constexpr uint cMessageExtendedDescriptorEOTBit = BIT(5);

    /// Returns the root variable recorded for \p ccTuple in ccTupleMapping,
    /// or nullptr when the tuple has no entry.
    CVariable* GetCCTupleToVariableMapping(CoalescingEngine::CCTuple* ccTuple)
    {
        // Use lookup() rather than operator[]: operator[] would insert a
        // null entry for an unknown tuple as a side effect of a pure query,
        // making later find()-based checks see the tuple as already mapped.
        return ccTupleMapping.lookup(ccTuple);
    }

    // Records (or overwrites) the variable associated with constant C.
    void addConstantInPool(llvm::Constant* C, CVariable* Var) {
        ConstantPool[C] = Var;
    }

    // Returns the variable recorded for C, or nullptr when there is none
    // (DenseMap::lookup does not insert).
    CVariable* lookupConstantInPool(llvm::Constant* C) {
        return ConstantPool.lookup(C);
    }

    unsigned int EvaluateSIMDConstExpr(llvm::Value* C);

    /// Initialize per function status.
    void BeginFunction(llvm::Function* F);
    // This method splits payload interpolations from the shader into another compilation unit
    void SplitPayloadFromShader(llvm::Function* F);
    /// This method is used to create the vISA variable for function F's formal return value
    CVariable* getOrCreateReturnSymbol(llvm::Function* F);
    /// This method is used to create the vISA variable for function F's formal argument
    CVariable* getOrCreateArgumentSymbol(
        llvm::Argument* Arg,
        bool ArgInCallee, // true if Arg isn't in current func
        bool useStackCall = false);
    void UpdateSymbolMap(llvm::Value* v, CVariable* CVar);
    // Member overloads; a free function GetType taking an explicit context is
    // declared at namespace scope.
    VISA_Type GetType(llvm::Type* type);
    uint32_t GetNumElts(llvm::Type* type, bool isUniform = false);

    /// Evaluate constant expression and return the result immediate value.
    uint64_t GetConstantExpr(llvm::ConstantExpr* C);


    /// Number of binding table entries in use: zero when no entry has been
    /// marked in the used-entries bitmap, otherwise the highest used index
    /// plus one.
    uint32_t GetMaxUsedBindingTableEntryCount(void) const
    {
        const bool anyEntryUsed = (m_BindingTableUsedEntriesBitmap != 0);
        // m_BindingTableEntryCount holds an index, hence the '+ 1' to turn it
        // into a count.
        return anyEntryUsed ? (m_BindingTableEntryCount + 1) : 0;
    }

    // Returns the bitmap of used binding table entries. Judging by
    // SetBindingTableEntryCountAndBitmap, each bit covers a group of
    // cBTEntriesPerCacheLine entries.
    uint32_t GetBindingTableEntryBitmap(void) const
    {
        return m_BindingTableUsedEntriesBitmap;
    }

    /// Records a binding table access so that the used-entry count, the
    /// used-entries bitmap and the per-buffer-type "loaded" masks reflect it.
    ///
    /// \param directIdx true when the access uses a known index; false for
    ///                  indirect addressing, in which case the whole range
    ///                  described by the BTI layout is marked as used.
    /// \param bufType   kind of buffer being accessed.
    /// \param typeBti   index within the buffer type (direct access only).
    /// \param bti       binding table index; values above
    ///                  MAX_BINDING_TABLE_INDEX are ignored.
    ///
    /// NOTE(review): typeBti is used unchecked as a bit position and, for
    /// RESOURCE, as an index into m_shaderResourceLoaded; callers are relied
    /// upon to pass values within range.
    void SetBindingTableEntryCountAndBitmap(bool directIdx, BufferType bufType, uint32_t typeBti, uint32_t bti)
    {
        if (bti > MAX_BINDING_TABLE_INDEX)
        {
            // Special entries never grow the binding table.
            return;
        }

        if (directIdx)
        {
            if (bti <= m_pBtiLayout->GetBindingTableEntryCount())
            {
                m_BindingTableEntryCount = std::max(bti, m_BindingTableEntryCount);
            }
            m_BindingTableUsedEntriesBitmap |= BIT(bti / cBTEntriesPerCacheLine);

            if (bufType == RESOURCE)
            {
                m_shaderResourceLoaded[typeBti / 32] |= BIT(typeBti % 32);
            }
            else if (bufType == CONSTANT_BUFFER)
            {
                m_constantBufferLoaded |= BIT(typeBti);
            }
            else if (bufType == UAV)
            {
                m_uavLoaded |= QWBIT(typeBti);
            }
            else if (bufType == RENDER_TARGET)
            {
                m_renderTargetLoaded |= BIT(typeBti);
            }
            return;
        }

        // Indirect addressing, set the maximum BTI.
        m_BindingTableEntryCount = m_pBtiLayout->GetBindingTableEntryCount();
        m_BindingTableUsedEntriesBitmap |= BITMASK_RANGE(0, (m_BindingTableEntryCount / cBTEntriesPerCacheLine));

        if (bufType == RESOURCE || bufType == BINDLESS_TEXTURE)
        {
            const auto textureIndexSize = m_pBtiLayout->GetTextureIndexSize();
            const unsigned int MaxArray = textureIndexSize / 32;

            // Every fully covered 32-bit word is marked entirely.
            for (unsigned int i = 0; i < MaxArray; i++)
            {
                m_shaderResourceLoaded[i] = 0xffffffff;
            }

            // Remaining indices fall into one partially covered word. The
            // bits must be accumulated with |=; a plain assignment would keep
            // only the bit of the last index.
            for (unsigned int i = MaxArray * 32; i < textureIndexSize; i++)
            {
                m_shaderResourceLoaded[MaxArray] |= BIT(i % 32);
            }
        }
        else if (bufType == CONSTANT_BUFFER || bufType == BINDLESS_CONSTANT_BUFFER)
        {
            m_constantBufferLoaded |= BITMASK_RANGE(0, m_pBtiLayout->GetConstantBufferIndexSize());
        }
        else if (bufType == UAV || bufType == BINDLESS)
        {
            m_uavLoaded |= QWBITMASK_RANGE(0, m_pBtiLayout->GetUavIndexSize());
        }
        else if (bufType == RENDER_TARGET)
        {
            m_renderTargetLoaded |= BITMASK_RANGE(0, m_pBtiLayout->GetRenderTargetIndexSize());
        }
    }

    /// Evaluate the Sampler Count field value for the given number of samplers.
    unsigned int GetSamplerCount(unsigned int samplerCount);

    /// Static helper: return-payload size for the given intrinsic
    /// instruction (units are not visible here - see the definition).
    static unsigned GetIMEReturnPayloadSize(llvm::GenIntrinsicInst* I);

    /// Associates the per-index variables \p CVars with the vector bitcast
    /// \p BCI. Asserts that no entry exists yet; if one does, try_emplace
    /// leaves the existing entry untouched.
    void addCVarsForVectorBC(llvm::BitCastInst* BCI, llvm::SmallVector<CVariable*, 8> CVars)
    {
        IGC_ASSERT_MESSAGE(m_VectorBCItoCVars.find(BCI) == std::end(m_VectorBCItoCVars), "a variable already exists for this vector bitcast");
        // CVars is already a by-value copy owned by this call; move it into
        // the map instead of copying it a second time.
        m_VectorBCItoCVars.try_emplace(BCI, std::move(CVars));
    }

    /// Returns the variable stored for element \p index of the vector
    /// bitcast \p BCI.
    /// \returns nullptr when no variables were recorded for \p BCI, or when
    ///          \p index lies outside the recorded range (the latter also
    ///          triggers an assertion).
    CVariable* getCVarForVectorBCI(llvm::BitCastInst* BCI, int index)
    {
        auto iter = m_VectorBCItoCVars.find(BCI);
        if (iter == m_VectorBCItoCVars.end())
        {
            return nullptr;
        }

        const auto& cvars = iter->second;
        // index is signed: reject negative values before the unsigned compare.
        const bool inRange = (index >= 0) && (static_cast<unsigned>(index) < cvars.size());
        IGC_ASSERT_MESSAGE(inRange, "vector bitcast element index out of range");
        return inRange ? cvars[index] : nullptr;
    }

    // One-way flags: each Set* only ever sets its flag to true; the matching
    // Get* returns the current value.
    void SetHasGlobalStatelessAccess() { m_HasGlobalStatelessMemoryAccess = true; }
    bool GetHasGlobalStatelessAccess() const { return m_HasGlobalStatelessMemoryAccess; }
    void SetHasConstantStatelessAccess() { m_HasConstantStatelessMemoryAccess = true; }
    bool GetHasConstantStatelessAccess() const { return m_HasConstantStatelessMemoryAccess; }
    void SetHasGlobalAtomics() { m_HasGlobalAtomics = true; }
    bool GetHasGlobalAtomics() const { return m_HasGlobalAtomics; }
    bool GetHasDPAS() const { return m_HasDPAS; }
    void SetHasDPAS() { m_HasDPAS = true; }
    bool GetHasEval() const { return m_HasEval; }
    void SetHasEval() { m_HasEval = true; }
    // Counters: incremented by one per call, read back by the getters.
    void IncStatelessWritesCount() { ++m_StatelessWritesCount; }
    void IncIndirectStatelessCount() { ++m_IndirectStatelessCount; }
    uint32_t GetStatelessWritesCount() const { return m_StatelessWritesCount; }
    uint32_t GetIndirectStatelessCount() const { return m_IndirectStatelessCount; }

    // In bytes
    uint32_t getGRFSize() const { return m_Platform->getGRFSize(); }
    // In bytes: the platform's minimum push constant buffer alignment
    // multiplied by sizeof(DWORD), i.e. the platform value is taken to be in
    // DWORDs.
    uint32_t getMinPushConstantBufferAlignmentInBytes() const { return m_Platform->getMinPushConstantBufferAlignment() * sizeof(DWORD); }

    // Note that for PVC A0 simd16, PVCLSCEnabled() returns true
    // but no LSC is generated!
    bool PVCLSCEnabled() const {
        return m_Platform->isCoreChildOf(IGFX_XE_HPC_CORE) && m_Platform->hasLSC();
    }

    // Alignment value corresponding to the GRF size.
    e_alignment getGRFAlignment() const { return CVariable::getAlignment(getGRFSize()); }

    // The accessors below return mutable references to the internal maps, so
    // callers can modify them directly.

    // Value -> variable map (symbolMapping).
    llvm::DenseMap<llvm::Value*, CVariable*>& GetSymbolMapping()
    {
        return symbolMapping;
    }

    // Per-kernel map (globalSymbolMapping). Note: this is an overload of the
    // virtual GetGlobalMapping(llvm::Value*) declared earlier in this class.
    llvm::DenseMap<llvm::Value*, CVariable*>& GetGlobalMapping()
    {
        return globalSymbolMapping;
    }

    // Constant -> variable map (ConstantPool).
    llvm::DenseMap<llvm::Constant*, CVariable*>& GetConstantMapping()
    {
        return ConstantPool;
    }

    /// Payload offset recorded for kernel argument variable \p argV, or -1
    /// when the variable has no entry in kernelArgToPayloadOffsetMap.
    int64_t GetKernelArgOffset(CVariable* argV)
    {
        const auto entry = kernelArgToPayloadOffsetMap.find(argV);
        if (entry == kernelArgToPayloadOffsetMap.end())
        {
            return -1;
        }
        return static_cast<int64_t>(entry->second);
    }

    // Presumably returns the diData member - confirm against the definition.
    DebugInfoData& GetDebugInfoData();

    // Size of a type when held in a register. The *InBits variants report
    // bits; the others presumably report bytes - confirm against the
    // definitions.
    unsigned int GetPrimitiveTypeSizeInRegisterInBits(const llvm::Type* Ty) const;
    unsigned int GetPrimitiveTypeSizeInRegister(const llvm::Type* Ty) const;
    unsigned int GetScalarTypeSizeInRegisterInBits(const llvm::Type* Ty) const;
    unsigned int GetScalarTypeSizeInRegister(const llvm::Type* Ty) const;

    // One-way flags, same pattern as the other Set*/Has* pairs.
    bool HasStackCalls() const { return m_HasStackCalls; }
    void SetHasStackCalls() { m_HasStackCalls = true; }
    bool IsIntelSymbolTableVoidProgram() const { return m_isIntelSymbolTableVoidProgram; }
    void SetIsIntelSymbolTableVoidProgram() { m_isIntelSymbolTableVoidProgram = true; }

    ////////////////////////////////////////////////////////////////////
    // NOTE: for vector load/store instructions, passing the
    // optional instruction argument checks additional constraints
    static Tristate shouldGenerateLSCQuery(
        const CodeGenContext& Ctx,
        llvm::Instruction* vectorLdStInst = nullptr,
        SIMDMode Mode = SIMDMode::UNKNOWN);
    bool shouldGenerateLSC(llvm::Instruction* vectorLdStInst = nullptr);
    bool forceCacheCtrl(llvm::Instruction* vectorLdStInst = nullptr);
    uint32_t totalBytesToStoreOrLoad(llvm::Instruction* vectorLdStInst);

    // Program ID accessors (see m_shaderProgramID).
    void setShaderProgramID(int aID) { m_shaderProgramID = aID; }
    int getShaderProgramID() const { return m_shaderProgramID; }
    // The name is returned through ShaderName.
    void getShaderFileName(std::string& ShaderName) const;

protected:
    // Shared implementation behind the default CompileSIMDSize().
    bool CompileSIMDSizeInCommon(SIMDMode simdMode);
    uint32_t GetShaderThreadUsageRate();
private:
    int m_shaderProgramID = 0;   // unique for each shaderProgram
    // Return DefInst's CVariable if it could be reused for UseInst, and return
    // nullptr otherwise.
    CVariable* reuseSourceVar(llvm::Instruction* UseInst,
        llvm::Instruction* DefInst,
        e_alignment preferredAlign);

    // Return nullptr if no source variable is reused. Otherwise return a
    // CVariable from its source operand.
    CVariable* GetSymbolFromSource(llvm::Instruction* UseInst,
        e_alignment preferredAlign);

protected:
    // Owning program and compilation context.
    CShaderProgram* m_parent;
    CodeGenContext* m_ctx;
    // Analyses/helpers installed through the Set*Helper / Set* setters.
    // NOTE(review): none of these pointers has an in-class initializer;
    // presumably the constructor initializes them - confirm.
    WIAnalysis* m_WI;
    DeSSA* m_deSSA;
    CoalescingEngine* m_coalescingEngine;
    CodeGenPatternMatch* m_CG;
    llvm::DominatorTree* m_DT;
    const llvm::DataLayout* m_DL;
    GenXFunctionGroupAnalysis* m_FGA;
    VariableReuseAnalysis* m_VRA;

    uint m_numBlocks;
    IGC::IGCMD::MetaDataUtils* m_pMdUtils;

    // The allocator type differs between debug/internal and release builds.
#if defined(_DEBUG) || defined(_INTERNAL)
    llvm::SpecificBumpPtrAllocator<CVariable> Allocator;
#else
    llvm::BumpPtrAllocator Allocator;
#endif

    // Mapping from formal argument to its variable or from function to its
    // return variable. Per kernel mapping. Used when llvm functions are
    // compiled into vISA subroutine
    llvm::DenseMap<llvm::Value*, CVariable*> globalSymbolMapping;

    // Mapping from an LLVM value to its variable.
    llvm::DenseMap<llvm::Value*, CVariable*> symbolMapping;
    // Yet another map: a mapping from ccTuple to its corresponding root variable.
    // Variables that participate in congruence class tuples will be defined as
    // aliases (with respective offset) to the root variable.
    llvm::DenseMap<CoalescingEngine::CCTuple*, CVariable*> ccTupleMapping;
    // Constant pool.
    llvm::DenseMap<llvm::Constant*, CVariable*> ConstantPool;

    // keep a map when we generate accurate mask for vector value
    // in order to reduce register usage
    llvm::DenseMap<llvm::Value*, uint32_t> extractMasks;

    // keep a map for each kernel argument to its allocated payload offset
    llvm::DenseMap<CVariable*, uint32_t> kernelArgToPayloadOffsetMap;

    CEncoder encoder;
    // Setup variable lists.
    std::vector<CVariable*> setup;
    std::vector<CVariable*> payloadLiveOutSetup;
    std::vector<CVariable*> payloadTempSetup;
    std::vector<CVariable*> patchConstantSetup;
    std::vector<CVariable*> perPrimitiveSetup;

    uint m_maxBlockId;

    // Special variables, exposed through the Get* accessors (GetR0, GetNULL,
    // GetSP, ...).
    CVariable* m_R0;
    CVariable* m_NULL;
    CVariable* m_TSC;
    CVariable* m_SR0;
    CVariable* m_CR0;
    CVariable* m_CE0;
    CVariable* m_MSG0;
    CVariable* m_DBG;
    CVariable* m_HW_TID;
    CVariable* m_SP;
    CVariable* m_FP;
    CVariable* m_SavedFP;
    CVariable* m_ARGV;
    CVariable* m_RETV;
    CVariable* m_SavedSRetPtr;
    CVariable* m_ImplArgBufPtr;
    CVariable* m_LocalIdBufPtr;

    std::vector<USC::SConstantGatherEntry> gatherMap;
    uint     m_ConstantBufferLength;
    uint     m_constantBufferMask;
    // "Loaded" masks maintained by SetBindingTableEntryCountAndBitmap.
    // m_shaderResourceLoaded is indexed by word (typeBti / 32), giving
    // 4 * 32 bit positions.
    uint     m_constantBufferLoaded;
    uint64_t m_uavLoaded;
    uint     m_shaderResourceLoaded[4];
    uint     m_renderTargetLoaded;

    int  m_cbSlot;
    uint m_statelessCBPushedSize;
    uint m_NOSBufferSize = 0;

    /// holds max number of inputs that can be pushed for this shader unit
    static const uint32_t m_pMaxNumOfPushedInputs;

    // A value greater than zero means the shader has a barrier (see
    // GetHasBarrier).
    int m_BarrierNumber;
    SProgramOutput m_simdProgram;

    // Holds max used binding table entry index.
    uint32_t m_BindingTableEntryCount;

    // Holds binding table entries bitmap.
    uint32_t m_BindingTableUsedEntriesBitmap;

    // for each vector BCI whose uses are all extractElt with imm offset,
    // we store the CVariables for each index
    llvm::DenseMap<llvm::Instruction*, llvm::SmallVector<CVariable*, 8>> m_VectorBCItoCVars;

    // These two are for stateful token setup. It is a quick
    // special case check. Once a generic approach is added,
    // these two fields shall be retired.
    bool m_HasGlobalStatelessMemoryAccess;
    bool m_HasConstantStatelessMemoryAccess;

    bool m_HasGlobalAtomics = false;

    bool m_HasDPAS = false;
    bool m_HasEval = false;
    bool m_passNOSInlinedata = false;

    uint32_t m_StatelessWritesCount = 0;
    uint32_t m_IndirectStatelessCount = 0;

    DebugInfoData diData;

    bool m_HasStackCalls = false;
    bool m_isIntelSymbolTableVoidProgram = false;
};

/// This class contains the information for the different SIMD version
/// of a kernel. Each kernel in the module is associated to one CShaderProgram
class CShaderProgram
{
public:
    typedef llvm::MapVector<llvm::Function*, CShaderProgram*> KernelShaderMap;
    CShaderProgram(CodeGenContext* ctx, llvm::Function* kernel);
    ~CShaderProgram();
    CShader* GetOrCreateShader(SIMDMode simd, ShaderDispatchMode mode = ShaderDispatchMode::NOT_APPLICABLE);
    CShader* GetShader(SIMDMode simd, ShaderDispatchMode mode = ShaderDispatchMode::NOT_APPLICABLE);
    void DeleteShader(SIMDMode simd, ShaderDispatchMode mode = ShaderDispatchMode::NOT_APPLICABLE);
    CodeGenContext* GetContext() { return m_context; }

    llvm::Function* getLLVMFunction() const { return m_kernel; }
    ShaderStats* m_shaderStats;

    // invoked to clear Func ptr when the current module is deleted (so is func within it).
    void clearBeforeRetry() {
        m_kernel = nullptr;
        for (auto S : m_SIMDshaders) {
            if (S != nullptr) {
                S->entry = nullptr;
            }
        }
    }

protected:
    CShader*& GetShaderPtr(SIMDMode simd, ShaderDispatchMode mode);
    CShader* CreateNewShader(SIMDMode simd);
    void ClearShaderPtr(SIMDMode simd);

    inline bool hasShaderOutput(CShader* shader)
    {
        return (shader && shader->ProgramOutput()->m_programSize > 0);
    }

    inline void freeShaderOutput(CShader* shader)
    {
        if (hasShaderOutput(shader))
        {
            IGC::aligned_free(shader->ProgramOutput()->m_programBin);
            shader->ProgramOutput()->m_programSize = 0;
        }
    }

    CodeGenContext* m_context;
    llvm::Function* m_kernel;
    std::array<CShader*, 8> m_SIMDshaders;
};

/// Per-instruction emission context: an optional flag variable, a
/// destination modifier and whether the flag is inverted.
struct SInstContext
{
    CVariable* flag;
    e_modifier dst_mod;
    bool invertFlag;

    /// Resets every field to its neutral value: no flag, no destination
    /// modifier, flag not inverted.
    void init()
    {
        invertFlag = false;
        dst_mod = EMOD_NONE;
        flag = nullptr;
    }
};

/// Neutral instruction context, holding the same values SInstContext::init()
/// assigns.
static const SInstContext g_InitContext =
{
    nullptr,    // flag
    EMOD_NONE,  // dst_mod
    false,      // invertFlag
};

// Forward declaration; only used by pointer in the prototypes below.
struct PSSignature;

// Adds the code-generation passes for the given SIMD mode to `Passes`.
// shaderMode defaults to NOT_APPLICABLE and pSignature to nullptr.
void AddCodeGenPasses(
    CodeGenContext& ctx,
    CShaderProgram::KernelShaderMap& shaders,
    IGCPassManager& Passes,
    SIMDMode simdMode,
    bool canAbortOnSpill,
    ShaderDispatchMode shaderMode = ShaderDispatchMode::NOT_APPLICABLE,
    PSSignature* pSignature = nullptr);


bool SimdEarlyCheck(CodeGenContext* ctx);
// forceSimd is an output parameter.
bool ForceSimdWA(ComputeShaderContext& ctx, SIMDMode& forceSimd, SIMDMode minSimdMode, SIMDMode maxSimdMode);
// Pass-pipeline setup helpers: add legalization / analysis passes to `mpm`.
void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSignature* pSignature = nullptr);
void AddAnalysisPasses(CodeGenContext& ctx, IGCPassManager& mpm);
// Presumably releases every CShaderProgram stored in the map - confirm
// against the definition.
void destroyShaderMap(CShaderProgram::KernelShaderMap& shaders);
void unify_opt_PreProcess(CodeGenContext* pContext);
}