File: CISABuilder.hpp

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (1182 lines) | stat: -rw-r--r-- 48,185 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#pragma once

#include "Compiler/CISACodeGen/CISACodeGen.h"
#include "Compiler/CISACodeGen/CVariable.hpp"
#include "Compiler/CISACodeGen/PatternMatchPass.hpp"
#include "Compiler/CISACodeGen/helper.h"
#include "visa_wa.h"
#include "inc/common/sku_wa.h"

namespace IGC
{
    class CShader;

    /// Bundles a flag variable with its predicate mode and an invert bit.
    struct SFlag
    {
        CVariable* var;      // flag variable; null after init()
        e_predMode mode;     // predicate mode; EPRED_NORMAL after init()
        bool invertFlag;     // invert bit; false after init()

        /// Puts all three fields back to their defaults: no flag variable,
        /// EPRED_NORMAL mode and no inversion.
        void init()
        {
            invertFlag = false;
            mode = EPRED_NORMAL;
            var = nullptr;
        }
    };

    /// Per-operand modifier state: sub-register / sub-variable selection, an
    /// explicit region triple, the operand modifier and the instance.
    struct SModifier
    {
        uint16_t subReg;
        uint8_t subVar;
        // Explicit region values.
        // NOTE(review): presumably only consulted while specialRegion is set - confirm.
        uint8_t region[3];
        e_modifier mod;
        e_instance instance;
        bool specialRegion;

        /// Resets every field to its default value.
        ///
        /// region[] is cleared as well, so a freshly reset modifier never
        /// carries indeterminate or stale region bytes when the struct is
        /// copied or inspected.
        void init()
        {
            mod = EMOD_NONE;
            subVar = 0;
            subReg = 0;
            for (uint8_t& regionElem : region)
            {
                regionElem = 0;
            }
            instance = EINSTANCE_UNSPECIFIED;
            specialRegion = false;
        }
    };

    /// Pairs a root variable with a vISA type.
    struct SAlias
    {
        CVariable* m_rootVar;   // root variable of the alias
        VISA_Type  m_type;      // vISA type associated with the alias

        /// Stores \p var as the root variable and \p type as the type.
        SAlias(CVariable* var, VISA_Type type)
            : m_rootVar(var)
            , m_type(type)
        {
        }
    };

    /// Key traits for SAlias, following the llvm::DenseMapInfo interface:
    /// sentinel keys, a hash function and an equality predicate.
    struct SAliasMapInfo {
        /// Sentinel for an empty slot: null root variable with type ISA_TYPE_UD.
        static SAlias getEmptyKey()
        {
            return SAlias(nullptr, ISA_TYPE_UD);
        }

        /// Sentinel for a tombstone slot: null root variable with type ISA_TYPE_D.
        static SAlias getTombstoneKey()
        {
            return SAlias(nullptr, ISA_TYPE_D);
        }

        /// Hash of the root-variable pointer XOR-ed with the type value.
        static unsigned getHashValue(const SAlias& Val)
        {
            const unsigned rootHash = llvm::DenseMapInfo<CVariable*>::getHashValue(Val.m_rootVar);
            return rootHash ^ Val.m_type;
        }

        /// Two aliases are equal when both the root variable and the type match.
        static bool isEqual(const SAlias& LHS, const SAlias& RHS)
        {
            const bool sameType = (LHS.m_type == RHS.m_type);
            const bool sameRoot = (LHS.m_rootVar == RHS.m_rootVar);
            return sameRoot && sameType;
        }
    };

    /// Type-safe wrapper around a URB write channel bitmask that can present the
    /// mask in the channel mask format required by the V-ISA interface.
    class URBChannelMask
    {
    public:
        /// Wraps the given raw channel bitmask.
        explicit URBChannelMask(unsigned int bitmask)
            : m_bitmask(bitmask)
        {
        }

        /// Size of the bitmask, defined as the position of the most
        /// significant bit whose value is 1.
        /// E.g. size(10001) == 5, size(1) == 1, size(1111) == 4
        size_t size() const;

        /// Channel mask in the format expected by V-ISA.
        /// A full mask (consisting only of 1s) must be returned as 0xFF, which
        /// means 'no channel mask'; otherwise the stored mask is returned.
        /// E.g. 1010 asVISAMask --> 1010, 111 asVISAMask --> 11111111 (full mask case)
        unsigned int asVISAMask() const;

        /// True if all channels are set (i.e., the channel mask can be skipped).
        /// Note that a mask of 0 also satisfies the check below and yields true.
        bool isAllSet() const
        {
            // For a mask made of consecutive low 1 bits, adding one carries past
            // all of them, so the incremented value shares no bit with the mask.
            const unsigned int incremented = m_bitmask + 1;
            return (incremented & m_bitmask) == 0;
        }

    private:
        unsigned int m_bitmask;
    };

    /// Encoder state bundle that CEncoder hands out via CopyEncoderState() and
    /// takes back via SetEncoderState().
    struct SEncoderState
    {
        // Operand modifiers: four source slots and one destination.
        SModifier m_srcOperand[4];
        SModifier m_dstOperand;

        // Flag (predicate) state.
        SFlag     m_flag;

        // SIMD sizes and execution mask settings.
        SIMDMode  m_simdSize;
        SIMDMode  m_uniformSIMDSize;
        e_mask    m_mask;
        bool      m_noMask;

        // Remaining boolean switches; only m_secondNibble has a default value.
        bool      m_SubSpanDestination;
        bool      m_secondHalf;
        bool      m_secondNibble{ false };
    };

    class CEncoder
    {
    public:
        void InitEncoder(bool canAbortOnSpill, bool hasStackCall, bool hasInlineAsmCall, bool hasAdditionalVisaAsmToLink, VISAKernel* prevKernel);
        void InitBuildParams(llvm::SmallVector<std::unique_ptr< char, std::function<void(char*)>>, 10> & params);
        void InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbortOnSpill, bool hasStackCall, bool enableVISA_IR);
        SEncoderState CopyEncoderState();
        void SetEncoderState(SEncoderState& newState);
        VISA_Align GetVISAAlign(CVariable* var);

        void SetAbortOnSpillThreshold(bool canAbortOnSpill, bool AllowSpill);
        void SetDispatchSimdSize();
        void SetSpillMemOffset();
        void SetStackFunctionArgSize(uint size);  // size in GRFs
        void SetStackFunctionRetSize(uint size);  // size in GRFs
        void SetExternFunctionFlag();

        void GetVISAPredefinedVar(CVariable* pVar, PreDefined_Vars var);
        void CreateVISAVar(CVariable* var);
        void DeclareInput(CVariable* var, uint offset, uint instance);
        void MarkAsOutput(CVariable* var);
        void MarkAsPayloadLiveOut(CVariable* var);
        void Compile(bool hasSymbolTable = false);
        std::string GetShaderName();
        void ReportCompilerStatistics(VISAKernel* pMainKernel, SProgramOutput* pOutput);
        int GetThreadCount(SIMDMode simdMode);

        CEncoder();
        ~CEncoder();
        void SetProgram(CShader* program);
        void Jump(CVariable* flag, uint label);
        void Label(uint label);
        uint GetNewLabelID(const CName &name);
        void DwordAtomicRaw(AtomicOp atomic_op,
            const ResourceDescriptor& bindingTableIndex,
            CVariable* dst, CVariable* elem_offset, CVariable* src0,
            CVariable* src1, bool is16Bit = false);
        void AtomicRawA64(AtomicOp atomic_op, const ResourceDescriptor& resource, CVariable* dst,
            CVariable* elem_offset, CVariable* src0, CVariable* src1,
            unsigned short bitwidth);
        void TypedAtomic(
            AtomicOp atomic_op,
            CVariable* dst,
            const ResourceDescriptor& resource,
            CVariable* pU,
            CVariable* pV,
            CVariable* pR,
            CVariable* src0,
            CVariable* src1,
            CVariable* lod,
            bool is16Bit = false);
        void Cmp(e_predicate p, CVariable* dst, CVariable* src0, CVariable* src1);
        void Select(CVariable* flag, CVariable* dst, CVariable* src0, CVariable* src1);
        void GenericAlu(e_opcode opcode, CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2 = nullptr);
        void URBWrite(CVariable* src, const int payloadElementOffset, CVariable* offset, CVariable* urbHandle, CVariable* dynamicMask);
        void Send(CVariable* dst, CVariable* src, uint exDesc, CVariable* messDescriptor, bool isSendc = false);
        void Send(CVariable* dst, CVariable* src, uint ffid, CVariable* exDesc, CVariable* messDescriptor, bool isSendc = false);
        void Sends(CVariable* dst, CVariable* src0, CVariable* src1, uint ffid, CVariable* exDesc, CVariable* messDescriptor, bool isSendc = false, bool hasEOT = false);
        void RenderTargetWrite(CVariable* var[],
            bool isUndefined[],
            bool lastRenderTarget,
            bool isNullRT,
            bool perSample,
            bool coarseMode,
            bool headerMaskFromCe0,
            CVariable* bindingTableIndex,
            CVariable* RTIndex,
            CVariable* source0Alpha,
            CVariable* oMask,
            CVariable* depth,
            CVariable* stencil,
            CVariable* CPSCounter,
            CVariable* sampleIndex,
            CVariable* r1Reg);
        void Sample(
            EOPCODE subOpcode,
            uint writeMask,
            CVariable* offset,
            const ResourceDescriptor& bindingTableIndex,
            const SamplerDescriptor& SamplerIdx,
            uint numSources,
            CVariable* dst,
            llvm::SmallVector<CVariable*, 4> & payload,
            bool zeroLOD,
            bool cpsEnable,
            bool feedbackEnable,
            bool nonUniformState = false);
        void Load(
            EOPCODE subOpcode,
            uint writeMask,
            CVariable* offset,
            const ResourceDescriptor& resource,
            uint numSources,
            CVariable* dst,
            llvm::SmallVector<CVariable*, 4> & payload,
            bool zeroLOD,
            bool feedbackEnable);

        void Info(EOPCODE subOpcode, uint writeMask, const ResourceDescriptor& resource, CVariable* lod, CVariable* dst);

        void Gather4Inst(
            EOPCODE subOpcode,
            CVariable* offset,
            const ResourceDescriptor& resource,
            const SamplerDescriptor& sampler,
            uint numSources,
            CVariable* dst,
            llvm::SmallVector<CVariable*, 4> & payload,
            uint channel,
            bool feedbackEnable);

        void OWLoad(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, bool owordAligned, uint bytesToBeRead, uint dstOffset = 0);
        void OWStore(CVariable* data, e_predefSurface surfaceType, CVariable* bufidx, CVariable* offset, uint bytesToBeRead, uint srcOffset);

        void AddrAdd(CVariable* dst, CVariable* src0, CVariable* src1);
        void Barrier(e_barrierKind BarrierKind);
        void Fence(bool CommitEnable,
            bool L3_Flush_RW_Data,
            bool L3_Flush_Constant_Data,
            bool L3_Flush_Texture_Data,
            bool L3_Flush_Instructions,
            bool Global_Mem_Fence,
            bool L1_Flush,
            bool SWFence);
        void FlushSamplerCache();
        void EOT();
        void OWLoadA64(CVariable* dst, CVariable* offset, uint dstSize, uint dstOffset = 0);
        void OWStoreA64(CVariable* dst, CVariable* offset, uint dstSize, uint srcOffset);
        void MediaBlockMessage(ISA_Opcode subOpcode,
            CVariable* dst,
            e_predefSurface surfaceType,
            CVariable* bufId,
            CVariable* xOffset,
            CVariable* yOffset,
            uint modifier,
            unsigned char blockWidth,
            unsigned char blockHeight,
            uint plane);
        void GatherA64(CVariable* dst, CVariable* offset, unsigned elementSize, unsigned numElems);
        VISA_VectorOpnd* GetVISALSCSurfaceOpnd(e_predefSurface surfaceType, CVariable* bti);
        static LSC_DATA_SIZE LSC_GetElementSize(unsigned eSize, bool is2DBlockMsg = false);
        static LSC_DATA_ELEMS LSC_GetElementNum(unsigned eNum);
        static LSC_ADDR_TYPE getLSCAddrType(const ResourceDescriptor * resource);
        static LSC_ADDR_TYPE getLSCAddrType(e_predefSurface surfaceType);
        void LSC_LoadGather(
            LSC_OP subOp, CVariable* dst, CVariable* offset,
            LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems,
            unsigned blockOffset, ResourceDescriptor* resource,
            LSC_ADDR_SIZE addr_size, LSC_DATA_ORDER data_order,
            int immOffset, LSC_CACHE_OPTS cacheOpts);
        void LSC_StoreScatter(
            LSC_OP subOp, CVariable * src, CVariable * offset,
            LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems,
            unsigned blockOffset, ResourceDescriptor * resource,
            LSC_ADDR_SIZE addr_size, LSC_DATA_ORDER data_order,
            int immOffset, LSC_CACHE_OPTS cacheOpts);
        void LSC_LoadBlock1D(
            CVariable* dst, CVariable* offset,
            LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems,
            ResourceDescriptor* resource,
            LSC_ADDR_SIZE addrSize, int addrImmOffset,
            LSC_CACHE_OPTS cacheOpts);
        void LSC_StoreBlock1D(
            CVariable * src, CVariable * offset,
            LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems,
            ResourceDescriptor * resource,
            LSC_ADDR_SIZE addrSize, int addrImmOffset,
            LSC_CACHE_OPTS cacheOpts);
        void LSC_AtomicRaw(
            AtomicOp atomic_op, CVariable * dst, CVariable * offset,
            CVariable * src0, CVariable * src1,
            unsigned short bitwidth, ResourceDescriptor * resource,
            LSC_ADDR_SIZE addr_size,
            int immOff, LSC_CACHE_OPTS cacheOpts);
        void LSC_Fence(LSC_SFID sfid, LSC_SCOPE scope, LSC_FENCE_OP op);
        void LSC_2DBlockMessage(
            LSC_OP subOp, ResourceDescriptor* resource,
            CVariable* dst, CVariable* bufId,
            CVariable* xOffset, CVariable* yOffset,
            unsigned char blockWidth,
            unsigned char blockHeight,
            unsigned elemSize, unsigned numBlocks,
            bool isTranspose, bool isVnni,
            CVariable* flatImageBaseoffset, CVariable* flatImageWidth,
            CVariable* flatImageHeight, CVariable* flatImagePitch);
        void NamedBarrier(e_barrierKind BarrierKind, CVariable* src0, CVariable* src1);
        void LSC_TypedReadWrite(
            LSC_OP subOp, ResourceDescriptor* resource,
            CVariable* pU, CVariable* pV, CVariable* pR, CVariable* pLOD,
            CVariable* pSrcDst,
            unsigned elemSize, unsigned numElems,
            LSC_ADDR_SIZE addr_size, int chMask,
            LSC_CACHE_OPTS cacheOpts = { LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT });

        void ScatterA64(CVariable* val, CVariable* offset, unsigned elementSize, unsigned numElems);
        void ByteGather(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems);
        void ByteScatter(CVariable* src, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems);
        void Gather4Scaled(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned Mask = 0);
        void Gather4ScaledNd(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned nd, unsigned Mask = 0);
        void Scatter4Scaled(CVariable* src, const ResourceDescriptor& resource, CVariable* offset);
        void Gather4A64(CVariable* dst, CVariable* offset);
        void Scatter4A64(CVariable* src, CVariable* offset);
        void BoolToInt(CVariable* dst, CVariable* src);
        void Copy(CVariable* dst, CVariable* src);
        void SubroutineCall(CVariable* flag, llvm::Function* F);
        void SubroutineRet(CVariable* flag, llvm::Function* F);
        void StackCall(CVariable* flag, llvm::Function* F, unsigned char argSize, unsigned char retSize);
        void IndirectStackCall(CVariable* flag, CVariable* funcPtr, unsigned char argSize, unsigned char retSize);
        void StackRet(CVariable* flag);
        void Loc(unsigned int line);
        void File(std::string& s);
        void PredAdd(CVariable* flag, CVariable* dst, CVariable* src0, CVariable* src1);
        void DebugLinePlaceholder();

        inline void Jump(uint label);
        inline void Cast(CVariable* dst, CVariable* src);
        inline void Add(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Bfi(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2, CVariable* src3);
        inline void Bfe(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
        inline void Bfrev(CVariable* dst, CVariable* src0);
        inline void CBit(CVariable* dst, CVariable* src0);
        inline void Fbh(CVariable* dst, CVariable* src0);
        inline void Fbl(CVariable* dst, CVariable* src0);
        inline void Mul(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Pow(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Div(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Shl(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Shr(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void MulH(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Cos(CVariable* dst, CVariable* src0);
        inline void Sin(CVariable* dst, CVariable* src0);
        inline void Log(CVariable* dst, CVariable* src0);
        inline void Exp(CVariable* dst, CVariable* src0);
        inline void Frc(CVariable* dst, CVariable* src0);
        inline void Sqrt(CVariable* dst, CVariable* src0);
        inline void Floor(CVariable* dst, CVariable* src0);
        inline void Ceil(CVariable* dst, CVariable* src0);
        inline void Ctlz(CVariable* dst, CVariable* src0);
        inline void Truncate(CVariable* dst, CVariable* src0);
        inline void RoundNE(CVariable* dst, CVariable* src0);
        inline void Mod(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Rsqrt(CVariable* dst, CVariable* src0);
        inline void Inv(CVariable* dst, CVariable* src0);
        inline void Not(CVariable* dst, CVariable* src0);
        // src0 * src1 + src2
        inline void Madw(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
        inline void Mad(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
        inline void Lrp(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
        inline void Xor(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Or(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void And(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Pln(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void SendC(CVariable* dst, CVariable* src, uint exDesc, CVariable* messDescriptor);
        inline void SendC(CVariable* dst, CVariable* src, uint ffid, CVariable* exDesc, CVariable* messDescriptor);
        inline void LoadMS(EOPCODE subOpcode, uint writeMask, CVariable* offset, const ResourceDescriptor& resource, uint numSources, CVariable* dst, llvm::SmallVector<CVariable*, 4> & payload, bool feedbackEnable);
        inline void SetP(CVariable* dst, CVariable* src);
        inline void Gather(CVariable* dst, CVariable* bufidx, CVariable* offset, CVariable* gOffset, e_predefSurface surface, int elementSize);
        inline void TypedRead4(const ResourceDescriptor& resource, CVariable* pU, CVariable* pV, CVariable* pR, CVariable* pLOD, CVariable* pDst, uint writeMask);
        inline void TypedWrite4(const ResourceDescriptor& resource, CVariable* pU, CVariable* pV, CVariable* pR, CVariable* pLOD, CVariable* pSrc, uint writeMask);
        inline void Scatter(CVariable* val, CVariable* bufidx, CVariable* offset, CVariable* gOffset, e_predefSurface surface, int elementSize);
        inline void IShr(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Min(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void Max(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void UAddC(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void USubB(CVariable* dst, CVariable* src0, CVariable* src1);
        inline void IEEESqrt(CVariable* dst, CVariable* src0);
        inline void IEEEDivide(CVariable* dst, CVariable* src0, CVariable* src1);
        void AddPair(CVariable* Lo, CVariable* Hi, CVariable* L0, CVariable* H0, CVariable* L1, CVariable* H1 = nullptr);
        void SubPair(CVariable* Lo, CVariable* Hi, CVariable* L0, CVariable* H0, CVariable* L1, CVariable* H1);
        inline void dp4a(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
        void Lifetime(VISAVarLifetime StartOrEnd, CVariable* dst);
        void dpas(CVariable* dst, CVariable* input, CVariable* weight, PrecisionType weight_precision,
            CVariable* actication, PrecisionType activation_precision, uint8_t systolicDepth,
            uint8_t repeatCount, bool IsDpasw);
        void fcvt(CVariable* dst, CVariable* src);
        void srnd(CVariable* D, CVariable* S0, CVariable* R);
        void Bfn(uint8_t booleanFuncCtrl, CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
        void QWGather(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems);
        void QWScatter(CVariable* src, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems);
        // VME
        void SendVmeIme(
            CVariable* bindingTableIndex,
            unsigned char streamMode,
            unsigned char searchControlMode,
            CVariable* uniInputVar,
            CVariable* imeInputVar,
            CVariable* ref0Var,
            CVariable* ref1Var,
            CVariable* costCenterVar,
            CVariable* outputVar);

        void SendVmeFbr(
            CVariable* bindingTableIndex,
            CVariable* uniInputVar,
            CVariable* fbrInputVar,
            CVariable* FBRMbModeVar,
            CVariable* FBRSubMbShapeVar,
            CVariable* FBRSubPredModeVar,
            CVariable* outputVar);

        void SendVmeSic(
            CVariable* bindingTableIndex,
            CVariable* uniInputVar,
            CVariable* sicInputVar,
            CVariable* outputVar);

        // VA
        void SendVideoAnalytic(
            llvm::GenIntrinsicInst* inst,
            CVariable* vaResult,
            CVariable* coords,
            CVariable* size,
            CVariable* srcImg,
            CVariable* sampler);

        void SetDstSubVar(uint subVar);
        void SetDstSubReg(uint subReg);
        void SetSrcSubVar(uint srcNum, uint subVar);
        void SetSrcSubReg(uint srcNum, uint subReg);
        void SetDstModifier(e_modifier mod);
        void SetDstModifier(const DstModifier& modifier);
        void SetSrcModifier(uint srcNum, e_modifier mod);
        void SetPredicate(CVariable* flag);
        void SetInversePredicate(bool inv);
        void SetPredicateMode(e_predMode mode);
        void SetSrcRegion(uint srcNum, uint vStride, uint width, uint hStride, e_instance instance = EINSTANCE_UNSPECIFIED);
        void SetDstRegion(uint hStride);
        inline void SetNoMask();
        inline void SetMask(e_mask mask);
        inline void SetSimdSize(SIMDMode size);
        inline SIMDMode GetSimdSize();
        inline void SetUniformSIMDSize(SIMDMode size);
        inline void SetSubSpanDestination(bool subspan);
        inline bool IsSubSpanDestination();
        inline void SetSecondHalf(bool secondHalf);
        inline bool IsSecondHalf();
        inline void SetSecondNibble(bool secondNibble);
        inline bool IsSecondNibble();

        inline void SetIsCodePatchCandidate(bool v);
        inline bool IsCodePatchCandidate();
        inline unsigned int GetPayloadEnd();
        inline void SetPayloadEnd(unsigned int payloadEnd);
        inline void SetHasPrevKernel(bool v);
        inline bool HasPrevKernel();
        inline void BeginForcedNoMaskRegion();
        inline void EndForcedNoMaskRegion();

        void Wait();

        /// Returns the encoder's current vKernel pointer.
        VISAKernel* GetVISAKernel() const
        {
            return vKernel;
        }
        /// Returns the encoder's vbuilder pointer.
        VISABuilder* GetVISABuilder() const
        {
            return vbuilder;
        }
        void Init();
        void Push();

        void initCR(VISAKernel* vKernel);
        void SetVectorMask(bool vMask);

        // Switches from actualRM to newRM
        void SetRoundingMode_FP(ERoundingMode actualRM, ERoundingMode newRM);
        void SetRoundingMode_FPCvtInt(ERoundingMode actualRM, ERoundingMode newRM);

        /// Convenience wrapper: forwards to CVariable::GetCISADataTypeSize for \p type.
        static uint GetCISADataTypeSize(VISA_Type type)
        {
            return CVariable::GetCISADataTypeSize(type);
        }
        /// Convenience wrapper: forwards to CVariable::GetCISADataTypeAlignment for \p type.
        static e_alignment GetCISADataTypeAlignment(VISA_Type type)
        {
            return CVariable::GetCISADataTypeAlignment(type);
        }

        static VISASampler3DSubOpCode ConvertSubOpcode(EOPCODE subOpcode, bool zeroLOD);

        // Wrappers for (potentially) common queries on types
        static bool IsIntegerType(VISA_Type type);
        static bool IsFloatType(VISA_Type type);

        void SetVISAWaTable(WA_TABLE const& waTable);

        /// \brief Initialize per function states and starts vISA emission
        /// as a vISA subroutine
        void BeginSubroutine(llvm::Function* F);
        /// \brief Initialize per function states and starts vISA emission
        /// as a vISA stack-call function
        void BeginStackFunction(llvm::Function* F);
        /// \brief Initialize interpolation section for vISA emission
        void BeginPayloadSection();

        void DestroyVISABuilder();

        void AddVISASymbol(std::string& symName, CVariable* cvar);

        std::string GetVariableName(CVariable* var);
        std::string GetDumpFileName(std::string extension);

        /// True while vKernel points at the payload section (vPayloadSection).
        bool IsPayloadSectionAsPrimary()
        {
            return vPayloadSection == vKernel;
        }
        /// Saves the current vKernel in vKernelTmp, then points vKernel at
        /// the payload section.
        void SetPayloadSectionAsPrimary()
        {
            vKernelTmp = vKernel;
            vKernel = vPayloadSection;
        }
        /// Restores vKernel from the value held in vKernelTmp.
        void SetPayloadSectionAsSecondary()
        {
            vKernel = vKernelTmp;
        }

        std::string GetUniqueInlineAsmLabel();

    private:
        // helper functions
        VISA_VectorOpnd* GetSourceOperand(CVariable* var, const SModifier& mod);
        VISA_VectorOpnd* GetSourceOperandNoModifier(CVariable* var);
        VISA_VectorOpnd* GetDestinationOperand(CVariable* var, const SModifier& mod);
        VISA_RawOpnd* GetRawSource(CVariable* var, uint offset = 0);
        VISA_RawOpnd* GetRawDestination(CVariable* var, unsigned offset = 0);
        VISA_PredOpnd* GetFlagOperand(const SFlag& flag);
        VISA_StateOpndHandle* GetVISASurfaceOpnd(e_predefSurface surfaceType, CVariable* bti);
        VISA_StateOpndHandle* GetVISASurfaceOpnd(const ResourceDescriptor& resource);
        VISA_LabelOpnd* GetLabel(uint label);
        VISA_LabelOpnd* GetFuncLabel(llvm::Function* F);
        void InitLabelMap(const llvm::Function* F);
        CName CreateVisaLabelName(const llvm::StringRef &L = "");
        std::string CreateShortLabel(unsigned labelIndex) const;
        // Prefix for compiler-generated labels. It must begin with something a
        // user won't use in inline assembly.
        static const char* GetCompilerLabelPrefix()
        {
            return "_";
        }

        VISAFunction* GetStackFunction(llvm::Function* F);

        VISA_VectorOpnd* GetUniformSource(CVariable* var);
        VISA_StateOpndHandle* GetBTIOperand(uint bindingTableIndex);
        VISA_StateOpndHandle* GetSamplerOperand(CVariable* sampleIdx);
        VISA_StateOpndHandle* GetSamplerOperand(const SamplerDescriptor& sampler, bool& isIdxLT16);
        void GetRowAndColOffset(CVariable* var, unsigned int subVar, unsigned int subreg, unsigned char& rowOff, unsigned char& colOff);

        VISA_GenVar* GetVISAVariable(CVariable* var);
        VISA_GenVar* GetVISAVariable(CVariable* var, e_instance instance);
        VISA_EMask_Ctrl ConvertMaskToVisaType(e_mask mask, bool noMask);

        // Generic encoding functions
        // The inline wrappers defined after the class body forward to these
        // with a fixed opcode / sub-opcode.
        // Used by Min()/Max() with CISA_DM_FMIN / CISA_DM_FMAX.
        void MinMax(CISA_MIN_MAX_SUB_OPCODE subopcode, CVariable* dst, CVariable* src0, CVariable* src1);
        // Used by Cast() (ISA_MOV) and SetP() (ISA_SETP).
        void DataMov(ISA_Opcode opcode, CVariable* dst, CVariable* src);
        // Only dst and src0 are mandatory; unused sources default to nullptr.
        // Used by the bit-field, shift and bitwise wrappers (Bfi, Bfe, Shl, Xor, ...).
        void LogicOp(
            ISA_Opcode opcode,
            CVariable* dst,
            CVariable* src0,
            CVariable* src1 = nullptr,
            CVariable* src2 = nullptr,
            CVariable* src3 = nullptr);
        // All sources default to nullptr. Used by the unary (Cos, Sqrt, ...),
        // binary (Mul, Add, ...) and ternary (Mad, Lrp, dp4a, ...) wrappers.
        void Arithmetic(
            ISA_Opcode opcode,
            CVariable* dst,
            CVariable* src0 = nullptr,
            CVariable* src1 = nullptr,
            CVariable* src2 = nullptr);
        // Used by UAddC() (ISA_ADDC) and USubB() (ISA_SUBB).
        void CarryBorrowArith(ISA_Opcode opcode, CVariable* dst, CVariable* src0, CVariable* src1);
        // Used by Gather() (ISA_GATHER) and Scatter() (ISA_SCATTER); srcdst is
        // the destination for the former and the value to store for the latter.
        void ScatterGather(
            ISA_Opcode opcode,
            CVariable* srcdst,
            CVariable* bufId,
            CVariable* offset,
            CVariable* gOffset,
            e_predefSurface surface,
            int elementSize);
        // Used by TypedRead4() (ISA_GATHER4_TYPED) and TypedWrite4()
        // (ISA_SCATTER4_TYPED); pSrcDst is the destination or the source
        // respectively.
        void TypedReadWrite(
            ISA_Opcode opcode,
            const ResourceDescriptor& resource,
            CVariable* pU,
            CVariable* pV,
            CVariable* pR,
            CVariable* pLOD,
            CVariable* pSrcDst,
            uint writeMask);

        VISA_Exec_Size  GetAluExecSize(CVariable* dst) const;
        VISA_EMask_Ctrl GetAluEMask(CVariable* dst);
        bool IsSat();

        // Variable splitting facilities (if crosses 2 GRF boundary).
        bool NeedSplitting(CVariable* var, const SModifier& mod,
            unsigned& numParts, bool isSource = false) const;
        SModifier SplitVariable(VISA_Exec_Size fromExecSize,
            VISA_Exec_Size toExecSize,
            unsigned thePart,
            CVariable* var, const SModifier& mod,
            bool isSource = false) const;
        VISA_Exec_Size SplitExecSize(VISA_Exec_Size fromExecSize,
            unsigned numParts) const;
        VISA_EMask_Ctrl SplitEMask(VISA_Exec_Size fromExecSize,
            VISA_Exec_Size toExecSize,
            unsigned thePart,
            VISA_EMask_Ctrl execMask) const;

        // Split SIMD16 message data payload(MDP) for scattered/untyped write
        // messages into two SIMD8 MDPs : V0 and V1.
        void SplitPayloadToLowerSIMD(CVariable* MDP, uint32_t MDPOfst, uint32_t NumBlks, CVariable* V0, CVariable* V1, uint32_t fromSize = 16);
        // Merge two SIMD8 MDPs (V0 & V1) for scattered/untyped read messages into one SIMD16 message : MDP
        void MergePayloadToHigherSIMD(CVariable* V0, CVariable* V1, uint32_t NumBlks, CVariable* MDP, uint32_t MDPOfst, uint32_t toSize = 16);

        // save compile time by avoiding retry if the amount of spill is (very) small
        bool AvoidRetryOnSmallSpill() const;

        // CreateSymbolTable, CreateRelocationTable and CreateFuncAttributeTable will create symbols, relocations and FuncAttributes in
        // two formats. One in given buffer that will be later parsed as patch token based format, another as struct type that will be parsed
        // as ZE binary format

        // CreateSymbolTable
        // Note that this function should be called only once even if there are multiple kernels in a program. Current IGC
        // flow will create all symbols in the first kernel and all the other kernels won't contain symbols
        typedef std::vector<std::pair<llvm::Value*, vISA::GenSymEntry>> ValueToSymbolList;
        void CreateSymbolTable(ValueToSymbolList& symbolTableList);
        // input/output: buffer, bufferSize, tableEntries: for patch-token-based format.
        void CreateSymbolTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries);
        // input/output: symbols: for ZEBinary format
        void CreateSymbolTable(SProgramOutput::ZEBinFuncSymbolTable& funcSyms, SOpenCLProgramInfo::ZEBinProgramSymbolTable& programSyms);
        // Create local symbols for kernels. This is ZEBinary format only.
        void CreateLocalSymbol(const std::string& kernelName, vISA::GenSymType type,
            unsigned offset, unsigned size, SProgramOutput::ZEBinFuncSymbolTable& symbols);

        // CreateRelocationTable
        // input/output: buffer, bufferSize, tableEntries: for patch-token-based format.
        void CreateRelocationTable(VISAKernel* pMainKernel, void*& buffer, unsigned& bufferSize, unsigned& tableEntries);
        // input/output: relocations: for ZEBinary format
        void CreateRelocationTable(VISAKernel* pMainKernel, SProgramOutput::RelocListTy& relocations);

        // CreateFuncAttributeTable
        void CreateFuncAttributeTable(VISAKernel* pMainKernel, void*& buffer, unsigned& bufferSize, unsigned& tableEntries, SProgramOutput::FuncAttrListTy& attrs);

        // CreateGlobalHostAccessTable
        typedef std::vector<vISA::HostAccessEntry> HostAccessList;
        void CreateGlobalHostAccessTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries);
        // input/output: hostAccessMap: for patch-token-based format.
        void CreateGlobalHostAccessTable(HostAccessList& hostAccessMap);
        // input/output: global host access names: for ZEBinary format
        void CreateGlobalHostAccessTable(SOpenCLProgramInfo::ZEBinGlobalHostAccessTable& globalHostAccessTable);

        uint32_t getGRFSize() const;

        // Returns true when ExecSize is the execution size that has to be
        // split: EXEC_SIZE_32 when the GRF size is 64, EXEC_SIZE_16 otherwise.
        bool needsSplitting(VISA_Exec_Size ExecSize) const
        {
            const VISA_Exec_Size splitSize =
                (getGRFSize() == 64) ? EXEC_SIZE_32 : EXEC_SIZE_16;
            return ExecSize == splitSize;
        }

        // Note that GEN can set both fpCvtInt_rtz and any of the FP rounding
        // modes at the same time. If fpCvtInt uses a rounding mode other than
        // rtz, they both use the FP rounding bits.
        //
        // RM bits in CR0.0.
        //    float RM bits: [5:4];
        //    int RM (float -> int): Bit 12: 0 -> rtz; 1 -> using Float RM
        enum RMEncoding {
            // float rounding mode (fp operations, cvt to fp): bits [5:4]
            RoundToNearestEven = 0x0 << 4,
            RoundToPositive = 0x1 << 4,
            RoundToNegative = 0x2 << 4,
            RoundToZero = 0x3 << 4,
            // int rounding mode (fp cvt int only): bit 12 set plus the FP RM
            // bits, for all rounding modes but rtz.
            RoundToNearestEven_int = (1 << 12) | RoundToNearestEven,
            RoundToPositive_int = (1 << 12) | RoundToPositive,
            RoundToNegative_int = (1 << 12) | RoundToNegative,
            RoundToZero_int_unused = (1 << 12) | RoundToZero,
            RoundToZero_int = 0,    // use this for rtz, bit 12 = 0

            // Bit 12 together with bits [5:4].
            IntAndFPRoundingModeMask = (1 << 12) | (0x3 << 4)
        };
        void SetRoundingMode(RMEncoding actualRM, RMEncoding newRM);
        // Get Encoding bit values for rounding mode
        RMEncoding getEncoderRoundingMode_FP(ERoundingMode FP_RM);
        RMEncoding getEncoderRoundingMode_FPCvtInt(ERoundingMode FCvtI_RM);
        unsigned GetRawOpndSplitOffset(VISA_Exec_Size fromExecSize,
            VISA_Exec_Size toExecSize,
            unsigned thePart, CVariable* var) const;

        std::tuple<CVariable*, uint32_t> splitRawOperand(CVariable* var, bool isFirstHalf, VISA_EMask_Ctrl execMask);

        uint32_t getNumChannels(CVariable* var) const;

        void SaveOption(vISAOptions option, bool val);
        void SaveOption(vISAOptions option, uint32_t val);
        void SaveOption(vISAOptions option, const char* val);
        void SetBuilderOptions(VISABuilder* pbuilder);

    protected:
        // encoder states
        SEncoderState m_encoderState = {};

        llvm::DenseMap<SAlias, CVariable*, SAliasMapInfo> m_aliasesMap;

        // vISA needs its own Wa-table as some of the W/A are applicable
        // only to certain APIs/shader types/reg key settings/etc.
        WA_TABLE m_vISAWaTable = {};

        // Tag naming which OptionValue field (vBool, vInt32 or vCstr) is in use.
        enum OpType {
            ET_BOOL = 0,
            ET_INT32 = 1,
            ET_CSTR = 2
        };
        // Value of one saved vISA option. 'type' tags which of the value
        // fields below is meaningful (presumably the others are ignored by the
        // consumer - confirm against SetBuilderOptions). The fields have no
        // default initializers, so every writer must fill them explicitly.
        struct OptionValue
        {
            OpType type;        // which of the following fields is in use
            bool vBool;         // value when type == ET_BOOL
            uint32_t vInt32;    // value when type == ET_INT32
            const char* vCstr;  // value when type == ET_CSTR; not owned by this struct
        };
        // List of vISA user options
        std::vector<std::pair<vISAOptions, OptionValue>> m_visaUserOptions;

        // Typically IGC just uses one vKernel for every vISA::compile call;
        // in those cases, vKernel and vMainKernel should be the same.
        // Only when using stack-call does the vKernel pointer change every time
        // IGC adds a vISA kernel or function object, while vMainKernel
        // always points to the first kernel added during InitEncoder.
        // All pointers default to nullptr (like the other in-class initialized
        // members) so that use before initialization is detectable.
        VISAKernel* vKernel = nullptr;
        VISAKernel* vMainKernel = nullptr;
        VISABuilder* vbuilder = nullptr;
        VISABuilder* vAsmTextBuilder = nullptr;

        // This is for CodePatch to split payload interpolation from a shader
        VISAKernel* vPayloadSection = nullptr;
        VISAKernel* vKernelTmp = nullptr;
        bool m_hasPrevKernel = false;
        unsigned int m_payloadEnd = 0;

        bool m_isCodePatchCandidate = false;

        int m_nestLevelForcedNoMaskRegion = 0;

        bool m_enableVISAdump = false;
        bool m_hasInlineAsm = false;

        std::vector<VISA_LabelOpnd*> labelMap;
        std::vector<CName> labelNameMap; // parallel to labelMap

        /// Per kernel label counter
        unsigned labelCounter = 0;
        /// Per kernel label counter for each inline asm block
        unsigned labelInlineAsmCounter = 0;
        /// Each kernel might emit several functions;
        /// we pre-increment this for each new function we process (InitLabelMap)
        /// The first function will see 0, ...
        unsigned labelFunctionIndex = (unsigned)-1;
        ///
        /// The name of the current function; set if we are emitting labels
        CName currFunctionName;

        /// Keep a map between a function and its label, per kernel state.
        llvm::SmallDenseMap<llvm::Function*, VISA_LabelOpnd*> funcLabelMap;
        /// Keep a map between a stack-called function and the corresponding vISA function
        llvm::SmallDenseMap<llvm::Function*, VISAFunction*> stackFuncMap;

        // dummy variables
        // Default to nullptr so they never hold an indeterminate value.
        VISA_SurfaceVar* dummySurface = nullptr;
        VISA_SamplerVar* samplervar = nullptr;

        // Shader this encoder works for; null until one is assigned.
        CShader* m_program = nullptr;

        // Keep a map between a function and its per-function attributes needed for function pointer support
        // Per-function attributes; every field starts out false / zero.
        struct FuncAttrib
        {
            bool isKernel{ false };            // set by InitFuncAttribute
            bool hasBarrier{ false };          // set by SetFunctionHasBarrier
            unsigned argumentStackSize{ 0 };   // running maximum, see SetFunctionMaxArgumentStackSize
            unsigned allocaStackSize{ 0 };     // set by SetFunctionAllocaStackSize
        };
        llvm::SmallDenseMap<llvm::Function*, FuncAttrib> funcAttributeMap;

    public:
        // Used by EmitVISAPass to set function attributes
        // Registers F in funcAttributeMap (operator[] creates the entry on
        // first use) and records whether it is a kernel.
        void InitFuncAttribute(llvm::Function* F, bool isKernel = false)
        {
            auto& attrib = funcAttributeMap[F];
            attrib.isKernel = isKernel;
        }
        // Marks F as containing a barrier. Functions without an entry in
        // funcAttributeMap (see InitFuncAttribute) are ignored.
        void SetFunctionHasBarrier(llvm::Function* F) {
            // Single lookup: reuse the iterator instead of indexing the map again.
            auto it = funcAttributeMap.find(F);
            if (it != funcAttributeMap.end())
                it->second.hasBarrier = true;
        }
        // Raises the recorded argument stack size of F to 'size' if 'size' is
        // larger than the current value. Functions without an entry in
        // funcAttributeMap (see InitFuncAttribute) are ignored.
        void SetFunctionMaxArgumentStackSize(llvm::Function* F, unsigned size) {
            // Single lookup: the original searched the map three times.
            auto it = funcAttributeMap.find(F);
            if (it != funcAttributeMap.end())
                it->second.argumentStackSize = MAX(it->second.argumentStackSize, size);
        }
        // Records the alloca stack size of F, overwriting any previous value.
        // Functions without an entry in funcAttributeMap (see
        // InitFuncAttribute) are ignored.
        void SetFunctionAllocaStackSize(llvm::Function* F, unsigned size) {
            // Single lookup: reuse the iterator instead of indexing the map again.
            auto it = funcAttributeMap.find(F);
            if (it != funcAttributeMap.end())
                it->second.allocaStackSize = size;
        }
    };

    // Jump to 'label' without a flag operand: forwards to the two-argument
    // Jump overload with a null first argument.
    inline void CEncoder::Jump(uint label)
    {
        // nullptr (not NULL) so the argument can only bind to a pointer parameter.
        Jump(nullptr, label);
    }

    // Encodes ISA_BFI through LogicOp() with four sources.
    inline void CEncoder::Bfi(
        CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2, CVariable* src3) {
        LogicOp(ISA_BFI, dst, src0, src1, src2, src3);
    }

    // Encodes ISA_BFE through LogicOp() with three sources.
    inline void CEncoder::Bfe(
        CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2) {
        LogicOp(ISA_BFE, dst, src0, src1, src2);
    }

    // Encodes ISA_BFREV through LogicOp() with a single source.
    inline void CEncoder::Bfrev(CVariable* dst, CVariable* src0) {
        LogicOp(ISA_BFREV, dst, src0);
    }

    // Encodes ISA_CBIT through LogicOp() with a single source.
    inline void CEncoder::CBit(CVariable* dst, CVariable* src) {
        LogicOp(ISA_CBIT, dst, src);
    }

    // Encodes ISA_FBH through LogicOp() with a single source.
    inline void CEncoder::Fbh(CVariable* dst, CVariable* src) {
        LogicOp(ISA_FBH, dst, src);
    }

    // Encodes ISA_FBL through LogicOp() with a single source.
    inline void CEncoder::Fbl(CVariable* dst, CVariable* src) {
        LogicOp(ISA_FBL, dst, src);
    }

    // Encodes ISA_MUL through Arithmetic() with two sources.
    inline void CEncoder::Mul(CVariable* dst, CVariable* src0, CVariable* src1) {
        Arithmetic(ISA_MUL, dst, src0, src1);
    }

    // Encodes ISA_POW through Arithmetic() with two sources.
    inline void CEncoder::Pow(CVariable* dst, CVariable* src0, CVariable* src1) {
        Arithmetic(ISA_POW, dst, src0, src1);
    }

    // Encodes ISA_DIV through Arithmetic() with two sources.
    inline void CEncoder::Div(CVariable* dst, CVariable* src0, CVariable* src1) {
        Arithmetic(ISA_DIV, dst, src0, src1);
    }

    // Encodes ISA_ADD through Arithmetic() with two sources.
    inline void CEncoder::Add(CVariable* dst, CVariable* src0, CVariable* src1) {
        Arithmetic(ISA_ADD, dst, src0, src1);
    }

    // Encodes ISA_SHL through LogicOp() with two sources.
    inline void CEncoder::Shl(CVariable* dst, CVariable* src0, CVariable* src1) {
        LogicOp(ISA_SHL, dst, src0, src1);
    }

    // Encodes ISA_ASR through LogicOp() with two sources.
    inline void CEncoder::IShr(CVariable* dst, CVariable* src0, CVariable* src1) {
        LogicOp(ISA_ASR, dst, src0, src1);
    }

    // Encodes ISA_SHR through LogicOp() with two sources.
    inline void CEncoder::Shr(CVariable* dst, CVariable* src0, CVariable* src1) {
        LogicOp(ISA_SHR, dst, src0, src1);
    }

    // Encodes ISA_MULH through Arithmetic() with two sources.
    inline void CEncoder::MulH(CVariable* dst, CVariable* src0, CVariable* src1) {
        Arithmetic(ISA_MULH, dst, src0, src1);
    }

    // Encodes ISA_COS through Arithmetic() with a single source.
    inline void CEncoder::Cos(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_COS, dst, src0);
    }

    // Encodes ISA_SIN through Arithmetic() with a single source.
    inline void CEncoder::Sin(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_SIN, dst, src0);
    }

    // Encodes ISA_LOG through Arithmetic() with a single source.
    inline void CEncoder::Log(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_LOG, dst, src0);
    }

    // Encodes ISA_EXP through Arithmetic() with a single source.
    inline void CEncoder::Exp(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_EXP, dst, src0);
    }

    // Encodes ISA_SQRT through Arithmetic() with a single source.
    inline void CEncoder::Sqrt(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_SQRT, dst, src0);
    }

    // Encodes ISA_RNDD through Arithmetic() with a single source.
    inline void CEncoder::Floor(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_RNDD, dst, src0);
    }

    // Encodes ISA_RNDU through Arithmetic() with a single source.
    inline void CEncoder::Ceil(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_RNDU, dst, src0);
    }

    // Encodes ISA_LZD through Arithmetic() with a single source.
    inline void CEncoder::Ctlz(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_LZD, dst, src0);
    }

    // Encodes ISA_RNDZ through Arithmetic() with a single source.
    inline void CEncoder::Truncate(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_RNDZ, dst, src0);
    }

    // Encodes ISA_RNDE through Arithmetic() with a single source.
    inline void CEncoder::RoundNE(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_RNDE, dst, src0);
    }

    // Encodes ISA_MOD through Arithmetic() with two sources.
    inline void CEncoder::Mod(CVariable* dst, CVariable* src0, CVariable* src1) {
        Arithmetic(ISA_MOD, dst, src0, src1);
    }

    // Encodes ISA_RSQRT through Arithmetic() with a single source.
    inline void CEncoder::Rsqrt(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_RSQRT, dst, src0);
    }

    // Encodes ISA_INV through Arithmetic() with a single source.
    inline void CEncoder::Inv(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_INV, dst, src0);
    }

    // Encodes ISA_NOT. NOT is a logic opcode, so it is routed through LogicOp()
    // like the other logic wrappers in this file (And, Or, Xor, Shl, ...), not
    // through the Arithmetic() path.
    inline void CEncoder::Not(CVariable* dst, CVariable* src0)
    {
        LogicOp(ISA_NOT, dst, src0);
    }

    // Encodes ISA_FRC through Arithmetic() with a single source.
    inline void CEncoder::Frc(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_FRC, dst, src0);
    }

    // Encodes ISA_PLANE through Arithmetic() with two sources.
    inline void CEncoder::Pln(CVariable* dst, CVariable* src0, CVariable* src1) {
        Arithmetic(ISA_PLANE, dst, src0, src1);
    }

    // Encodes ISA_MOV from src to dst through DataMov().
    inline void CEncoder::Cast(CVariable* dst, CVariable* src) {
        DataMov(ISA_MOV, dst, src);
    }

    // src0 * src1 + src2, encoded as ISA_MADW through Arithmetic().
    inline void CEncoder::Madw(
        CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2) {
        Arithmetic(ISA_MADW, dst, src0, src1, src2);
    }

    // src0 * src1 + src2, encoded as ISA_MAD through Arithmetic().
    inline void CEncoder::Mad(
        CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2) {
        Arithmetic(ISA_MAD, dst, src0, src1, src2);
    }

    // Encodes ISA_LRP through Arithmetic() with three sources.
    inline void CEncoder::Lrp(
        CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2) {
        Arithmetic(ISA_LRP, dst, src0, src1, src2);
    }

    // Encodes ISA_XOR through LogicOp() with two sources.
    inline void CEncoder::Xor(CVariable* dst, CVariable* src0, CVariable* src1) {
        LogicOp(ISA_XOR, dst, src0, src1);
    }

    // Encodes ISA_OR through LogicOp() with two sources.
    inline void CEncoder::Or(CVariable* dst, CVariable* src0, CVariable* src1) {
        LogicOp(ISA_OR, dst, src0, src1);
    }

    // Encodes ISA_AND through LogicOp() with two sources.
    inline void CEncoder::And(CVariable* dst, CVariable* src0, CVariable* src1) {
        LogicOp(ISA_AND, dst, src0, src1);
    }

    // Encodes ISA_SETP through DataMov(). The encoder's NoMask state is forced
    // on before the instruction is emitted.
    inline void CEncoder::SetP(CVariable* dst, CVariable* src0) {
        // A set-predicate always needs NoMask; this must be set before DataMov().
        m_encoderState.m_noMask = true;
        DataMov(ISA_SETP, dst, src0);
    }

    // Encodes a min through MinMax() using the CISA_DM_FMIN sub-opcode.
    inline void CEncoder::Min(CVariable* dst, CVariable* src0, CVariable* src1) {
        MinMax(CISA_DM_FMIN, dst, src0, src1);
    }

    // Encodes a max through MinMax() using the CISA_DM_FMAX sub-opcode.
    inline void CEncoder::Max(CVariable* dst, CVariable* src0, CVariable* src1) {
        MinMax(CISA_DM_FMAX, dst, src0, src1);
    }

    // Encodes ISA_ADDC through CarryBorrowArith().
    inline void CEncoder::UAddC(CVariable* dst, CVariable* src0, CVariable* src1) {
        CarryBorrowArith(ISA_ADDC, dst, src0, src1);
    }

    // Encodes ISA_SUBB through CarryBorrowArith().
    inline void CEncoder::USubB(CVariable* dst, CVariable* src0, CVariable* src1) {
        CarryBorrowArith(ISA_SUBB, dst, src0, src1);
    }

    // Forwards to Load() with the same arguments; the eighth Load() argument
    // is fixed to false and feedbackEnable is passed last.
    inline void CEncoder::LoadMS(
        EOPCODE subOpcode,
        uint writeMask,
        CVariable* offset,
        const ResourceDescriptor& resource,
        uint numSources,
        CVariable* dst,
        llvm::SmallVector<CVariable*, 4> & payload,
        bool feedbackEnable)
    {
        Load(subOpcode, writeMask, offset, resource, numSources, dst, payload,
            false, feedbackEnable);
    }

    // Encodes ISA_GATHER through ScatterGather(); dst receives the result.
    inline void CEncoder::Gather(
        CVariable* dst, CVariable* bufId, CVariable* offset, CVariable* gOffset,
        e_predefSurface surface, int elementSize) {
        ScatterGather(ISA_GATHER, dst, bufId, offset, gOffset, surface, elementSize);
    }

    // Encodes ISA_GATHER4_TYPED through TypedReadWrite(); pDst is passed as
    // the source/destination operand.
    inline void CEncoder::TypedRead4(
        const ResourceDescriptor& resource,
        CVariable* pU, CVariable* pV, CVariable* pR, CVariable* pLOD,
        CVariable* pDst, uint writeMask) {
        TypedReadWrite(ISA_GATHER4_TYPED, resource, pU, pV, pR, pLOD, pDst, writeMask);
    }

    // Encodes ISA_SCATTER4_TYPED through TypedReadWrite(); pSrc is passed as
    // the source/destination operand.
    inline void CEncoder::TypedWrite4(
        const ResourceDescriptor& resource,
        CVariable* pU, CVariable* pV, CVariable* pR, CVariable* pLOD,
        CVariable* pSrc, uint writeMask) {
        TypedReadWrite(ISA_SCATTER4_TYPED, resource, pU, pV, pR, pLOD, pSrc, writeMask);
    }

    // Encodes ISA_SCATTER through ScatterGather(); val is the data operand.
    inline void CEncoder::Scatter(
        CVariable* val, CVariable* bufidx, CVariable* offset, CVariable* gOffset,
        e_predefSurface surface, int elementSize) {
        ScatterGather(ISA_SCATTER, val, bufidx, offset, gOffset, surface, elementSize);
    }

    // Forwards to the matching Send() overload with its trailing boolean set
    // to true (presumably the sendc selector - confirm against Send()).
    inline void CEncoder::SendC(
        CVariable* dst, CVariable* src, uint exDesc, CVariable* messDescriptor) {
        Send(dst, src, exDesc, messDescriptor, true);
    }

    // Variant taking an ffid and a variable extended descriptor. Forwards to
    // the matching Send() overload with its trailing boolean set to true
    // (presumably the sendc selector - confirm against Send()).
    inline void CEncoder::SendC(
        CVariable* dst, CVariable* src, uint ffid, CVariable* exDesc, CVariable* messDescriptor) {
        Send(dst, src, ffid, exDesc, messDescriptor, true);
    }

    // Encodes ISA_SQRTM through Arithmetic() with a single source.
    inline void CEncoder::IEEESqrt(CVariable* dst, CVariable* src0) {
        Arithmetic(ISA_SQRTM, dst, src0);
    }

    // Encodes ISA_DIVM through Arithmetic() with two sources.
    inline void CEncoder::IEEEDivide(CVariable* dst, CVariable* src0, CVariable* src1) {
        Arithmetic(ISA_DIVM, dst, src0, src1);
    }

    // Encodes ISA_DP4A through Arithmetic() with three sources.
    inline void CEncoder::dp4a(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2)
    {
        Arithmetic(ISA_DP4A, dst, src0, src1, src2);
    }

    // Stores v in m_isCodePatchCandidate.
    inline void CEncoder::SetIsCodePatchCandidate(bool v) {
        m_isCodePatchCandidate = v;
    }

    // Returns the value last stored by SetIsCodePatchCandidate() (false by default).
    inline bool CEncoder::IsCodePatchCandidate() {
        return m_isCodePatchCandidate;
    }

    // Stores payloadEnd in m_payloadEnd.
    inline void CEncoder::SetPayloadEnd(unsigned int payloadEnd) {
        m_payloadEnd = payloadEnd;
    }

    // Returns the value last stored by SetPayloadEnd() (0 by default).
    inline unsigned int CEncoder::GetPayloadEnd() {
        return m_payloadEnd;
    }

    // Stores v in m_hasPrevKernel.
    inline void CEncoder::SetHasPrevKernel(bool v) {
        m_hasPrevKernel = v;
    }

    // Returns the value last stored by SetHasPrevKernel() (false by default).
    inline bool CEncoder::HasPrevKernel() {
        return m_hasPrevKernel;
    }

    // Enters one (possibly nested) forced-NoMask region: bumps the nesting
    // counter and switches the encoder state to NoMask.
    inline void CEncoder::BeginForcedNoMaskRegion() {
        m_nestLevelForcedNoMaskRegion += 1;
        // From here on instructions are submitted with NoMask control.
        m_encoderState.m_noMask = true;
    }

    // Leaves one level of forced-NoMask nesting. m_noMask is cleared only when
    // the outermost region is closed; asserts if there are more End than
    // Begin calls.
    inline void CEncoder::EndForcedNoMaskRegion() {
        --m_nestLevelForcedNoMaskRegion;
        IGC_ASSERT_MESSAGE(m_nestLevelForcedNoMaskRegion >= 0, "Invalid nesting of Unmasked regions");
        if (m_nestLevelForcedNoMaskRegion != 0) {
            // Still nested (or mis-nested): leave m_noMask untouched.
            return;
        }
        // Out of the unmasked region: return to submitting insts with Mask control.
        m_encoderState.m_noMask = false;
    }

    // Sets the NoMask flag in the encoder state.
    inline void CEncoder::SetNoMask() {
        m_encoderState.m_noMask = true;
    }

    // Stores mask in the encoder state (m_mask).
    inline void CEncoder::SetMask(e_mask mask) {
        m_encoderState.m_mask = mask;
    }

    // Stores size in the encoder state (m_simdSize).
    inline void CEncoder::SetSimdSize(SIMDMode size) {
        m_encoderState.m_simdSize = size;
    }

    // Returns the SIMD size currently held in the encoder state.
    inline SIMDMode CEncoder::GetSimdSize() {
        return m_encoderState.m_simdSize;
    }

    // Stores size in the encoder state (m_uniformSIMDSize).
    inline void CEncoder::SetUniformSIMDSize(SIMDMode size) {
        m_encoderState.m_uniformSIMDSize = size;
    }

    // Stores subspan in the encoder state (m_SubSpanDestination).
    inline void CEncoder::SetSubSpanDestination(bool subspan) {
        m_encoderState.m_SubSpanDestination = subspan;
    }

    // Stores secondHalf in the encoder state (m_secondHalf).
    inline void CEncoder::SetSecondHalf(bool secondHalf) {
        m_encoderState.m_secondHalf = secondHalf;
    }

    // Returns the second-half flag held in the encoder state.
    inline bool CEncoder::IsSecondHalf() {
        return m_encoderState.m_secondHalf;
    }

    // Stores secondNibble in the encoder state (m_secondNibble).
    inline void CEncoder::SetSecondNibble(bool secondNibble) {
        m_encoderState.m_secondNibble = secondNibble;
    }

    // Returns the second-nibble flag held in the encoder state.
    inline bool CEncoder::IsSecondNibble() {
        return m_encoderState.m_secondNibble;
    }

    // Returns the sub-span-destination flag held in the encoder state.
    inline bool CEncoder::IsSubSpanDestination() {
        return m_encoderState.m_SubSpanDestination;
    }

    VISA_Modifier ConvertModifierToVisaType(e_modifier modifier);
    VISA_Cond_Mod ConvertCondModToVisaType(e_predicate condMod);
    VISA_Oword_Num  ConvertSizeToVisaType(uint size);
    VISAChannelMask ConvertChannelMaskToVisaType(uint mask);
    VISASourceSingleChannel ConvertSingleSourceChannel(uint srcChannel);


    GenPrecision ConvertPrecisionToVisaType(PrecisionType P);
}