File: igc.h

package info (click to toggle)
intel-graphics-compiler2 2.16.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 106,644 kB
  • sloc: cpp: 805,640; lisp: 287,672; ansic: 16,414; python: 3,952; yacc: 2,588; lex: 1,666; pascal: 313; sh: 186; makefile: 35
file content (216 lines) | stat: -rw-r--r-- 10,728 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
/*========================== begin_copyright_notice ============================

Copyright (C) 2018-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifndef __IGC_H
#define __IGC_H

#include "igc_api_calls.h"

// Define IGC_DEBUG for builds that define _DEBUG or _INTERNAL.
#if defined(_DEBUG) || defined(_INTERNAL)
#define IGC_DEBUG
#endif

#include <string>
#include <cstdint>
#include "../../common/EmUtils.h"
#include "../../Compiler/CodeGenPublicEnums.h"
// TODO: add all external declarations so that external projects need to
// include only this file.

// Definition of integer Registry-key Values
// (may merge this into igc_flags.h's macro)
enum {
  // Values accepted by ForcePixelShaderSIMDMode.

  // Default: SIMD8 compilation plus heuristics that determine whether
  // SIMD32/SIMD16 should be compiled as well.
  FLAG_PS_SIMD_MODE_DEFAULT = 0x0,
  // Force SIMD8 compilation.
  FLAG_PS_SIMD_MODE_FORCE_SIMD8 = 0x1,
  // Force SIMD16 compilation.
  FLAG_PS_SIMD_MODE_FORCE_SIMD16 = 0x2,
  // Force SIMD32 compilation.
  FLAG_PS_SIMD_MODE_FORCE_SIMD32 = 0x4,
};


enum {
  // Value for VISAPreSchedRPThreshold; 0 means the default is used.
  FLAG_VISA_PRE_SCHED_RP_THRESHOLD = 0x0,
};

// Single-bit masks tested and set by the stats macros in this header
// (IsRetry, DoSimd16/32, HasSimd*, SetSimd*, ...).
enum {
  // Bits 0-2: BIT_CG_SIMD<n>, checked by HasSimd(n, stats).
  BIT_CG_SIMD8 = 1 << 0,
  BIT_CG_SIMD16 = 1 << 1,
  BIT_CG_SIMD32 = 1 << 2,
  // Bits 3-5: BIT_CG_SPILL<n>, combined with BIT_CG_SIMD<n> by
  // HasSimdSpill / HasSimdNoSpill.
  BIT_CG_SPILL8 = 1 << 3,
  BIT_CG_SPILL16 = 1 << 4,
  BIT_CG_SPILL32 = 1 << 5,
  // Bit 6: checked by IsRetry, set by SetRetry.
  BIT_CG_RETRY = 1 << 6,
  // Bits 7-8: checked by DoSimd32 / DoSimd16, set by SetSimd32 / SetSimd16.
  BIT_CG_DO_SIMD32 = 1 << 7,
  BIT_CG_DO_SIMD16 = 1 << 8,
};

// Integer type of the m_stats field in CG_CTX_t, which the staging macros
// test against the BIT_CG_* masks.
typedef unsigned short CG_CTX_STATS_t;

// Shader statistics used for optimization customization.
// NOTE(review): this header does not document the individual counters; the
// field names are the only hint to their meaning — confirm against the code
// that fills this struct.
typedef struct {
  uint32_t m_tempCount;
  uint32_t m_sampler;
  uint32_t m_inputCount;
  uint32_t m_dxbcCount;
  uint32_t m_ConstantBufferCount;
  // presumably the denorm modes for 16-, 32- and 64-bit floats — TODO confirm
  IGC::Float_DenormMode m_floatDenormMode16;
  IGC::Float_DenormMode m_floatDenormMode32;
  IGC::Float_DenormMode m_floatDenormMode64;
} SHADER_STATS_t;

// Code-generation context data.
// NOTE(review): presumably this is the object the staging macros in this
// header receive as prev_ctx_ptr / m_StagingCtx (they read m_stats and
// m_savedBitcodeCharArraySize) — confirm against the callers.
typedef struct {
  CG_CTX_STATS_t m_stats;        // record what simd has been generated
  void *m_pixelShaderGen;        // Generated pixel shader output
  char *m_savedBitcodeCharArray; // Serialized Bitcode
  // presumably the size of m_savedBitcodeCharArray (unit not stated here —
  // TODO confirm); HasSavedIR() tests a field of this name for > 0
  unsigned int m_savedBitcodeCharArraySize;
  void *m_savedInstrTypes; // opaque in this header
  SHADER_STATS_t m_savedShaderStats;
} CG_CTX_t;

// Query and update helpers for a stats word built from the BIT_CG_* masks.
// `stats` may be any integer expression for the query macros and must be a
// modifiable lvalue for the Set* macros. MODE is pasted onto the mask names,
// so it must be the literal 8, 16 or 32.
// Every use of `stats` is parenthesized so that compound arguments such as
// `a | b` are evaluated as a unit.

// Non-zero when the corresponding single bit is set.
#define IsRetry(stats) ((stats) & (BIT_CG_RETRY))
#define DoSimd16(stats) ((stats) & (BIT_CG_DO_SIMD16))
#define DoSimd32(stats) ((stats) & (BIT_CG_DO_SIMD32))
#define HasSimd(MODE, stats) ((stats) & (BIT_CG_SIMD##MODE))

// SIMD<MODE> bit set together with / without the matching SPILL<MODE> bit.
#define HasSimdSpill(MODE, stats) (((stats) & (BIT_CG_SIMD##MODE)) && ((stats) & (BIT_CG_SPILL##MODE)))
#define HasSimdNoSpill(MODE, stats) (((stats) & (BIT_CG_SIMD##MODE)) && !((stats) & (BIT_CG_SPILL##MODE)))

// Set a single bit in `stats`.
#define SetRetry(stats) ((stats) = ((stats) | (BIT_CG_RETRY)))
#define SetSimd16(stats) ((stats) = ((stats) | (BIT_CG_DO_SIMD16)))
#define SetSimd32(stats) ((stats) = ((stats) | (BIT_CG_DO_SIMD32)))

// Set the SIMD<MODE> bit and the SPILL<MODE> bit.
#define SetSimdSpill(MODE, stats) ((stats) = ((stats) | (BIT_CG_SIMD##MODE) | (BIT_CG_SPILL##MODE)))

// Set the SIMD<MODE> bit and clear the SPILL<MODE> bit.
// The OR must be grouped before the AND: `&` binds tighter than `|`, so
// without the inner parentheses the mask would apply only to the SIMD
// constant and a previously set SPILL bit would never be cleared.
#define SetSimdNoSpill(MODE, stats) ((stats) = (((stats) | (BIT_CG_SIMD##MODE)) & ~(BIT_CG_SPILL##MODE)))


// Code-generation flag values compared against by the staging macros in this
// header. Each value below CG_FLAG_size is also an index into CG_FLAG_STR.
typedef enum CG_FLAG_t {
  FLAG_CG_ALL_SIMDS = 0,
  FLAG_CG_STAGE1_FAST_COMPILE = 1,
  FLAG_CG_STAGE1_BEST_PERF = 2,
  FLAG_CG_STAGE1_FASTEST_COMPILE = 3,
  CG_FLAG_size = 4, // number of flag values; length of CG_FLAG_STR
} CG_FLAG_t;

// Printable name for each CG_FLAG_t value, indexed by the enumerator.
static const char *CG_FLAG_STR[CG_FLAG_size] = {
    "RestStage2",    // FLAG_CG_ALL_SIMDS
    "FastStage1",    // FLAG_CG_STAGE1_FAST_COMPILE
    "BestStage1",    // FLAG_CG_STAGE1_BEST_PERF
    "FastestStage1", // FLAG_CG_STAGE1_FASTEST_COMPILE
};

// Capability checks. All three currently expand to `true` and do not
// evaluate their arguments.
#define IsSupportedForStagedCompilation(platform, product) (true)
#define IsSupportedForDX12StaticSampler(platform, product) (true)
#define IsSupportedForFastestSingleCSSIMD(platform, product) (true)

// True when ctx_ptr is non-null or flag equals FLAG_CG_STAGE1_FASTEST_COMPILE.
// Both parameters are parenthesized so that compound arguments (e.g.
// `bits & mask`) are compared as a unit.
#define RequestStage2(flag, ctx_ptr) ((ctx_ptr) != nullptr || (flag) == FLAG_CG_STAGE1_FASTEST_COMPILE)

// Stage classification predicates.
// `flag` is compared against the FLAG_CG_* values; `prev_ctx_ptr` is the
// previous-stage context pointer (null when there is none); `pCtx` is a
// pointer to an object exposing m_CgFlag and m_StagingCtx.
// All parameters are parenthesized so that arguments such as `&ctx` or
// `bits & mask` expand correctly.

// Stage 2 is identified solely by a non-null previous-stage context.
#define IsStage2RestSIMDs(prev_ctx_ptr) ((prev_ctx_ptr) != nullptr)

// Stage 1 variants: no previous-stage context and the matching flag value.
#define IsStage1FastCompile(flag, prev_ctx_ptr)                                                                        \
  (!IsStage2RestSIMDs(prev_ctx_ptr) && (flag) == FLAG_CG_STAGE1_FAST_COMPILE)
#define IsStage1FastestCompile(flag, prev_ctx_ptr)                                                                     \
  (!IsStage2RestSIMDs(prev_ctx_ptr) && (flag) == FLAG_CG_STAGE1_FASTEST_COMPILE)
#define IsStage1BestPerf(flag, prev_ctx_ptr) (!IsStage2RestSIMDs(prev_ctx_ptr) && (flag) == FLAG_CG_STAGE1_BEST_PERF)

// No previous-stage context and the FLAG_CG_ALL_SIMDS flag value.
#define IsAllSIMDs(flag, prev_ctx_ptr) (!IsStage2RestSIMDs(prev_ctx_ptr) && (flag) == FLAG_CG_ALL_SIMDS)

// Context is a best-perf or fast-compile Stage 1 (fastest-compile excluded).
#define IsStagingContextStage1(pCtx)                                                                                   \
  (IsStage1BestPerf((pCtx)->m_CgFlag, (pCtx)->m_StagingCtx) ||                                                         \
   IsStage1FastCompile((pCtx)->m_CgFlag, (pCtx)->m_StagingCtx))

// Context is any of the three Stage 1 variants.
#define IsStage1(pCtx)                                                                                                 \
  (IsStagingContextStage1(pCtx) || IsStage1FastestCompile((pCtx)->m_CgFlag, (pCtx)->m_StagingCtx))

// pCtx is non-null, has a previous-stage context, and that context's
// m_savedBitcodeCharArraySize is greater than zero.
#define HasSavedIR(pCtx)                                                                                               \
  ((pCtx) && IsStage2RestSIMDs((pCtx)->m_StagingCtx) && (pCtx)->m_StagingCtx->m_savedBitcodeCharArraySize > 0)

// True in Stage 2 (non-null prev_ctx_ptr) when the previous context's m_stats
// has the DO_SIMD32 / DO_SIMD16 bit set. prev_ctx_ptr is parenthesized before
// `->` so that arguments such as `&ctx` expand correctly.
#define DoSimd32Stage2(prev_ctx_ptr) (IsStage2RestSIMDs(prev_ctx_ptr) && DoSimd32((prev_ctx_ptr)->m_stats))
#define DoSimd16Stage2(prev_ctx_ptr) (IsStage2RestSIMDs(prev_ctx_ptr) && DoSimd16((prev_ctx_ptr)->m_stats))

// True for a Stage 1 fast compile whose stats have the retry bit or the
// DO_SIMD16 bit set.
#define ContinueFastCompileStage1(flag, prev_ctx_ptr, stats)                                                           \
  (IsStage1FastCompile(flag, prev_ctx_ptr) && (IsRetry(stats) || DoSimd16(stats)))

// True for a Stage 1 best-perf compile when:
//  - ExtraRetrySIMD16 is enabled and SIMD8 was not generated with spill, or
//  - ExtraRetrySIMD16 is disabled and either SIMD8 was not generated or the
//    DO_SIMD32 bit is set.
// With staged compilation enabled, no compile continuation is needed when
// SIMD8 is spilled.
#define ContinueBestPerfStage1(flag, prev_ctx_ptr, stats)                                                              \
  (IsStage1BestPerf(flag, prev_ctx_ptr) &&                                                                             \
   ((IGC_IS_FLAG_ENABLED(ExtraRetrySIMD16) && !HasSimdSpill(8, stats)) ||                                              \
    (!IGC_IS_FLAG_ENABLED(ExtraRetrySIMD16) && (!HasSimd(8, stats) || DoSimd32(stats)))))

// True when the RequestStage2 flag is enabled and either Stage 1 continuation
// condition above holds. No compile continuation is needed when staged
// compilation is not enabled via registry keys.
#define HasCompileContinuation(flag, prev_ctx_ptr, stats)                                                              \
  ((IGC_IS_FLAG_ENABLED(RequestStage2)) &&                                                                             \
   (ContinueFastCompileStage1(flag, prev_ctx_ptr, stats) || ContinueBestPerfStage1(flag, prev_ctx_ptr, stats)))

// True in Stage 2 (non-null prev_ctx_ptr) when the previous context's m_stats
// record SIMD<MODE> as generated without spill, i.e. it need not be generated
// again. `flag` is accepted but not used. prev_ctx_ptr is parenthesized before
// `->` so that arguments such as `&ctx` expand correctly.
#define AvoidDupStage2(MODE, flag, prev_ctx_ptr)                                                                       \
  (IsStage2RestSIMDs(prev_ctx_ptr) && HasSimdNoSpill(MODE, (prev_ctx_ptr)->m_stats))

// Stage 1 fast compile: SIMD8 must be present; neither SIMD16 nor SIMD32 may be.
#define ValidFastModes(flag, prev_ctx_ptr, stats)                                                                      \
  (IsStage1FastCompile(flag, prev_ctx_ptr) && HasSimd(8, stats) && !(HasSimd(16, stats) || HasSimd(32, stats)))

// Stage 1 fastest compile: SIMD8 must be present; neither SIMD16 nor SIMD32 may be.
#define ValidFastestModes(flag, prev_ctx_ptr, stats)                                                                   \
  (IsStage1FastestCompile(flag, prev_ctx_ptr) && HasSimd(8, stats) && !(HasSimd(16, stats) || HasSimd(32, stats)))

// Stage 1 best perf: SIMD8 or SIMD16 must be present; SIMD32 may not be.
#define ValidBestModes(flag, prev_ctx_ptr, stats)                                                                      \
  (IsStage1BestPerf(flag, prev_ctx_ptr) && (HasSimd(8, stats) || HasSimd(16, stats)) && !HasSimd(32, stats))

// ALL_SIMDS: SIMD8 must be present in any case.
#define ValidAllSimdsModes(flag, prev_ctx_ptr, stats) (IsAllSIMDs(flag, prev_ctx_ptr) && HasSimd(8, stats))

// Stage 2 (non-null prev_ctx_ptr) validity check on the generated modes:
//  - SIMD32 is not recorded in both the previous and the current stats;
//  - SIMD16 is not recorded in both the previous and the current stats;
//  - SIMD8 is recorded in exactly one of them, unless the previous stage
//    recorded SIMD8 with spill, in which case SIMD8 may be generated again.
// prev_ctx_ptr is parenthesized before `->` so that arguments such as `&ctx`
// expand correctly.
#define ValidStage2Modes(prev_ctx_ptr, stats)                                                                          \
  (IsStage2RestSIMDs(prev_ctx_ptr) && (!HasSimd(32, (prev_ctx_ptr)->m_stats) || !HasSimd(32, stats)) &&                \
   (!HasSimd(16, (prev_ctx_ptr)->m_stats) || !HasSimd(16, stats)) &&                                                   \
   ((HasSimd(8, (prev_ctx_ptr)->m_stats) ^ HasSimd(8, stats)) || HasSimdSpill(8, (prev_ctx_ptr)->m_stats)))

// True when the generated modes satisfy any one of the per-stage validity
// rules (fastest, fast, best-perf, all-SIMDs, or Stage 2).
#define ValidGeneratedModes(flag, prev_ctx_ptr, stats)                                                                 \
  (ValidFastestModes(flag, prev_ctx_ptr, stats) || ValidFastModes(flag, prev_ctx_ptr, stats) ||                        \
   ValidBestModes(flag, prev_ctx_ptr, stats) || ValidAllSimdsModes(flag, prev_ctx_ptr, stats) ||                       \
   ValidStage2Modes(prev_ctx_ptr, stats))

// Yields the FastestS1Experiments flag value when it is non-zero; otherwise
// the FastestS1Options field of the context's module metadata compOpt.
// ctx_ptr is parenthesized before `->` so that arguments such as `&ctx`
// expand correctly.
#define FastestS1Options(ctx_ptr)                                                                                      \
  (IGC_GET_FLAG_VALUE(FastestS1Experiments) ? IGC_GET_FLAG_VALUE(FastestS1Experiments)                                 \
                                            : (ctx_ptr)->getModuleMetaData()->compOpt.FastestS1Options)

// Experimental flags for CodePatch compilation. Every non-zero value is a
// distinct single bit.
typedef enum {
  CODE_PATCH_NO_EXPRIMENT = 0x00,
  CODE_PATCH_NO_PullSampleIndex = 0x01,   // bit 0
  CODE_PATCH_NO_PullSnapped = 0x02,       // bit 1
  CODE_PATCH_NO_PullCentroid = 0x04,      // bit 2
  CODE_PATCH_NO_ZWDelta = 0x08,           // bit 3
  CODE_PATCH_NO_UNSTABLE_PLATFORM = 0x10, // bit 4
} CODE_PATCH_FLAG_t;

// Experimental flags for the fastest compilation mode. Each FCEXP_* value up
// to FCEXP_TOBE_DESIGNED occupies its own bit; FCEXP_DEFAULT and
// FCEXP_DISABLED are derived from those bits.
typedef enum {
  FCEXP_NO_EXPRIMENT = 0,
  FCEXP_DISABLE_LVN = 1 << 0,
  FCEXP_LINEARSCAN = 1 << 1,
  FCEXP_DISABLE_GOPT = 1 << 2,
  FCEXP_1PASSRA = 1 << 3,
  FCEXP_FASTSPILL = 1 << 4,
  FCEXP_LOCAL_SCHEDULING = 1 << 5,
  FCEXP_PRERA_SCHEDULING = 1 << 6,
  FCEXP_NO_REMAT = 1 << 7,
  FCEXP_NO_SPILL_COMPRESSION = 1 << 8,
  FCEXP_LOCAL_DECL_SPLIT_GLOBAL_RA = 1 << 9,
  FCEXP_QUICKTOKEN_ALLOC = 1 << 10,
  FCEXP_DISABLE_UNROLL = 1 << 11,
  FCEXP_TOBE_DESIGNED = 1 << 12,

  // The current default set of stage 1 options. This *must* be kept in sync
  // whenever the default changes.
  FCEXP_DEFAULT = FCEXP_DISABLE_LVN | FCEXP_LINEARSCAN | FCEXP_DISABLE_GOPT | FCEXP_LOCAL_SCHEDULING |
                  FCEXP_PRERA_SCHEDULING | FCEXP_QUICKTOKEN_ALLOC,

  // Alias used by the UMD to signal that staged compilation must be disabled.
  FCEXP_DISABLED = FCEXP_TOBE_DESIGNED
} FCEXP_FLAG_t;

//////////////////////////////////////////////////////////////////////////
/// @brief Describes a blob of precompiled LLVM bytecode handed to IGC.
namespace IGC {
struct BIFModule {
  /// Size of the bytecode; zero by default.
  uint64_t m_ByteCodeSize{0};
  /// Pointer to the LLVM bytecode; null by default.
  const void *m_pLLVMBytecode{nullptr};

  /// Bits that IGC treats as opaque. They may carry configuration data for
  /// the function(s) in the LLVM module built from the bytecode.
  uint64_t m_ConfigBits{0};
};
} // namespace IGC
#endif // __IGC_H