1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
|
/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2021 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#pragma once
#include "common/igc_regkeys.hpp"
#include "common/Types.hpp"
#include "inc/common/igfxfmid.h"
#include "CommonMacros.h"
/*
This header provides hooks to query whether a feature is supported by the runtime we are
compiling for. It defines the default values; each adapter can then override any of the
queries to tell the backend what it does and does not support. It also implements some
workarounds for cases where an API or driver does not support something.
*/
namespace IGC
{
    /// Base class holding the default answer to every driver/runtime capability query.
    /// Each query is a virtual method returning a fixed default; an adapter derives from
    /// this class and overrides only the queries whose answer differs for its API/driver.
    class CDriverInfo
    {
    public:
        /// Virtual destructor: this is a polymorphic base class, so destroying a derived
        /// adapter through a CDriverInfo pointer must run the derived destructor.
        virtual ~CDriverInfo() = default;

        /// The driver implements the WA using constant buffer 2 for NOS constants instead of 0
        virtual bool implementPushConstantWA() const { return false; }

        /// Driver supports Simple Push Mechanism only.
        virtual bool SupportsSimplePushOnly() const { return false; }

        /// Driver supports Gather Constant Mechanism only.
        virtual bool SupportsGatherConstantOnly() const { return false; }

        /// Driver supports resource streamer if HW supports it, otherwise simple push
        virtual bool SupportsHWResourceStreameAndSimplePush() const { return false; }

        /// Driver supports dynamic uniform buffers.
        virtual bool SupportsDynamicUniformBuffers() const { return false; }

        /// Is any special metadata translation required
        virtual bool NeedsMetadataTranslation() const { return false; }

        /// Do we need to break down the fmuladd
        virtual bool NeedsBreakdownMulAdd() const { return false; }

        /// The driver supports using scratch space to store the private memory
        virtual bool supportsScratchSpacePrivateMemory() const { return true; }

        /// The driver supports using stateless space to store the private memory.
        /// Driver must be able to use at least one way to store the private memory: either "scratch space" or "stateless space"
        /// and by default, driver only supports one of them (the default is the negation of
        /// supportsScratchSpacePrivateMemory()).
        /// NOTE: This method should only be used for XeHP and above to avoid changes to legacy GENs
        /// And this is the only place telling if one API supports statelesspvtmem or not.
        /// If this API doesn't support statelesspvtmem, IGC will error out if pvtmemusage > 256k in PrivateMemoryResolution
        virtual bool supportsStatelessSpacePrivateMemory() const { return !supportsScratchSpacePrivateMemory(); }

        /// The driver requires to align each entry (a workgroup item) of private scratch memory in a stateless
        /// buffer.
        virtual bool requiresPowerOfTwoStatelessSpacePrivateMemorySize() const { return false; }

        /// The driver supports splitting up scratch memory space into two areas:
        /// - private scratch memory space: non-promoted alloca instructions (early allocated scratch
        ///   memory space based on llvm IR)
        /// - spill/fill and Gtpin scratch memory space: (late allocated scratch memory space based
        ///   on register allocation)
        /// The default follows the SeparateSpillPvtScratchSpace flag.
        virtual bool supportsSeparatingSpillAndPrivateScratchMemorySpace() const { return IGC_IS_FLAG_ENABLED(SeparateSpillPvtScratchSpace); }

        /// The max size in bytes of the scratch space per thread (2 * 1024 * 1024).
        /// Unlike the other queries this one is not virtual.
        unsigned int maxPerThreadScratchSpace() const { return 2 * 1024 * 1024; }

        /// The driver uses special states to push constants beyond index 256
        virtual bool Uses3DSTATE_DX9_CONSTANT() const { return false; }

        /// The driver uses typed or untyped constant buffers (for ld_raw vs sampler)
        virtual bool UsesTypedConstantBuffers3D() const { return true; }

        /// The driver uses typed constant buffers requiring byte address access.
        virtual bool UsesTypedConstantBuffersWithByteAddress() const { return false; }

        /// The driver uses typed or untyped constant buffers (for ld_raw vs sampler)
        virtual bool UsesTypedConstantBuffersGPGPU() const { return true; }

        /// Overrides UsesTypedConstantBuffers3D() and UsesTypedConstantBuffersGPGPU()
        /// for bindless buffers only.
        virtual bool ForceUntypedBindlessConstantBuffers() const { return false; }

        /// The driver uses sparse aliased residency
        virtual bool UsesSparseAliasedResidency() const { return false; }

        /// The driver doesn't clear the vertex header so it needs to be done in the compiler
        virtual bool NeedClearVertexHeader() const { return false; }

        /// Do Fastest Stage1 only for 3D
        virtual bool SupportFastestStage1() const { return true; }

        /// Do code sinking before CFGSimplification, helps some workloads
        virtual bool CodeSinkingBeforeCFGSimplification() const { return false; }

        /// Allow executing constant buffer on the CPU.
        /// The default ignores shaderType and always answers false.
        virtual bool AllowGenUpdateCB(ShaderType shaderType) const { IGC_UNUSED(shaderType); return false; }

        /// The driver implements single instance vertex dispatch feature
        virtual bool SupportsSingleInstanceVertexDispatch() const { return false; }

        /// Allow branch swapping for better NaN perf
        virtual bool BranchSwapping() const { return false; }

        /// Allow propagation of up-conversion of half if it can generate better code
        virtual bool AllowUnsafeHalf() const { return true; }

        /// Allow send fusion (some APIs have perf regressions, temporarily used to turn it off)
        virtual bool AllowSendFusion() const { return true; }

        /// Supports more than 16 samplers
        virtual bool SupportMoreThan16Samplers() const { return false; }

        /// API supports IEEE min/max
        virtual bool SupportsIEEEMinMax() const { return false; }

        // NOTE(review): no description was provided for this query in the original header.
        virtual bool NeedCountSROA() const { return false; }

        /// Can we always contract mul and add
        virtual bool NeedCheckContractionAllowed() const { return false; }

        /// The API generates load/store of doubles which needs to be broken down
        virtual bool HasDoubleLoadStore() const { return false; }

        /// Needs emulation of 64bits instructions
        virtual bool NeedI64BitDivRem() const { return false; }

        /// Return true if IGC needs FP64 emulation. (Valid if platform has no double inst.)
        /// The default ignores productFamily and always answers false.
        virtual bool NeedFP64(PRODUCT_FAMILY productFamily) const { IGC_UNUSED(productFamily); return false; }

        /// Needs IEEE fp64 div/sqrt
        virtual bool NeedFP64DivSqrt() const { return false; }

        /// Must support f32 IEEE divide (also sqrt)
        virtual bool NeedIEEESPDiv() const { return false; }

        /// Has memcpy/memset intrinsic
        virtual bool HasMemoryIntrinsics() const { return false; }

        /// Has load store not natively supported
        virtual bool HasNonNativeLoadStore() const { return false; }

        /// Need lowering global inlined constant buffers
        virtual bool NeedLoweringInlinedConstants() const { return false; }

        /// Turn on type demotion, not tested on all APIs
        virtual bool benefitFromTypeDemotion() const { return false; }

        /// Turn on type rematerialization of flag register, not tested on all APIs
        virtual bool benefitFromPreRARematFlag() const { return false; }

        /// Add extra optimization passes after AlwaysInlinerPass to support two phase inlining
        virtual bool NeedExtraPassesAfterAlwaysInlinerPass() const { return false; }

        /// Turn on vISA pre-RA scheduler. Not tested on all APIs
        virtual bool enableVISAPreRAScheduler() const { return false; }

        /// Turn on vISA pre-RA scheduler for retry
        virtual bool enableVISAPreRASchedulerForRetry() const { return false; }

        /// Configure vISA pre-RA scheduler. Not tested on all APIs
        virtual unsigned getVISAPreRASchedulerCtrl() const { return 4; }

        /// Turn on sampler clustering. Hopefully VISA PreRA scheduler with latency hiding can replace it.
        virtual bool enableSampleClustering() const { return true; }

        /// Make sure optimizations are consistent to avoid Z-fighting issue
        virtual bool PreventZFighting() const { return false; }

        /// Force enabling SIMD32 in case we expect latency problem. Helps some workloads
        virtual bool AlwaysEnableSimd32() const { return false; }

        /// Driver supports promoting buffers to bindful
        virtual bool SupportsStatelessToStatefulBufferTransformation() const { return false; }

        /// Need emulation of 64bits type for HW not supporting it natively
        virtual bool Enable64BitEmu() const { return false; }

        /// In some cases several BTI may alias
        virtual bool DisableDpSendReordering() const { return false; }

        /// Driver uses HW alt math mode, this causes floating point operations to behave differently
        virtual bool UseALTMode() const { return false; }

        /// Whether the driver supports blend to fill opt
        virtual bool SupportBlendToFillOpt() const { return false; }

        /// Need to know if the driver can accept more than one SIMD mode for compute shaders
        virtual bool sendMultipleSIMDModes() const { return false; }

        /// Pick behavior whether we need to keep discarded helper pixels to calculate
        /// gradient correctly for sampler or we need to force early out discarded pixels
        virtual bool KeepDiscardHelperPixels() const { return false; }

        /// Choose to support parsing inlined asm instructions on specific platforms
        virtual bool SupportInlineAssembly() const { return false; }

        /// Support predicate add pattern match
        virtual bool SupportMatchPredAdd() const { return false; }

        /// Lets the adapter adjust the loop unrolling threshold
        virtual unsigned int GetLoopUnrollThreshold() const
        {
            return 4000;
        }

        // ----------------------------------------------------------------------
        // Below are workarounds for bugs in the front end or IGC; they will be
        // removed once the bugs are fixed

        /// Need workaround for A32 messages used along with A64
        virtual bool NeedWAToTransformA32MessagesToA64() const { return false; }

        /// Disable mad in Vertex shader to avoid Z-fighting issues.
        /// (The spelling of the method name is part of the interface and is kept as-is.)
        virtual bool DisabeMatchMad() const { return false; }

        /// Some FE sends SLM pointers in DWORD units
        virtual bool WASLMPointersDwordUnit() const { return false; }

        /// Custom pass hasn't been tested on all APIs
        virtual bool WADisableCustomPass() const { return false; }

        /// MemOpt2ForOCL pass not tested on all APIs
        virtual bool WAEnableMemOpt2ForOCL() const { return false; }

        /// Disable some optimizations for front end which sends IR with unresolved NOS function when optimizing
        virtual bool WaNOSNotResolved() const { return false; }

        /// WA for APIs where frc generates a different precision than x - rndd(x) for small negative values
        /// Needs to switch to use fast math flags
        virtual bool DisableMatchFrcPatternMatch() const { return false; }

        /// Based on the type of inlined sampler we get we program different output.
        virtual bool ProgrammableBorderColorInCompute() const { return false; }

        /// WA for failures with HS with push constants
        virtual bool WaDisablePushConstantsForHS() const { return false; }

        /// WA for failures with push constants and no pushed attributes
        virtual bool WaDisablePushConstantsWithNoPushedAttributes() const { return false; }

        /// Check if we have to worry about stack overflow while recursing in loop analysis
        virtual bool HasSmallStack() const { return false; }

        /// Check if the stateful token is supported
        virtual bool SupportStatefulToken() const { return false; }

        /// Disables dual patch dispatch for APIs that don't use it
        virtual bool APIDisableDSDualPatchDispatch() const { return false; }

        /// WA to make sure scratch writes are globally observed before EOT
        virtual bool clearScratchWriteBeforeEOT() const { return false; }

        /// Should unaligned vectors be split before processing in EmitVISA
        virtual bool splitUnalignedVectors() const { return true; }

        /// Does not emit an error if recursive function calls are detected.
        virtual bool AllowRecursion() const { return false; }

        /// Restrict dessa aliasing level. -1 : no restriction; max level otherwise.
        virtual int DessaAliasLevel() const { return -1; }

        /// Rounding mode used for DP emulated function, defaults to Round to nearest
        virtual unsigned DPEmulationRoundingMode() const { return 0; }

        /// Check for flushing denormals for DP emulated function
        virtual bool DPEmulationFlushDenorm() const { return false; }

        /// Check for flush to zero for DP emulated function
        virtual bool DPEmulationFlushToZero() const { return false; }

        /// Maximum id that can be used by simple push constant buffers.
        /// The default is maximum unsigned int (no restriction)
        virtual unsigned int MaximumSimplePushBufferID() const { return std::numeric_limits<unsigned int>::max(); }

        /// Enables the use of inline data on XeHP_SDV+
        virtual bool UseInlineData() const { return false; }

        /// Use first VB to send vertex&base instance and second for draw index
        virtual bool UsesVertexBuffersToSendShaderDrawParameters() const { return false; }

        /// Use indirect payload in CS
        virtual bool UsesIndirectPayload() const { return true; }

        // NOTE(review): no description was provided for this query in the original header.
        virtual bool SupportsDispatchGPGPUWalkerAlongYFirst() const { return true; }

        /// Check if integer mad is enabled
        virtual bool EnableIntegerMad() const { return false; }

        /// Respect per instruction 'contract' Fast-Math flag
        virtual bool RespectPerInstructionContractFlag() const { return false; }

        /// Add shader hash code after EOT for debug purposes
        virtual bool EnableShaderDebugHashCodeInKernel() const { return false; }

        /// The size of output printf buffer is 4 MB by default by agreement with Runtime.
        /// NOTE(review): relies on sizeof(MEGABYTE) being one megabyte; MEGABYTE is declared
        /// outside this header - confirm.
        virtual uint32_t getPrintfBufferSize() const
        {
            return 4 * sizeof(MEGABYTE);
        }

        /// Limits simple push constants based on pushed inputs
        virtual bool EnableSimplePushRestriction() const { return false; }

        /// Determines whether the PAYLOAD_HEADER implicit arg must be present
        virtual bool RequirePayloadHeader() const { return true; }

        /// True when the stored autoGRFSelection value is set or when the
        /// ForceSupportsAutoGRFSelection flag is enabled.
        virtual bool supportsAutoGRFSelection() const { return autoGRFSelection || IGC_IS_FLAG_ENABLED(ForceSupportsAutoGRFSelection); }

        /// Stores the value later reported by supportsAutoGRFSelection().
        virtual void setAutoGRFSelection(bool value) { autoGRFSelection = value; }

        // NOTE(review): no description was provided for this query in the original header.
        virtual bool UseScratchSpaceForATSPlus() const { return false; }

        /// Enables HWGenerateThreadID from API level. To help debug, we must enable it from both API level AND IGC Core level.
        virtual bool SupportHWGenerateTID() const { return false; }

        /// Enables the use of simple push constants when on platforms with local (device) memory
        virtual bool supportsSimplePushForLocalMem() const { return false; }

        /// Disable dual8 with discard
        virtual bool DisableDual8WithDiscard() const { return false; }

        /// Support force routing to HDC and LCS caching options
        virtual bool SupportForceRouteAndCache() const { return false; }

        /// If enabled, IGC must provide the corresponding UMD info on how much
        /// memory to allocate for the RTGlobals + global root signature.
        virtual bool supportsExpandedRTGlobals() const { return false; }

        /// If enabled, UMD must support setting up threadgroup according to
        /// RayTracingCustomTileXDim* and RayTracingCustomTileYDim*. If you want
        /// to experiment with non-power-of-2 x dimensions, you also have to
        /// support filling local IDs in the indirect state for the shader to
        /// read (not required otherwise).
        virtual bool supportsRaytracingTiling() const { return false; }

        /// Enables the use of scratch space in raytracing shaders when possible
        virtual bool supportsRTScratchSpace() const { return false; }

        /// Enables Raytracing printf
        virtual bool SupportsRTPrintf() const { return false; }

        /// Enables stateful accesses to the RTAsyncStack, SWHotZone, SWStack and RTSyncStack
        virtual bool supportsRaytracingStatefulAccesses() const { return false; }

        /// To support this, the compiler output must be able to express a
        /// raygen shader identifier with continuation KSPs after it.
        virtual bool supportsRaytracingContinuationPromotion() const { return false; }

        /// To support this, UMD must flip the X and Y dimensions
        virtual bool supportsRaytracingDispatchComputeWalkerAlongYFirst() const { return false; }

        /// Will the UMD patch the call stack handler with KSP pointers?
        virtual bool supportsCallStackHandlerPatching() const { return false; }

        /// Enable LSC on DG2 for the following:
        ///   GenISAIntrinsic::GenISA_ldraw_indexed
        ///   GenISAIntrinsic::GenISA_ldrawvector_indexed
        ///   GenISAIntrinsic::GenISA_storeraw_indexed
        ///   GenISAIntrinsic::GenISA_storerawvector_indexed
        /// todo: remove when all APIs enable LSC
        virtual bool EnableLSCForLdRawAndStoreRawOnDG2() const { return false; }

        /// Check SLM limit on compute shader to select SIMD8
        virtual bool SupportCSSLMLimit() const { return false; }

        // NOTE(review): no description was provided for this query in the original header.
        virtual bool supportsSIMD32forCPS() const { return true; }

        /// When dual-source blending is enabled, enable sending the
        /// single-source RTW message (with data for the second color) after the
        /// dual-source blending RTW message. The second message must be sent
        /// when the state of dual-source blending is not known at compile time.
        virtual bool sendSingleSourceRTWAfterDualSourceRTW() const { return true; }

        /// Specifies alignment of indirect data
        virtual unsigned getCrossThreadDataAlignment() const { return 32; }

    protected:
        /// Backing value for supportsAutoGRFSelection(); written by setAutoGRFSelection().
        bool autoGRFSelection = false;
    };
}//namespace IGC
|