File: DriverInfo.hpp

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
links: PTS, VCS
area: main
in suites: bookworm
size: 83,912 kB
sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (372 lines) | stat: -rw-r--r-- 18,510 bytes
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#pragma once

#include <cstdint>
#include <limits>

#include "common/igc_regkeys.hpp"
#include "common/Types.hpp"
#include "inc/common/igfxfmid.h"
#include "CommonMacros.h"

/*
This file provides hooks to query whether a feature is supported by the runtime we are compiling for.
It defines the default value of every query; each adapter can then override any query to tell the
backend what it supports and what it does not. It also implements some workarounds for cases where
an API or driver does not support something.
*/

namespace IGC
{

    /// Describes what the runtime/driver we are compiling for supports.
    /// Every query below returns a default value; an adapter derives from this class and
    /// overrides the queries whose answer differs for it.
    class CDriverInfo
    {
    public:
        /// Polymorphic base class: the destructor is virtual so that a derived adapter object
        /// is destroyed correctly even when it is deleted through a CDriverInfo pointer.
        virtual ~CDriverInfo() = default;

        /// The driver implements the WA using constant buffer 2 for NOS constants instead of 0
        virtual bool implementPushConstantWA() const { return false; }

        /// Driver supports Simple Push Mechanism only.
        virtual bool SupportsSimplePushOnly() const { return false; }

        /// Driver supports Gather Constant Mechanism only.
        virtual bool SupportsGatherConstantOnly() const { return false; }

        /// Driver supports resource streamer if HW supports it, otherwise simple push
        virtual bool SupportsHWResourceStreameAndSimplePush() const { return false; }

        /// Driver supports dynamic uniform buffers.
        virtual bool SupportsDynamicUniformBuffers() const { return false; }

        /// Is any special metadata translation required
        virtual bool NeedsMetadataTranslation() const { return false; }

        /// Do we need to break down the fmuladd
        virtual bool NeedsBreakdownMulAdd() const { return false; }

        /// The driver supports using scratch space to store the private memory
        virtual bool supportsScratchSpacePrivateMemory() const { return true; }

        /// The driver supports using stateless space to store the private memory.
        /// Driver must be able to use at least one way to store the private memory: either "scratch space" or "stateless space"
        /// and by default, driver only supports one of them (the default is the negation of
        /// supportsScratchSpacePrivateMemory()).
        /// NOTE: This method should only be used for XeHP and above to avoid changes to legacy GENs
        ///         And this is the only place telling if one API supports statelesspvtmem or not.
        ///         If this API doesn't support statelesspvtmem, IGC will error out if pvtmemusage > 256k in PrivateMemoryResolution
        virtual bool supportsStatelessSpacePrivateMemory() const { return !supportsScratchSpacePrivateMemory(); }

        /// The driver requires to align each entry (a workgroup item) of private scratch memory in a stateless
        /// buffer.
        virtual bool requiresPowerOfTwoStatelessSpacePrivateMemorySize() const { return false; }

        /// The driver supports splitting up scratch memory space into two areas:
        /// - private scratch memory space: non-promoted alloca instructions (early allocated scratch
        ///   memory space based on llvm IR)
        /// - spill/fill and Gtpin scratch memory space: (late allocated scratch memory space based
        ///   on register allocation)
        /// The default follows the SeparateSpillPvtScratchSpace flag.
        virtual bool supportsSeparatingSpillAndPrivateScratchMemorySpace() const { return IGC_IS_FLAG_ENABLED(SeparateSpillPvtScratchSpace); }

        /// The max size in bytes of the scratch space per thread (2 MB).
        /// NOTE(review): unlike the other queries this one is not virtual, so adapters cannot
        /// override it - confirm this is intended.
        unsigned int maxPerThreadScratchSpace() const { return 2 * 1024 * 1024; }

        /// The driver uses special states to push constants beyond index 256
        virtual bool Uses3DSTATE_DX9_CONSTANT() const { return false; }

        /// The driver uses typed or untyped constant buffers (for ld_raw vs sampler)
        virtual bool UsesTypedConstantBuffers3D() const { return true; }

        /// The driver uses typed constant buffers requiring byte address access.
        virtual bool UsesTypedConstantBuffersWithByteAddress() const { return false; }

        /// The driver uses typed or untyped constant buffers (for ld_raw vs sampler)
        virtual bool UsesTypedConstantBuffersGPGPU() const { return true; }

        /// Overrides UsesTypedConstantBuffers3D() and UsesTypedConstantBuffersGPGPU()
        /// for bindless buffers only.
        virtual bool ForceUntypedBindlessConstantBuffers() const { return false; }

        /// The driver uses sparse aliased residency
        virtual bool UsesSparseAliasedResidency() const { return false; }

        /// The driver doesn't clear the vertex header so it needs to be done in the compiler
        virtual bool NeedClearVertexHeader() const { return false; }

        /// Do Fastest Stage1 only for 3D
        virtual bool SupportFastestStage1() const { return true; }

        /// Do code sinking before CFGSimplification, helps some workloads
        virtual bool CodeSinkingBeforeCFGSimplification() const { return false; }

        /// Allow executing constant buffer on the CPU.
        /// The default ignores shaderType and always answers false.
        virtual bool AllowGenUpdateCB(ShaderType shaderType) const { IGC_UNUSED(shaderType); return false; }

        /// The driver implements single instance vertex dispatch feature
        virtual bool SupportsSingleInstanceVertexDispatch() const { return false; }

        /// Allow branch swapping for better NaN perf
        virtual bool BranchSwapping() const { return false; }

        /// Allow propagation of up-conversion of half if it can generate better code
        virtual bool AllowUnsafeHalf() const { return true; }

        /// Allow send fusion (some APIs have perf regressions; temporarily used to turn it off)
        virtual bool AllowSendFusion() const { return true; }

        /// Supports more than 16 samplers
        virtual bool SupportMoreThan16Samplers() const { return false; }

        /// API supports IEEE min/max
        virtual bool SupportsIEEEMinMax() const { return false; }

        /// NOTE(review): undocumented in the original; presumably tells whether SROA
        /// occurrences need to be counted - confirm against the users of this query.
        virtual bool NeedCountSROA() const { return false; }

        /// Can we always contract mul and add
        virtual bool NeedCheckContractionAllowed() const { return false; }

        /// The API generates load/store of doubles which needs to be broken down
        virtual bool HasDoubleLoadStore() const { return false; }

        /// Needs emulation of 64bits instructions
        virtual bool NeedI64BitDivRem() const { return false; }

        /// Return true if IGC needs FP64 emulation. (Valid if platform has no double inst.)
        /// The default ignores productFamily and always answers false.
        virtual bool NeedFP64(PRODUCT_FAMILY productFamily) const { IGC_UNUSED(productFamily); return false; }

        /// Needs IEEE fp64 div/sqrt
        virtual bool NeedFP64DivSqrt() const { return false; }

        /// Must support f32 IEEE divide (also sqrt)
        virtual bool NeedIEEESPDiv() const { return false; }

        /// Has memcpy/memset intrinsic
        virtual bool HasMemoryIntrinsics() const { return false; }

        /// Has load store not natively supported
        virtual bool HasNonNativeLoadStore() const { return false; }

        /// Need lowering global inlined constant buffers
        virtual bool NeedLoweringInlinedConstants() const { return false; }

        /// Turn on type demotion, not tested on all APIs
        virtual bool benefitFromTypeDemotion() const { return false; }

        /// Turn on rematerialization of flag register, not tested on all APIs
        virtual bool benefitFromPreRARematFlag() const { return false; }

        /// Add extra optimization passes after AlwaysInlinerPass to support two phase inlining
        virtual bool NeedExtraPassesAfterAlwaysInlinerPass() const { return false; }

        /// Turn on vISA pre-RA scheduler. Not tested on all APIs
        virtual bool enableVISAPreRAScheduler() const { return false; }

        /// Turn on vISA pre-RA scheduler for retry
        virtual bool enableVISAPreRASchedulerForRetry() const { return false; }

        /// Configure vISA pre-RA scheduler. Not tested on all APIs
        virtual unsigned getVISAPreRASchedulerCtrl() const { return 4; }

        /// Turn on sampler clustering. Hopefully VISA PreRA scheduler with latency hiding can replace it.
        virtual bool enableSampleClustering() const { return true; }

        /// Make sure optimizations are consistent to avoid Z-fighting issues
        virtual bool PreventZFighting() const { return false; }

        /// Force enabling SIMD32 in case we expect latency problems. Helps some workloads
        virtual bool AlwaysEnableSimd32() const { return false; }

        /// Driver supports promoting buffers to bindful
        virtual bool SupportsStatelessToStatefulBufferTransformation() const { return false; }

        /// Need emulation of 64bits type for HW not supporting it natively
        virtual bool Enable64BitEmu() const { return false; }

        /// In some cases several BTI may alias
        virtual bool DisableDpSendReordering() const { return false; }

        /// Driver uses HW alt math mode, this causes floating point operations to behave differently
        virtual bool UseALTMode() const { return false; }

        /// Whether the driver supports blend to fill opt
        virtual bool SupportBlendToFillOpt() const { return false; }

        /// Need to know if the driver can accept more than one SIMD mode for compute shaders
        virtual bool sendMultipleSIMDModes() const { return false; }

        /// Pick behavior whether we need to keep discarded helper pixels to calculate
        /// gradient correctly for sampler or we need to force early out discarded pixels
        virtual bool KeepDiscardHelperPixels() const { return false; }

        /// Choose to support parsing inlined asm instructions on specific platforms
        virtual bool SupportInlineAssembly() const { return false; }

        /// Support predicate add pattern match
        virtual bool SupportMatchPredAdd() const { return false; }

        /// Lets the adapter adjust the loop unrolling threshold
        virtual unsigned int GetLoopUnrollThreshold() const
        {
            return 4000;
        }

        // ----------------------------------------------------------------------
        // Below are workarounds for bugs in the front end or IGC; they will be
        // removed once the bugs are fixed

        /// Need workaround for A32 messages used along with A64
        virtual bool NeedWAToTransformA32MessagesToA64() const { return false; }

        /// Disable mad in Vertex shader to avoid Z-fighting issues
        virtual bool DisabeMatchMad() const { return false; }

        /// Some FE sends SLM pointers in DWORD units
        virtual bool WASLMPointersDwordUnit() const { return false; }

        /// Custom pass hasn't been tested on all APIs
        virtual bool WADisableCustomPass() const { return false; }

        /// MemOpt2ForOCL pass not tested on all APIs
        virtual bool WAEnableMemOpt2ForOCL() const { return false; }

        /// Disable some optimizations for front ends which send IR with unresolved NOS function when optimizing
        virtual bool WaNOSNotResolved() const { return false; }

        /// WA for APIs where frc generates a different precision than x - rndd(x) for small negative values
        /// Needs to switch to use fast math flags
        virtual bool DisableMatchFrcPatternMatch() const { return false; }

        /// Based on the type of inlined sampler we get we program different output.
        virtual bool ProgrammableBorderColorInCompute() const { return false; }

        /// WA for failures with HS with push constants
        virtual bool WaDisablePushConstantsForHS() const { return false; }

        /// WA for failures with push constants and no pushed attributes
        virtual bool WaDisablePushConstantsWithNoPushedAttributes() const { return false; }

        /// Check if we have to worry about stack overflow while recursing in loop analysis
        virtual bool HasSmallStack() const { return false; }

        /// Check if the stateful token is supported
        virtual bool SupportStatefulToken() const { return false; }

        /// Disables dual patch dispatch for APIs that don't use it
        virtual bool APIDisableDSDualPatchDispatch() const { return false; }

        /// WA to make sure scratch writes are globally observed before EOT
        virtual bool clearScratchWriteBeforeEOT() const { return false; }

        /// Should unaligned vectors be split before processing in EmitVISA
        virtual bool splitUnalignedVectors() const { return true; }

        /// Does not emit an error if recursive function calls are detected.
        virtual bool AllowRecursion() const { return false; }

        /// Restrict dessa aliasing level. -1 : no restriction; max level otherwise.
        virtual int DessaAliasLevel() const { return -1; }

        /// Rounding mode used for DP emulated function, defaults to Round to nearest
        virtual unsigned DPEmulationRoundingMode() const { return 0; }

        /// Check for flushing denormals for DP emulated function
        virtual bool DPEmulationFlushDenorm() const { return false; }

        /// Check for flush to zero for DP emulated function
        virtual bool DPEmulationFlushToZero() const { return false; }

        /// Maximum id that can be used by simple push constant buffers. The default is maximum unsigned int (no restriction)
        virtual unsigned int MaximumSimplePushBufferID() const { return std::numeric_limits<unsigned int>::max(); }

        /// Enables the use of inline data on XeHP_SDV+
        virtual bool UseInlineData() const { return false; }

        /// Use first VB to send vertex&base instance and second for draw index
        virtual bool UsesVertexBuffersToSendShaderDrawParameters() const { return false; }

        /// Use indirect payload in CS
        virtual bool UsesIndirectPayload() const { return true; }

        /// NOTE(review): undocumented in the original; presumably tells whether the GPGPU
        /// walker may be dispatched along Y first - confirm against the users of this query.
        virtual bool SupportsDispatchGPGPUWalkerAlongYFirst() const { return true; }

        /// Check if integer mad is enabled
        virtual bool EnableIntegerMad() const { return false; }

        /// Respect per instruction 'contract' Fast-Math flag
        virtual bool RespectPerInstructionContractFlag() const { return false; }

        /// Add shader hash code after EOT for debug purposes
        virtual bool EnableShaderDebugHashCodeInKernel() const { return false; }

        /// The size of output printf buffer is 4 MB by default by agreement with Runtime.
        /// The value is computed as 4 * sizeof(MEGABYTE); MEGABYTE is declared outside this
        /// file and is assumed to be a type occupying exactly 1 MB - TODO confirm.
        virtual uint32_t getPrintfBufferSize() const
        {
            // sizeof yields size_t; make the conversion to the 32-bit return type explicit.
            return static_cast<uint32_t>(4 * sizeof(MEGABYTE));
        }

        /// Limits simple push constants based on pushed inputs
        virtual bool EnableSimplePushRestriction() const { return false; }

        /// Determines whether the PAYLOAD_HEADER implicit arg must be present
        virtual bool RequirePayloadHeader() const { return true; }

        /// True when auto GRF selection was turned on through setAutoGRFSelection() or when the
        /// ForceSupportsAutoGRFSelection flag is enabled.
        virtual bool supportsAutoGRFSelection() const { return autoGRFSelection || IGC_IS_FLAG_ENABLED(ForceSupportsAutoGRFSelection); }

        /// Stores the value later reported by supportsAutoGRFSelection().
        virtual void setAutoGRFSelection(bool value) { autoGRFSelection = value; }

        /// NOTE(review): undocumented in the original; presumably selects scratch space usage
        /// on ATS+ platforms - confirm against the users of this query.
        virtual bool UseScratchSpaceForATSPlus() const { return false; }

        /// Enables HWGenerateThreadID from API level. To help debug, we must enable it from both API level AND IGC Core level.
        virtual bool SupportHWGenerateTID() const { return false; }

        /// Enables the use of simple push constants when on platforms with local (device) memory
        virtual bool supportsSimplePushForLocalMem() const { return false; }

        /// Disable dual8 with discard
        virtual bool DisableDual8WithDiscard() const { return false; }

        /// Support force routing to HDC and LCS caching options
        virtual bool SupportForceRouteAndCache() const { return false; }

        /// If enabled, IGC must provide the corresponding UMD info on how much
        /// memory to allocate for the RTGlobals + global root signature.
        virtual bool supportsExpandedRTGlobals() const { return false; }

        /// If enabled, UMD must support setting up threadgroup according to
        /// RayTracingCustomTileXDim* and RayTracingCustomTileYDim*. If you want
        /// to experiment with non-power-of-2 x dimensions, you also have to
        /// support filling local IDs in the indirect state for the shader to
        /// read (not required otherwise).
        virtual bool supportsRaytracingTiling() const { return false; }

        /// Enables the use of scratch space in raytracing shaders when possible
        virtual bool supportsRTScratchSpace() const { return false; }

        /// Enables Raytracing printf
        virtual bool SupportsRTPrintf() const { return false; }

        /// Enables stateful accesses to the RTAsyncStack, SWHotZone, SWStack and RTSyncStack
        virtual bool supportsRaytracingStatefulAccesses() const { return false; }

        /// To support this, the compiler output must be able to express a
        /// raygen shader identifier with continuation KSPs after it.
        virtual bool supportsRaytracingContinuationPromotion() const { return false; }

        /// To support this, UMD must flip the X and Y dimensions
        virtual bool supportsRaytracingDispatchComputeWalkerAlongYFirst() const { return false; }

        /// Will the UMD patch the call stack handler with KSP pointers?
        virtual bool supportsCallStackHandlerPatching() const { return false; }

        /// Enable LSC on DG2 for the following:
        ///   GenISAIntrinsic::GenISA_ldraw_indexed
        ///   GenISAIntrinsic::GenISA_ldrawvector_indexed
        ///   GenISAIntrinsic::GenISA_storeraw_indexed
        ///   GenISAIntrinsic::GenISA_storerawvector_indexed
        /// todo: remove when all APIs enable LSC
        virtual bool EnableLSCForLdRawAndStoreRawOnDG2() const { return false; }

        /// Check SLM limit on compute shader to select SIMD8
        virtual bool SupportCSSLMLimit() const { return false; }

        /// NOTE(review): undocumented in the original; presumably tells whether SIMD32 is
        /// supported for CPS - confirm against the users of this query.
        virtual bool supportsSIMD32forCPS() const { return true; }

        /// When dual-source blending is enabled, enable sending the
        /// single-source RTW message (with data for the second color) after the
        /// dual-source blending RTW message. The second message must be sent
        /// when the state of dual-source blending is not known at compile time.
        virtual bool sendSingleSourceRTWAfterDualSourceRTW() const { return true; }

        /// Specifies alignment of indirect data
        virtual unsigned getCrossThreadDataAlignment() const { return 32; }

    protected:
        /// Backing state for supportsAutoGRFSelection() / setAutoGRFSelection(); off by default.
        bool autoGRFSelection = false;
    };

}//namespace IGC