1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
|
/*
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/helpers/hw_mapper.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h"
// Forward declaration only: within this header ScratchSpaceController is used
// solely through pointers, so its full definition is not required here.
namespace NEO {
class ScratchSpaceController;
} // namespace NEO
namespace L0 {
// Hardware-family-specific command queue. The template parameter selects the
// GFX core family; GfxFamily below is the matching family type obtained from
// NEO::GfxFamilyMapper. Constructors are inherited from CommandQueueImp.
// This header section only declares members; the sole inline body is the empty
// default constructor of CommandListExecutionContext.
template <GFXCORE_FAMILY gfxCoreFamily>
struct CommandQueueHw : public CommandQueueImp {
using CommandQueueImp::CommandQueueImp;
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
// Overrides of virtual functions inherited through CommandQueueImp.
ze_result_t createFence(const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) override;
ze_result_t executeCommandLists(uint32_t numCommandLists,
ze_command_list_handle_t *phCommandLists,
ze_fence_handle_t hFence, bool performMigration,
NEO::LinearStream *parentImmediateCommandlistLinearStream,
std::unique_lock<std::mutex> *outerLockForIndirect) override;
// State-base-address helper (operates on the commandStream passed by reference)
// and its size estimate.
// NOTE(review): the size_t estimates throughout this struct are presumably byte
// counts of command-stream space — confirm against the definitions.
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed, NEO::StreamProperties *streamProperties);
size_t estimateStateBaseAddressCmdSize();
// Front-end programming and estimation. MOCKABLE_VIRTUAL is a macro defined outside
// this file — presumably it expands to `virtual` in test builds; confirm.
// The bool& and non-const StreamProperties& parameters can be written by the callee.
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties);
MOCKABLE_VIRTUAL size_t estimateFrontEndCmdSizeForMultipleCommandLists(bool &isFrontEndStateDirty, CommandList *commandList,
NEO::StreamProperties &csrState,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal,
NEO::StreamProperties &requiredState,
bool &propertyDirty,
bool &frontEndReturnPoint);
// Two overloads: one without arguments and one taking an explicit dirty flag.
size_t estimateFrontEndCmdSize();
size_t estimateFrontEndCmdSize(bool isFrontEndDirty);
void programPipelineSelectIfGpgpuDisabled(NEO::LinearStream &commandStream);
// gsbaState and frontEndState are non-const references; whether they are inputs,
// outputs or both cannot be determined from the declaration alone.
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
NEO::GraphicsAllocation *globalStatelessAllocation,
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSlot0Size,
uint32_t perThreadScratchSpaceSlot1Size);
// Override of a virtual inherited through CommandQueueImp.
bool getPreemptionCmdProgramming() override;
// Public patchCommands overload taking an explicit scratch address; a second
// overload taking the execution context is declared in the protected section.
void patchCommands(CommandList &commandList, uint64_t scratchAddress, bool patchNewScratchController,
void **patchPreambleBuffer);
protected:
// Bundle of state shared by the protected helpers below, most of which take it
// by reference as `ctx`.
// NOTE(review): presumably one instance is created per executeCommandLists call —
// confirm in the definition.
struct CommandListExecutionContext {
// Empty body: every data member relies on its default member initializer.
CommandListExecutionContext() {}
// Parameterized constructor; its definition is not part of this header section.
CommandListExecutionContext(ze_command_list_handle_t *commandListHandles,
uint32_t numCommandLists,
NEO::PreemptionMode contextPreemptionMode,
Device *device,
NEO::ScratchSpaceController *scratchSpaceController,
NEO::GraphicsAllocation *globalStatelessAllocation,
bool debugEnabled,
bool programActivePartitionConfig,
bool performMigration,
bool sipSent);
inline bool isNEODebuggerActive(Device *device);
// Data members. Each one has a default member initializer, written either as
// `= value` or as `{}` (value-initialization).
NEO::StreamProperties cmdListBeginState{};
uint64_t scratchGsba = 0;
uint64_t childGpuAddressPositionBeforeDynamicPreamble = 0;
uint64_t currentGpuAddressForChainedBbStart = 0;
// NOTE(review): the meaning and unit of the initial value 10 are not visible here.
size_t spaceForResidency = 10;
size_t bufferSpaceForPatchPreamble = 0;
size_t totalNoopSpaceForPatchPreamble = 0;
CommandList *firstCommandList = nullptr;
CommandList *lastCommandList = nullptr;
void *currentPatchForChainedBbStart = nullptr;
void *currentPatchPreambleBuffer = nullptr;
uintptr_t basePatchPreambleAddress = 0;
NEO::ScratchSpaceController *scratchSpaceController = nullptr;
NEO::GraphicsAllocation *globalStatelessAllocation = nullptr;
// Raw pointer to a unique_lock; presumably the lock handed to
// executeCommandLists via its outerLockForIndirect parameter — confirm.
std::unique_lock<std::mutex> *outerLockForIndirect = nullptr;
NEO::PreemptionMode preemptionMode{};
NEO::PreemptionMode statePreemption{};
uint32_t perThreadScratchSpaceSlot0Size = 0;
uint32_t perThreadScratchSpaceSlot1Size = 0;
uint32_t totalActiveScratchPatchElements = 0;
UnifiedMemoryControls unifiedMemoryControls{};
bool anyCommandListWithCooperativeKernels = false;
bool anyCommandListRequiresDisabledEUFusion = false;
bool cachedMOCSAllowed = true;
bool containsAnyRegularCmdList = false;
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
// const member: a value other than false can only come from a constructor's
// member-initializer list; it cannot be assigned afterwards.
const bool isPreemptionModeInitial{false};
bool isDevicePreemptionModeMidThread{};
bool isDebugEnabled{};
bool stateSipRequired{};
bool isProgramActivePartitionConfigRequired{};
bool isMigrationRequested{};
bool isDirectSubmissionEnabled{};
// Shares its identifier with the CommandQueueHw member function
// isDispatchTaskCountPostSyncRequired(...); inside this nested struct the name
// refers to this field.
bool isDispatchTaskCountPostSyncRequired{};
bool hasIndirectAccess{};
bool rtDispatchRequired = false;
bool globalInit = false;
bool lockScratchController = false;
bool cmdListScratchAddressPatchingEnabled = false;
bool containsParentImmediateStream = false;
bool patchPreambleWaitSyncNeeded = false;
};
// Most helpers below are declared `inline`; their definitions are not visible in
// this header section and are expected to come from an implementation file.
inline void processMemAdviseOperations(CommandList *commandList);
// Execution paths. Each takes the context, the command list handles, the fence
// handle and the parent immediate command list stream pointer.
// NOTE(review): which path executeCommandLists selects is decided in its
// definition, which is not visible here.
ze_result_t executeCommandListsRegularHeapless(CommandListExecutionContext &ctx,
uint32_t numCommandLists,
ze_command_list_handle_t *commandListHandles,
ze_fence_handle_t hFence,
NEO::LinearStream *parentImmediateCommandlistLinearStream);
MOCKABLE_VIRTUAL ze_result_t executeCommandListsRegular(CommandListExecutionContext &ctx,
uint32_t numCommandLists,
ze_command_list_handle_t *commandListHandles,
ze_fence_handle_t hFence,
NEO::LinearStream *parentImmediateCommandlistLinearStream);
inline ze_result_t executeCommandListsCopyOnly(CommandListExecutionContext &ctx,
uint32_t numCommandLists,
ze_command_list_handle_t *phCommandLists,
ze_fence_handle_t hFence,
NEO::LinearStream *parentImmediateCommandlistLinearStream);
// Size computation and context setup helpers.
inline size_t computeDebuggerCmdsSize(const CommandListExecutionContext &ctx);
inline size_t computePreemptionSizeForCommandList(CommandListExecutionContext &ctx,
CommandList *commandList,
bool &dirtyState);
inline ze_result_t setupCmdListsAndContextParams(CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists,
ze_fence_handle_t hFence,
NEO::LinearStream *parentImmediateCommandlistLinearStream);
// const member function; note that CommandListExecutionContext has a bool field
// with the same identifier.
MOCKABLE_VIRTUAL bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList, bool containsParentImmediateStream) const;
inline size_t estimateLinearStreamSizeInitial(CommandListExecutionContext &ctx);
size_t estimateStreamSizeForExecuteCommandListsRegularHeapless(CommandListExecutionContext &ctx,
uint32_t numCommandLists,
ze_command_list_handle_t *commandListHandles,
bool instructionCacheFlushRequired,
bool stateCacheFlushRequired);
inline size_t estimateCommandListSecondaryStart(CommandList *commandList);
inline size_t estimateCommandListPrimaryStart(bool required);
// Patch-preamble estimation and dispatch helpers.
inline size_t estimateCommandListPatchPreamble(CommandListExecutionContext &ctx, uint32_t numCommandLists);
inline size_t estimateCommandListPatchPreambleFrontEndCmd(CommandListExecutionContext &ctx, CommandList *commandList);
inline void getCommandListPatchPreambleData(CommandListExecutionContext &ctx, CommandList *commandList);
size_t estimateCommandListPatchPreambleWaitSync(CommandListExecutionContext &ctx, CommandList *commandList);
inline size_t estimateTotalPatchPreambleData(CommandListExecutionContext &ctx);
// NOTE(review): "retrive" looks like a misspelling of "retrieve"; renaming would
// also require changing the definition and every caller.
inline void retrivePatchPreambleSpace(CommandListExecutionContext &ctx, NEO::LinearStream &commandStream);
inline void dispatchPatchPreambleEnding(CommandListExecutionContext &ctx);
inline void dispatchPatchPreambleInOrderNoop(CommandListExecutionContext &ctx, CommandList *commandList);
inline void dispatchPatchPreambleCommandListWaitSync(CommandListExecutionContext &ctx, CommandList *commandList);
inline size_t estimateCommandListResidencySize(CommandList *commandList);
inline void setFrontEndStateProperties(CommandListExecutionContext &ctx);
inline void handleScratchSpaceAndUpdateGSBAStateDirtyFlag(CommandListExecutionContext &ctx);
inline size_t estimateLinearStreamSizeComplementary(CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists);
MOCKABLE_VIRTUAL ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize, CommandListExecutionContext &ctx);
// Helpers named after the allocation they handle (get...AndMakeItResident /
// make...Resident); their behavior is defined outside this header section.
inline void getGlobalFenceAndMakeItResident();
inline void getWorkPartitionAndMakeItResident();
inline void getGlobalStatelessHeapAndMakeItResident(CommandListExecutionContext &ctx);
inline void getTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &commandStream);
inline void makeSbaTrackingBufferResidentIfL0DebuggerEnabled(bool isDebugEnabled);
inline void programCommandQueueDebugCmdsForDebuggerIfEnabled(bool isDebugEnabled, NEO::LinearStream &commandStream);
inline void programStateBaseAddressWithGsbaIfDirty(CommandListExecutionContext &ctx,
ze_command_list_handle_t hCommandList,
NEO::LinearStream &commandStream);
// Several helpers below take a leading bool condition as a parameter.
// NOTE(review): presumably they do nothing when it is false — confirm.
inline void programCsrBaseAddressIfPreemptionModeInitial(bool isPreemptionModeInitial, NEO::LinearStream &commandStream);
inline void programStateSip(bool isStateSipRequired, NEO::LinearStream &commandStream);
inline void updateOneCmdListPreemptionModeAndCtxStatePreemption(NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequired);
inline void makePreemptionAllocationResidentForModeMidThread(bool isDevicePreemptionModeMidThread);
inline void makeSipIsaResidentIfSipKernelUsed(CommandListExecutionContext &ctx);
inline void makeDebugSurfaceResidentIfNEODebuggerActive(bool isNEODebuggerActive);
inline void makeCsrTagAllocationResident();
inline void makeRayTracingBufferResident(NEO::GraphicsAllocation *rtBuffer);
inline void programActivePartitionConfig(bool isProgramActivePartitionConfigRequired, NEO::LinearStream &commandStream);
inline void programOneCmdListFrontEndIfDirty(CommandListExecutionContext &ctx,
NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequiredState);
// Batch-buffer-start helpers: a general entry plus primary and secondary variants.
inline void programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx);
inline void programOneCmdListBatchBufferStartPrimaryBatchBuffer(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx);
inline void programOneCmdListBatchBufferStartSecondaryBatchBuffer(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx);
inline void programLastCommandListReturnBbStart(
NEO::LinearStream &commandStream,
CommandListExecutionContext &ctx);
inline void programFrontEndAndClearDirtyFlag(bool shouldFrontEndBeProgrammed,
CommandListExecutionContext &ctx,
NEO::LinearStream &commandStream,
NEO::StreamProperties &csrState);
inline void collectPrintfContentsFromCommandsList(CommandList *commandList);
inline void migrateSharedAllocationsIfRequested(bool isMigrationRequested, CommandList *commandList);
inline void prefetchMemoryToDeviceAssociatedWithCmdList(CommandList *commandList);
inline void assignCsrTaskCountToFenceIfAvailable(ze_fence_handle_t hFence);
inline void dispatchTaskCountPostSyncRegular(bool isDispatchTaskCountPostSyncRequired, NEO::LinearStream &commandStream);
inline void dispatchTaskCountPostSyncByMiFlushDw(bool isDispatchTaskCountPostSyncRequired, bool fenceRequired, NEO::LinearStream &commandStream);
// Submission and completion handling.
NEO::SubmissionStatus prepareAndSubmitBatchBuffer(CommandListExecutionContext &ctx, NEO::LinearStream &innerCommandStream);
inline void cleanLeftoverMemory(NEO::LinearStream &outerCommandStream, NEO::LinearStream &innerCommandStream);
inline void updateTaskCountAndPostSync(bool isDispatchTaskCountPostSyncRequired,
uint32_t numCommandLists,
ze_command_list_handle_t *commandListHandles);
inline ze_result_t waitForCommandQueueCompletionAndCleanHeapContainer();
inline ze_result_t handleSubmissionAndCompletionResults(NEO::SubmissionStatus submitRet, ze_result_t completionRet);
// Pipeline select: size estimation and per-command-list programming.
inline size_t estimatePipelineSelectCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrState,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal,
bool &gpgpuEnabled,
NEO::StreamProperties &requiredState,
bool &propertyDirty);
inline size_t estimatePipelineSelectCmdSize();
inline void programOneCmdListPipelineSelect(NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequired);
// NOTE(review): "Scm" presumably abbreviates "state compute mode" (compare
// programRequiredStateComputeModeForCommandList below) — confirm.
inline size_t estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrState,
bool &scmStateDirty,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal,
NEO::StreamProperties &requiredState,
bool &propertyDirty);
inline void programRequiredStateComputeModeForCommandList(NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequired);
// State base address: estimation variants (generic, global stateless, private
// heap), debug tracking estimate, programming and state updates.
inline size_t estimateStateBaseAddressCmdDispatchSize(bool bindingTableBaseAddress);
inline size_t estimateStateBaseAddressCmdSizeForMultipleCommandLists(bool &baseAddressStateDirty,
NEO::HeapAddressModel commandListHeapAddressModel,
NEO::StreamProperties &csrState,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal,
NEO::StreamProperties &requiredState,
bool &propertyDirty);
inline size_t estimateStateBaseAddressCmdSizeForGlobalStatelessCommandList(bool &baseAddressStateDirty,
NEO::StreamProperties &csrState,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal,
NEO::StreamProperties &requiredState,
bool &propertyDirty);
inline size_t estimateStateBaseAddressCmdSizeForPrivateHeapCommandList(bool &baseAddressStateDirty,
NEO::StreamProperties &csrState,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal,
NEO::StreamProperties &requiredState,
bool &propertyDirty);
inline size_t estimateStateBaseAddressDebugTracking();
inline void programRequiredStateBaseAddressForCommandList(CommandListExecutionContext &ctx,
NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequired);
inline void updateBaseAddressState(CommandList *lastCommandList);
inline void updateDebugSurfaceState(CommandListExecutionContext &ctx);
// Protected patchCommands overload taking the execution context instead of an
// explicit scratch address.
inline void patchCommands(CommandList &commandList, CommandListExecutionContext &ctx);
void prepareInOrderCommandList(CommandListImp *commandList, CommandListExecutionContext &ctx);
// Value-initialized to 0; where it is written is not visible in this header section.
size_t alignedChildStreamPadding{};
};
} // namespace L0
|