File: cmdqueue_hw.h

package info (click to toggle)
intel-compute-runtime 25.44.36015.8-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 79,632 kB
  • sloc: cpp: 931,547; lisp: 2,074; sh: 719; makefile: 162; python: 21
file content (277 lines) | stat: -rw-r--r-- 19,819 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
/*
 * Copyright (C) 2020-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/helpers/hw_mapper.h"
#include "shared/source/unified_memory/unified_memory.h"

#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h"

// Forward declaration only: this header refers to NEO::ScratchSpaceController
// exclusively through pointers, so the full definition is not needed here.
namespace NEO {
class ScratchSpaceController;
} // namespace NEO

namespace L0 {

// Command queue class template parameterised by GFX core family and layered on
// top of CommandQueueImp.
//
// This header only DECLARES the members. Apart from the empty default
// constructor of CommandListExecutionContext, no function body is visible here.
// NOTE(review): the definitions presumably live in a companion implementation
// file included per core family - confirm the location before editing signatures.
//
// NOTE(review): MOCKABLE_VIRTUAL is a macro defined elsewhere; it presumably
// expands to `virtual` in test builds so that mocks can override the function -
// confirm against the macro definition.
template <GFXCORE_FAMILY gfxCoreFamily>
struct CommandQueueHw : public CommandQueueImp {
    // Inherit every constructor of CommandQueueImp unchanged.
    using CommandQueueImp::CommandQueueImp;
    // Family type that NEO::GfxFamilyMapper associates with gfxCoreFamily.
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

    // --- Overrides of the base-class interface --------------------------------
    ze_result_t createFence(const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) override;
    // phCommandLists points at numCommandLists handles. The remaining parameters
    // are forwarded as declared by the base class; their exact contract is not
    // visible in this header.
    ze_result_t executeCommandLists(uint32_t numCommandLists,
                                    ze_command_list_handle_t *phCommandLists,
                                    ze_fence_handle_t hFence, bool performMigration,
                                    NEO::LinearStream *parentImmediateCommandlistLinearStream,
                                    std::unique_lock<std::mutex> *outerLockForIndirect) override;

    // --- State-base-address and front-end helpers -----------------------------
    // streamProperties is a pointer here (unlike programFrontEnd, which takes a
    // reference). NOTE(review): presumably nullable - confirm in the definition.
    void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed, NEO::StreamProperties *streamProperties);
    size_t estimateStateBaseAddressCmdSize();
    MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties);

    // isFrontEndStateDirty, csrState, requiredState, propertyDirty and
    // frontEndReturnPoint are non-const references, so the callee is able to
    // write to them; cmdListRequired and cmdListFinal are read-only inputs.
    MOCKABLE_VIRTUAL size_t estimateFrontEndCmdSizeForMultipleCommandLists(bool &isFrontEndStateDirty, CommandList *commandList,
                                                                           NEO::StreamProperties &csrState,
                                                                           const NEO::StreamProperties &cmdListRequired,
                                                                           const NEO::StreamProperties &cmdListFinal,
                                                                           NEO::StreamProperties &requiredState,
                                                                           bool &propertyDirty,
                                                                           bool &frontEndReturnPoint);
    // Two overloads: one without arguments and one taking an explicit dirty flag.
    size_t estimateFrontEndCmdSize();
    size_t estimateFrontEndCmdSize(bool isFrontEndDirty);

    void programPipelineSelectIfGpgpuDisabled(NEO::LinearStream &commandStream);

    // gsbaState and frontEndState are passed by non-const reference, i.e. they
    // can be modified by the callee.
    MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
                                             NEO::ScratchSpaceController *scratchController,
                                             NEO::GraphicsAllocation *globalStatelessAllocation,
                                             bool &gsbaState, bool &frontEndState,
                                             uint32_t perThreadScratchSpaceSlot0Size,
                                             uint32_t perThreadScratchSpaceSlot1Size);

    bool getPreemptionCmdProgramming() override;
    // Public patchCommands overload taking explicit arguments. A second,
    // protected overload that takes a CommandListExecutionContext is declared
    // near the end of this struct.
    void patchCommands(CommandList &commandList, uint64_t scratchAddress, bool patchNewScratchController,
                       void **patchPreambleBuffer);

  protected:
    // Bundle of state that most protected helpers below receive by reference
    // as `ctx`.
    // NOTE(review): appears to be scoped to a single executeCommandLists call -
    // confirm against the implementation.
    //
    // Every data member has a default member initializer, so the empty default
    // constructor leaves no member uninitialized. Members are initialized in
    // declaration order; keep that in mind before reordering them, because the
    // 10-argument constructor is defined outside this header.
    struct CommandListExecutionContext {

        CommandListExecutionContext() {}

        // Defined elsewhere; how each argument maps onto the members below is
        // not visible in this header.
        CommandListExecutionContext(ze_command_list_handle_t *commandListHandles,
                                    uint32_t numCommandLists,
                                    NEO::PreemptionMode contextPreemptionMode,
                                    Device *device,
                                    NEO::ScratchSpaceController *scratchSpaceController,
                                    NEO::GraphicsAllocation *globalStatelessAllocation,
                                    bool debugEnabled,
                                    bool programActivePartitionConfig,
                                    bool performMigration,
                                    bool sipSent);

        inline bool isNEODebuggerActive(Device *device);

        // 64-bit values (stream state snapshot and GPU addresses).
        NEO::StreamProperties cmdListBeginState{};
        uint64_t scratchGsba = 0;
        uint64_t childGpuAddressPositionBeforeDynamicPreamble = 0;
        uint64_t currentGpuAddressForChainedBbStart = 0;

        // Sizes and raw (non-owning as far as this header shows) pointers.
        // NOTE(review): the initial value 10 of spaceForResidency is not
        // explained here; its unit and rationale need confirming in the
        // implementation.
        size_t spaceForResidency = 10;
        size_t bufferSpaceForPatchPreamble = 0;
        size_t totalNoopSpaceForPatchPreamble = 0;
        CommandList *firstCommandList = nullptr;
        CommandList *lastCommandList = nullptr;
        void *currentPatchForChainedBbStart = nullptr;
        void *currentPatchPreambleBuffer = nullptr;
        uintptr_t basePatchPreambleAddress = 0;
        NEO::ScratchSpaceController *scratchSpaceController = nullptr;
        NEO::GraphicsAllocation *globalStatelessAllocation = nullptr;
        // Pointer to a lock object held elsewhere; same type as the
        // outerLockForIndirect parameter of executeCommandLists.
        std::unique_lock<std::mutex> *outerLockForIndirect = nullptr;

        // 32-bit and enum-typed values.
        NEO::PreemptionMode preemptionMode{};
        NEO::PreemptionMode statePreemption{};
        uint32_t perThreadScratchSpaceSlot0Size = 0;
        uint32_t perThreadScratchSpaceSlot1Size = 0;
        uint32_t totalActiveScratchPatchElements = 0;
        UnifiedMemoryControls unifiedMemoryControls{};

        // Boolean flags. All default to false except cachedMOCSAllowed.
        bool anyCommandListWithCooperativeKernels = false;
        bool anyCommandListRequiresDisabledEUFusion = false;
        bool cachedMOCSAllowed = true;
        bool containsAnyRegularCmdList = false;
        bool gsbaStateDirty = false;
        bool frontEndStateDirty = false;
        // const member: it can only be given a different value in a constructor
        // initializer list, and it makes the implicit copy/move assignment
        // operators of this struct deleted.
        const bool isPreemptionModeInitial{false};
        bool isDevicePreemptionModeMidThread{};
        bool isDebugEnabled{};
        bool stateSipRequired{};
        bool isProgramActivePartitionConfigRequired{};
        bool isMigrationRequested{};
        bool isDirectSubmissionEnabled{};
        bool isDispatchTaskCountPostSyncRequired{};
        bool hasIndirectAccess{};
        bool rtDispatchRequired = false;
        bool globalInit = false;
        bool lockScratchController = false;
        bool cmdListScratchAddressPatchingEnabled = false;
        bool containsParentImmediateStream = false;
        bool patchPreambleWaitSyncNeeded = false;
    };

    // All helpers below are declared only; their behaviour must be read from the
    // definitions. The `inline` keyword on these declarations does not provide a
    // body here.

    inline void processMemAdviseOperations(CommandList *commandList);

    // --- executeCommandLists variants ------------------------------------------
    // The three variants share the same parameter list shape (context, count,
    // handles, fence, optional parent immediate stream).
    ze_result_t executeCommandListsRegularHeapless(CommandListExecutionContext &ctx,
                                                   uint32_t numCommandLists,
                                                   ze_command_list_handle_t *commandListHandles,
                                                   ze_fence_handle_t hFence,
                                                   NEO::LinearStream *parentImmediateCommandlistLinearStream);

    MOCKABLE_VIRTUAL ze_result_t executeCommandListsRegular(CommandListExecutionContext &ctx,
                                                            uint32_t numCommandLists,
                                                            ze_command_list_handle_t *commandListHandles,
                                                            ze_fence_handle_t hFence,
                                                            NEO::LinearStream *parentImmediateCommandlistLinearStream);
    inline ze_result_t executeCommandListsCopyOnly(CommandListExecutionContext &ctx,
                                                   uint32_t numCommandLists,
                                                   ze_command_list_handle_t *phCommandLists,
                                                   ze_fence_handle_t hFence,
                                                   NEO::LinearStream *parentImmediateCommandlistLinearStream);

    // --- Setup and size estimation ---------------------------------------------
    // computeDebuggerCmdsSize is the only helper here that takes the context by
    // const reference.
    inline size_t computeDebuggerCmdsSize(const CommandListExecutionContext &ctx);
    // dirtyState is an in/out style non-const reference.
    inline size_t computePreemptionSizeForCommandList(CommandListExecutionContext &ctx,
                                                      CommandList *commandList,
                                                      bool &dirtyState);
    inline ze_result_t setupCmdListsAndContextParams(CommandListExecutionContext &ctx,
                                                     ze_command_list_handle_t *phCommandLists,
                                                     uint32_t numCommandLists,
                                                     ze_fence_handle_t hFence,
                                                     NEO::LinearStream *parentImmediateCommandlistLinearStream);
    // const member function: does not modify the queue object. Shares its name
    // with the CommandListExecutionContext flag isDispatchTaskCountPostSyncRequired.
    MOCKABLE_VIRTUAL bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList, bool containsParentImmediateStream) const;
    inline size_t estimateLinearStreamSizeInitial(CommandListExecutionContext &ctx);
    size_t estimateStreamSizeForExecuteCommandListsRegularHeapless(CommandListExecutionContext &ctx,
                                                                   uint32_t numCommandLists,
                                                                   ze_command_list_handle_t *commandListHandles,
                                                                   bool instructionCacheFlushRequired,
                                                                   bool stateCacheFlushRequired);
    inline size_t estimateCommandListSecondaryStart(CommandList *commandList);
    inline size_t estimateCommandListPrimaryStart(bool required);

    // --- Helpers whose names refer to the patch preamble -----------------------
    inline size_t estimateCommandListPatchPreamble(CommandListExecutionContext &ctx, uint32_t numCommandLists);
    inline size_t estimateCommandListPatchPreambleFrontEndCmd(CommandListExecutionContext &ctx, CommandList *commandList);
    inline void getCommandListPatchPreambleData(CommandListExecutionContext &ctx, CommandList *commandList);
    size_t estimateCommandListPatchPreambleWaitSync(CommandListExecutionContext &ctx, CommandList *commandList);
    inline size_t estimateTotalPatchPreambleData(CommandListExecutionContext &ctx);
    // NOTE(review): "retrive" (sic) is the spelling used by the declared name;
    // correcting it would require updating the definition and every caller.
    inline void retrivePatchPreambleSpace(CommandListExecutionContext &ctx, NEO::LinearStream &commandStream);
    inline void dispatchPatchPreambleEnding(CommandListExecutionContext &ctx);
    inline void dispatchPatchPreambleInOrderNoop(CommandListExecutionContext &ctx, CommandList *commandList);
    inline void dispatchPatchPreambleCommandListWaitSync(CommandListExecutionContext &ctx, CommandList *commandList);

    inline size_t estimateCommandListResidencySize(CommandList *commandList);
    inline void setFrontEndStateProperties(CommandListExecutionContext &ctx);
    inline void handleScratchSpaceAndUpdateGSBAStateDirtyFlag(CommandListExecutionContext &ctx);
    inline size_t estimateLinearStreamSizeComplementary(CommandListExecutionContext &ctx,
                                                        ze_command_list_handle_t *phCommandLists,
                                                        uint32_t numCommandLists);
    // Returns a ze_result_t, so callers are expected to check for failure.
    // NOTE(review): likely related to the alignedChildStreamPadding member at
    // the end of this struct - confirm in the definition.
    MOCKABLE_VIRTUAL ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize, CommandListExecutionContext &ctx);

    // --- Helpers named get...AndMake...Resident / make...Resident ---------------
    // Several of these take the deciding flag as a bool argument rather than the
    // whole context.
    inline void getGlobalFenceAndMakeItResident();
    inline void getWorkPartitionAndMakeItResident();
    inline void getGlobalStatelessHeapAndMakeItResident(CommandListExecutionContext &ctx);
    inline void getTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &commandStream);
    inline void makeSbaTrackingBufferResidentIfL0DebuggerEnabled(bool isDebugEnabled);
    inline void programCommandQueueDebugCmdsForDebuggerIfEnabled(bool isDebugEnabled, NEO::LinearStream &commandStream);
    inline void programStateBaseAddressWithGsbaIfDirty(CommandListExecutionContext &ctx,
                                                       ze_command_list_handle_t hCommandList,
                                                       NEO::LinearStream &commandStream);
    inline void programCsrBaseAddressIfPreemptionModeInitial(bool isPreemptionModeInitial, NEO::LinearStream &commandStream);
    inline void programStateSip(bool isStateSipRequired, NEO::LinearStream &commandStream);
    inline void updateOneCmdListPreemptionModeAndCtxStatePreemption(NEO::LinearStream &commandStream,
                                                                    CommandListRequiredStateChange &cmdListRequired);
    inline void makePreemptionAllocationResidentForModeMidThread(bool isDevicePreemptionModeMidThread);
    inline void makeSipIsaResidentIfSipKernelUsed(CommandListExecutionContext &ctx);
    inline void makeDebugSurfaceResidentIfNEODebuggerActive(bool isNEODebuggerActive);
    inline void makeCsrTagAllocationResident();
    inline void makeRayTracingBufferResident(NEO::GraphicsAllocation *rtBuffer);

    // --- Per-command-list programming into a NEO::LinearStream -----------------
    inline void programActivePartitionConfig(bool isProgramActivePartitionConfigRequired, NEO::LinearStream &commandStream);
    inline void programOneCmdListFrontEndIfDirty(CommandListExecutionContext &ctx,
                                                 NEO::LinearStream &commandStream,
                                                 CommandListRequiredStateChange &cmdListRequiredState);
    // Batch-buffer-start helpers: a general entry plus primary and secondary
    // variants with identical parameter lists.
    inline void programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx);
    inline void programOneCmdListBatchBufferStartPrimaryBatchBuffer(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx);
    inline void programOneCmdListBatchBufferStartSecondaryBatchBuffer(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx);
    inline void programLastCommandListReturnBbStart(
        NEO::LinearStream &commandStream,
        CommandListExecutionContext &ctx);
    inline void programFrontEndAndClearDirtyFlag(bool shouldFrontEndBeProgrammed,
                                                 CommandListExecutionContext &ctx,
                                                 NEO::LinearStream &commandStream,
                                                 NEO::StreamProperties &csrState);
    inline void collectPrintfContentsFromCommandsList(CommandList *commandList);
    inline void migrateSharedAllocationsIfRequested(bool isMigrationRequested, CommandList *commandList);
    inline void prefetchMemoryToDeviceAssociatedWithCmdList(CommandList *commandList);
    inline void assignCsrTaskCountToFenceIfAvailable(ze_fence_handle_t hFence);
    inline void dispatchTaskCountPostSyncRegular(bool isDispatchTaskCountPostSyncRequired, NEO::LinearStream &commandStream);
    inline void dispatchTaskCountPostSyncByMiFlushDw(bool isDispatchTaskCountPostSyncRequired, bool fenceRequired, NEO::LinearStream &commandStream);
    // --- Submission and completion ---------------------------------------------
    // Returns NEO::SubmissionStatus; handleSubmissionAndCompletionResults below
    // accepts that status together with a ze_result_t and returns a ze_result_t.
    NEO::SubmissionStatus prepareAndSubmitBatchBuffer(CommandListExecutionContext &ctx, NEO::LinearStream &innerCommandStream);

    inline void cleanLeftoverMemory(NEO::LinearStream &outerCommandStream, NEO::LinearStream &innerCommandStream);
    inline void updateTaskCountAndPostSync(bool isDispatchTaskCountPostSyncRequired,
                                           uint32_t numCommandLists,
                                           ze_command_list_handle_t *commandListHandles);
    inline ze_result_t waitForCommandQueueCompletionAndCleanHeapContainer();
    inline ze_result_t handleSubmissionAndCompletionResults(NEO::SubmissionStatus submitRet, ze_result_t completionRet);

    // --- Stream-property estimators --------------------------------------------
    // The *ForMultipleCommandLists estimators share a common shape: csrState and
    // requiredState are mutable references, cmdListRequired / cmdListFinal are
    // const inputs, and propertyDirty (plus one estimator-specific bool) can be
    // written by the callee.
    inline size_t estimatePipelineSelectCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrState,
                                                                       const NEO::StreamProperties &cmdListRequired,
                                                                       const NEO::StreamProperties &cmdListFinal,
                                                                       bool &gpgpuEnabled,
                                                                       NEO::StreamProperties &requiredState,
                                                                       bool &propertyDirty);
    inline size_t estimatePipelineSelectCmdSize();
    inline void programOneCmdListPipelineSelect(NEO::LinearStream &commandStream,
                                                CommandListRequiredStateChange &cmdListRequired);

    inline size_t estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrState,
                                                            bool &scmStateDirty,
                                                            const NEO::StreamProperties &cmdListRequired,
                                                            const NEO::StreamProperties &cmdListFinal,
                                                            NEO::StreamProperties &requiredState,
                                                            bool &propertyDirty);

    inline void programRequiredStateComputeModeForCommandList(NEO::LinearStream &commandStream,
                                                              CommandListRequiredStateChange &cmdListRequired);

    inline size_t estimateStateBaseAddressCmdDispatchSize(bool bindingTableBaseAddress);
    // Takes the command list's NEO::HeapAddressModel in addition to the common
    // estimator parameters.
    // NOTE(review): presumably selects between the GlobalStateless and
    // PrivateHeap variants declared right after it - confirm in the definition.
    inline size_t estimateStateBaseAddressCmdSizeForMultipleCommandLists(bool &baseAddressStateDirty,
                                                                         NEO::HeapAddressModel commandListHeapAddressModel,
                                                                         NEO::StreamProperties &csrState,
                                                                         const NEO::StreamProperties &cmdListRequired,
                                                                         const NEO::StreamProperties &cmdListFinal,
                                                                         NEO::StreamProperties &requiredState,
                                                                         bool &propertyDirty);
    inline size_t estimateStateBaseAddressCmdSizeForGlobalStatelessCommandList(bool &baseAddressStateDirty,
                                                                               NEO::StreamProperties &csrState,
                                                                               const NEO::StreamProperties &cmdListRequired,
                                                                               const NEO::StreamProperties &cmdListFinal,
                                                                               NEO::StreamProperties &requiredState,
                                                                               bool &propertyDirty);
    inline size_t estimateStateBaseAddressCmdSizeForPrivateHeapCommandList(bool &baseAddressStateDirty,
                                                                           NEO::StreamProperties &csrState,
                                                                           const NEO::StreamProperties &cmdListRequired,
                                                                           const NEO::StreamProperties &cmdListFinal,
                                                                           NEO::StreamProperties &requiredState,
                                                                           bool &propertyDirty);
    inline size_t estimateStateBaseAddressDebugTracking();

    inline void programRequiredStateBaseAddressForCommandList(CommandListExecutionContext &ctx,
                                                              NEO::LinearStream &commandStream,
                                                              CommandListRequiredStateChange &cmdListRequired);
    inline void updateBaseAddressState(CommandList *lastCommandList);
    inline void updateDebugSurfaceState(CommandListExecutionContext &ctx);
    // Protected overload of patchCommands driven by the execution context; the
    // public overload above takes the individual values instead.
    inline void patchCommands(CommandList &commandList, CommandListExecutionContext &ctx);
    void prepareInOrderCommandList(CommandListImp *commandList, CommandListExecutionContext &ctx);

    // Value-initialized, i.e. starts at 0. Where it is written and read is not
    // visible in this header.
    size_t alignedChildStreamPadding{};
};

} // namespace L0