File: cmdqueue_hw_gen12lp.inl

package info (click to toggle)
intel-compute-runtime 25.44.36015.8-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 79,632 kB
  • sloc: cpp: 931,547; lisp: 2,074; sh: 719; makefile: 162; python: 21
file content (224 lines) | stat: -rw-r--r-- 11,807 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/*
 * Copyright (C) 2020-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_container/cmdcontainer.h"
#include "shared/source/command_container/encode_surface_state.h"
#include "shared/source/command_stream/host_function.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/state_base_address.h"

#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/tools/source/metrics/metric.h"

namespace L0 {

template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed, NEO::StreamProperties *streamProperties) {
    using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;

    NEO::Device *neoDevice = device->getNEODevice();
    auto csr = this->getCsr();
    bool isRcs = csr->isRcs();
    auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment();

    bool useGlobalSshAndDsh = false;
    bool isDebuggerActive = neoDevice->getDebugger() != nullptr;

    uint64_t globalHeapsBase = 0;
    uint64_t indirectObjectHeapBaseAddress = 0;
    uint64_t bindlessSurfStateBase = 0ull;

    if (neoDevice->getBindlessHeapsHelper()) {
        useGlobalSshAndDsh = true;
        globalHeapsBase = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase();

        if (!neoDevice->getBindlessHeapsHelper()->isGlobalDshSupported()) {
            bindlessSurfStateBase = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase();
        }
    }

    NEO::StateBaseAddressProperties *sbaProperties = nullptr;

    if (streamProperties != nullptr) {
        sbaProperties = &streamProperties->stateBaseAddress;
    }

    uint64_t instructionHeapBaseAddress = neoDevice->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), neoDevice->getMemoryManager()->isLocalMemoryUsedForIsa(neoDevice->getRootDeviceIndex()));

    STATE_BASE_ADDRESS sbaCmd;

    NEO::EncodeWA<GfxFamily>::addPipeControlBeforeStateBaseAddress(commandStream, rootDeviceEnvironment, isRcs, csr->getDcFlushSupport());
    NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(commandStream, {}, true, rootDeviceEnvironment, isRcs);
    auto l1CachePolicyData = csr->getStoredL1CachePolicy();

    NEO::StateBaseAddressHelperArgs<GfxFamily> stateBaseAddressHelperArgs = {
        gsba,                                             // generalStateBaseAddress
        indirectObjectHeapBaseAddress,                    // indirectObjectHeapBaseAddress
        instructionHeapBaseAddress,                       // instructionHeapBaseAddress
        globalHeapsBase,                                  // globalHeapsBaseAddress
        0,                                                // surfaceStateBaseAddress
        bindlessSurfStateBase,                            // bindlessSurfaceStateBaseAddress
        &sbaCmd,                                          // stateBaseAddressCmd
        sbaProperties,                                    // sbaProperties
        nullptr,                                          // dsh
        nullptr,                                          // ioh
        nullptr,                                          // ssh
        neoDevice->getGmmHelper(),                        // gmmHelper
        (device->getMOCS(cachedMOCSAllowed, false) >> 1), // statelessMocsIndex
        l1CachePolicyData->getL1CacheValue(false),        // l1CachePolicy
        l1CachePolicyData->getL1CacheValue(true),         // l1CachePolicyDebuggerActive
        NEO::MemoryCompressionState::notApplicable,       // memoryCompressionState
        true,                                             // setInstructionStateBaseAddress
        true,                                             // setGeneralStateBaseAddress
        useGlobalSshAndDsh,                               // useGlobalHeapsBaseAddress
        false,                                            // isMultiOsContextCapable
        false,                                            // areMultipleSubDevicesInContext
        false,                                            // overrideSurfaceStateBaseAddress
        isDebuggerActive,                                 // isDebuggerActive
        this->doubleSbaWa,                                // doubleSbaWa
        this->heaplessModeEnabled                         // this->heaplessModeEnabled
    };

    NEO::StateBaseAddressHelper<GfxFamily>::programStateBaseAddressIntoCommandStream(stateBaseAddressHelperArgs, commandStream);

    bool sbaTrackingEnabled = (NEO::Debugger::isDebugEnabled(this->internalUsage) && device->getL0Debugger());
    NEO::EncodeStateBaseAddress<GfxFamily>::setSbaTrackingForL0DebuggerIfEnabled(sbaTrackingEnabled, *neoDevice, commandStream, sbaCmd, true);

    NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(commandStream, {}, false, rootDeviceEnvironment, isRcs);

    csr->setGSBAStateDirty(false);
}

template <GFXCORE_FAMILY gfxCoreFamily>
inline size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressCmdDispatchSize(bool bindingTableBaseAddress) {
    using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;

    size_t size = sizeof(STATE_BASE_ADDRESS) + NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier() +
                  NEO::EncodeWA<GfxFamily>::getAdditionalPipelineSelectSize(*device->getNEODevice(), this->csr->isRcs());

    size += estimateStateBaseAddressDebugTracking();
    return size;
}

template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressCmdSize() {
    return estimateStateBaseAddressCmdDispatchSize(true);
}

template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapContainer,
                                                       NEO::ScratchSpaceController *scratchController,
                                                       NEO::GraphicsAllocation *globalStatelessAllocation,
                                                       bool &gsbaState, bool &frontEndState,
                                                       uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) {

    if (perThreadScratchSpaceSlot0Size > 0) {
        scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSlot0Size, 0u,
                                                   csr->getOsContext(), gsbaState, frontEndState);
        auto scratchAllocation = scratchController->getScratchSpaceSlot0Allocation();
        csr->makeResident(*scratchAllocation);
    }
}

template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint64_t scratchAddress,
                                                  bool patchNewScratchController,
                                                  void **patchPreambleBuffer) {
    using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
    using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;

    auto &commandsToPatch = commandList.getCommandsToPatch();
    for (auto &commandToPatch : commandsToPatch) {
        switch (commandToPatch.type) {
        case CommandToPatch::FrontEndState: {
            UNRECOVERABLE_IF(true);
            break;
        }
        case CommandToPatch::PauseOnEnqueueSemaphoreStart: {
            NEO::EncodeSemaphore<GfxFamily>::programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandToPatch.pCommand),
                                                                    csr->getDebugPauseStateGPUAddress(),
                                                                    static_cast<uint32_t>(NEO::DebugPauseState::hasUserStartConfirmation),
                                                                    COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
                                                                    false, true, false, false, false);
            break;
        }
        case CommandToPatch::PauseOnEnqueueSemaphoreEnd: {
            NEO::EncodeSemaphore<GfxFamily>::programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandToPatch.pCommand),
                                                                    csr->getDebugPauseStateGPUAddress(),
                                                                    static_cast<uint32_t>(NEO::DebugPauseState::hasUserEndConfirmation),
                                                                    COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
                                                                    false, true, false, false, false);
            break;
        }
        case CommandToPatch::PauseOnEnqueuePipeControlStart: {

            NEO::PipeControlArgs args;
            args.dcFlushEnable = csr->getDcFlushSupport();

            auto command = reinterpret_cast<void *>(commandToPatch.pCommand);
            NEO::MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(
                command,
                NEO::PostSyncMode::immediateData,
                csr->getDebugPauseStateGPUAddress(),
                static_cast<uint64_t>(NEO::DebugPauseState::waitingForUserStartConfirmation),
                device->getNEODevice()->getRootDeviceEnvironment(),
                args);
            break;
        }
        case CommandToPatch::PauseOnEnqueuePipeControlEnd: {

            NEO::PipeControlArgs args;
            args.dcFlushEnable = csr->getDcFlushSupport();

            auto command = reinterpret_cast<void *>(commandToPatch.pCommand);
            NEO::MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(
                command,
                NEO::PostSyncMode::immediateData,
                csr->getDebugPauseStateGPUAddress(),
                static_cast<uint64_t>(NEO::DebugPauseState::waitingForUserEndConfirmation),
                device->getNEODevice()->getRootDeviceEnvironment(),
                args);
            break;
        }
        case CommandToPatch::NoopSpace: {
            if (commandToPatch.pDestination != nullptr) {
                memset(commandToPatch.pDestination, 0, commandToPatch.patchSize);
            }
            break;
        }
        case CommandToPatch::HostFunctionEntry:
            csr->ensureHostFunctionDataInitialization();
            csr->makeResidentHostFunctionAllocation();
            NEO::HostFunctionHelper::programHostFunctionAddress<GfxFamily>(nullptr, commandToPatch.pCommand, csr->getHostFunctionData(), commandToPatch.baseAddress);
            break;

        case CommandToPatch::HostFunctionUserData:
            NEO::HostFunctionHelper::programHostFunctionUserData<GfxFamily>(nullptr, commandToPatch.pCommand, csr->getHostFunctionData(), commandToPatch.baseAddress);
            break;

        case CommandToPatch::HostFunctionSignalInternalTag:
            NEO::HostFunctionHelper::programSignalHostFunctionStart<GfxFamily>(nullptr, commandToPatch.pCommand, csr->getHostFunctionData());
            break;

        case CommandToPatch::HostFunctionWaitInternalTag:
            NEO::HostFunctionHelper::programWaitForHostFunctionCompletion<GfxFamily>(nullptr, commandToPatch.pCommand, csr->getHostFunctionData());
            break;

        default: {
            UNRECOVERABLE_IF(true);
        }
        }
    }
}

} // namespace L0