1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
|
/*
* Copyright (C) 2025-2026 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/host_function.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/memory_manager/multi_graphics_allocation.h"
namespace NEO {
// Programs a complete host function invocation into commandStream:
// first the command that publishes the host function id (see programHostFunctionId),
// then one wait-for-completion command per active partition reported by the streamer.
// Space for every command is taken from commandStream (no pre-allocated buffer is passed).
template <typename GfxFamily>
void HostFunctionHelper<GfxFamily>::programHostFunction(LinearStream &commandStream, HostFunctionStreamer &streamer, HostFunction &&hostFunction, bool isMemorySynchronizationRequired) {
    LinearStream *stream = &commandStream;

    // Ownership of the host function is handed over to the id-programming step.
    programHostFunctionId(stream, nullptr, streamer, std::move(hostFunction), isMemorySynchronizationRequired);

    // The partition count is queried after the id has been programmed, once for the whole loop.
    const auto activePartitions = streamer.getActivePartitions();
    auto partition = 0u;
    while (partition < activePartitions) {
        programHostFunctionWaitForCompletion(stream, nullptr, streamer, partition);
        partition++;
    }
}
// Registers hostFunction with the streamer under the next host function id and programs
// the GPU command that writes this id to the streamer's host function id address.
//
// Command selection:
//  - isMemorySynchronizationRequired && streamer.getDcFlushRequired():
//      a single PIPE_CONTROL barrier with DC flush enabled and an immediate-data post-sync write of the id.
//      When cmdBuffer is nullptr, space for the PIPE_CONTROL is taken from commandStream.
//  - otherwise:
//      an MI_STORE_DATA_IMM storing the id as a qword; commandStream and cmdBuffer are both
//      forwarded to EncodeStoreMemory::programStoreDataImmCommand.
//      NOTE(review): which of the two the encoder uses when both are set is decided by the encoder - confirm there.
//
// The workload partition offset is enabled whenever more than one partition is active.
template <typename GfxFamily>
void HostFunctionHelper<GfxFamily>::programHostFunctionId(LinearStream *commandStream, void *cmdBuffer, HostFunctionStreamer &streamer, HostFunction &&hostFunction, bool isMemorySynchronizationRequired) {
    using StoreDataImm = typename GfxFamily::MI_STORE_DATA_IMM;
    using PipeControl = typename GfxFamily::PIPE_CONTROL;

    // Only the address of partition 0 is programmed; other partitions will use workloadPartitionIdOffset.
    const auto basePartitionId = 0u;
    const auto idAddress = streamer.getHostFunctionIdGpuAddress(basePartitionId);

    // Allocate the id first, then bind the callable to it.
    const auto functionId = streamer.getNextHostFunctionIdAndIncrement();
    streamer.addHostFunction(functionId, std::move(hostFunction));

    const bool usePartitionOffset = streamer.getActivePartitions() > 1;
    const bool dcFlushRequired = streamer.getDcFlushRequired();

    const bool useBarrier = isMemorySynchronizationRequired && dcFlushRequired;
    if (!useBarrier) {
        // Plain immediate store of the 64-bit id, split into its two dwords.
        const auto idLowDword = getLowPart(functionId);
        const auto idHighDword = getHighPart(functionId);
        const bool storeQword = true;

        EncodeStoreMemory<GfxFamily>::programStoreDataImmCommand(commandStream,
                                                                 static_cast<StoreDataImm *>(cmdBuffer),
                                                                 idAddress,
                                                                 idLowDword,
                                                                 idHighDword,
                                                                 storeQword,
                                                                 usePartitionOffset);
        return;
    }

    // Barrier path: flush data caches and write the id as post-sync immediate data.
    PipeControlArgs barrierArgs{};
    barrierArgs.dcFlushEnable = true;
    barrierArgs.workloadPartitionOffset = usePartitionOffset;

    void *barrierSpace = cmdBuffer;
    if (barrierSpace == nullptr) {
        // No pre-allocated buffer was supplied, so the stream must be valid.
        DEBUG_BREAK_IF(commandStream == nullptr);
        barrierSpace = commandStream->getSpaceForCmd<PipeControl>();
    }

    MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(barrierSpace,
                                                               NEO::PostSyncMode::immediateData,
                                                               idAddress,
                                                               functionId,
                                                               barrierArgs);
}
// Programs an MI_SEMAPHORE_WAIT on the host function id address of the given partition.
// The semaphore compares the value at that address against HostFunctionStatus::completed
// using the SAD_EQUAL_SDD compare operation.
// commandStream and cmdBuffer are forwarded unchanged to EncodeSemaphore::programMiSemaphoreWaitCommand.
template <typename GfxFamily>
void HostFunctionHelper<GfxFamily>::programHostFunctionWaitForCompletion(LinearStream *commandStream, void *cmdBuffer, const HostFunctionStreamer &streamer, uint32_t partitionId) {
    using SemaphoreWait = typename GfxFamily::MI_SEMAPHORE_WAIT;

    const auto compareOperation = SemaphoreWait::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD;
    const auto semaphoreAddress = streamer.getHostFunctionIdGpuAddress(partitionId);
    const auto completedValue = HostFunctionStatus::completed;

    // NOTE(review): the trailing boolean flags are positional; their meaning is defined by
    // EncodeSemaphore::programMiSemaphoreWaitCommand - confirm against its declaration before changing any of them.
    EncodeSemaphore<GfxFamily>::programMiSemaphoreWaitCommand(commandStream, cmdBuffer, semaphoreAddress, completedValue, compareOperation,
                                                              false, true, false, false, false);
}
} // namespace NEO
|