/*
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/program/work_size_info.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/program/kernel_info.h"
#include <cmath>
namespace NEO {
// Builds a WorkSizeInfo from explicitly supplied kernel and device parameters.
// Every argument is stored in the member of the matching name; the render core
// family is read from the hardware info of the given root device environment.
// The minimum work-group size is derived at the end from the stored values.
WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface, bool disableEUFusion) {
    // Device-derived state.
    const auto *hardwareInfo = rootDeviceEnvironment.getHardwareInfo();
    this->coreFamily = hardwareInfo->platform.eRenderCoreFamily;

    // Sizing inputs consumed by setMinWorkGroupSize() below.
    this->maxWorkGroupSize = maxWorkGroupSize;
    this->simdSize = simdSize;
    this->numThreadsPerSubSlice = numThreadsPerSubSlice;
    this->hasBarriers = hasBarriers;
    this->slmTotalSize = slmTotalSize;
    this->localMemSize = localMemSize;

    // Image-related flags.
    this->imgUsed = imgUsed;
    this->yTiledSurfaces = yTiledSurface;

    // Must run last: it reads the sizing members assigned above.
    setMinWorkGroupSize(rootDeviceEnvironment, disableEUFusion);
}
// Scans the kernel's explicit arguments and, on the first one whose descriptor
// is of kind ArgDescriptor::argTImage, sets both imgUsed and yTiledSurfaces.
// If no such argument exists, neither flag is modified.
void WorkSizeInfo::setIfUseImg(const KernelInfo &kernelInfo) {
    const auto &explicitArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs;
    for (const auto &argDescriptor : explicitArgs) {
        if (!argDescriptor.is<ArgDescriptor::argTImage>()) {
            continue;
        }
        // One image argument is enough; no need to look at the rest.
        imgUsed = true;
        yTiledSurfaces = true;
        break;
    }
}
// Recomputes minWorkGroupSize from the currently stored members:
//  - starts from 0;
//  - with barriers: numThreadsPerSubSlice * simdSize / 32;
//  - with SLM in use: raised to at least
//    maxWorkGroupSize / (localMemSize / slmTotalSize);
//  - doubled when the GfxCoreHelper reports fused EU dispatch as enabled for
//    this hardware and the disableEUFusion setting.
// Requesting more SLM than localMemSize prints a debug message (when
// PrintDebugMessages is set) and then triggers UNRECOVERABLE_IF.
void WorkSizeInfo::setMinWorkGroupSize(const RootDeviceEnvironment &rootDeviceEnvironment, bool disableEUFusion) {
    minWorkGroupSize = 0;

    if (hasBarriers) {
        constexpr uint32_t maxBarriersPerHSlice = 32;
        const auto lanesPerSubSlice = numThreadsPerSubSlice * simdSize;
        minWorkGroupSize = lanesPerSubSlice / maxBarriersPerHSlice;
    }

    if (slmTotalSize > 0) {
        const bool slmExceedsAvailable = localMemSize < slmTotalSize;
        if (slmExceedsAvailable) {
            PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize);
        }
        UNRECOVERABLE_IF(slmExceedsAvailable);

        // Past the check above localMemSize >= slmTotalSize, so this quotient
        // is at least 1 and the division below cannot be by zero.
        const auto groupsFittingInSlm = localMemSize / slmTotalSize;
        const auto slmDrivenMinimum = maxWorkGroupSize / groupsFittingInSlm;
        minWorkGroupSize = std::max(minWorkGroupSize, slmDrivenMinimum);
    }

    const auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
    const bool fusedEuDispatch = gfxCoreHelper.isFusedEuDispatchEnabled(*rootDeviceEnvironment.getHardwareInfo(), disableEUFusion);
    if (fusedEuDispatch) {
        minWorkGroupSize *= 2;
    }
}
// Decides whether a target ratio should be used and how strictly.
//  - SLM in use: ratio enabled, non-strict, target is
//    log(workItems[0]) - log(workItems[1]).
//  - otherwise, with y-tiled surfaces: ratio enabled, strict, target is
//    yTilingRatioValue.
//  - otherwise: nothing is modified.
// Only workItems[0] and workItems[1] are read.
void WorkSizeInfo::checkRatio(const size_t workItems[3]) {
    if (slmTotalSize > 0) {
        const auto logDim0 = log(static_cast<float>(workItems[0]));
        const auto logDim1 = log(static_cast<float>(workItems[1]));
        targetRatio = logDim0 - logDim1;
        useStrictRatio = false;
        useRatio = true;
        return;
    }

    if (yTiledSurfaces) {
        targetRatio = yTilingRatioValue;
        useStrictRatio = true;
        useRatio = true;
    }
}
void WorkSizeInfo::setPreferredWgCountPerSubslice(uint32_t preferredWgCount) {
preferredWgCountPerSubSlice = preferredWgCount;
}
} // namespace NEO