1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
|
/*
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/kernel/kernel_arg_descriptor.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/test/common/test_macros/test.h"
// Checks the value every inspected KernelDescriptor field holds right after default construction.
TEST(KernelDescriptor, WhenDefaultInitializedThenValuesAreCleared) {
    NEO::KernelDescriptor desc;

    // --- kernelAttributes ---
    const auto &attributes = desc.kernelAttributes;
    for (const auto &packedFlags : attributes.flags.packed) {
        EXPECT_EQ(0U, packedFlags);
    }
    EXPECT_EQ(0U, attributes.slmInlineSize);
    EXPECT_EQ(0U, attributes.perThreadScratchSize[0]);
    EXPECT_EQ(0U, attributes.perThreadScratchSize[1]);
    EXPECT_EQ(0U, attributes.perHwThreadPrivateMemorySize);
    EXPECT_EQ(0U, attributes.perThreadSystemThreadSurfaceSize);
    EXPECT_EQ(0U, attributes.crossThreadDataSize);
    EXPECT_EQ(0U, attributes.perThreadDataSize);
    EXPECT_EQ(0U, attributes.numArgsToPatch);
    EXPECT_EQ(128U, attributes.numGrfRequired);
    EXPECT_EQ(NEO::KernelDescriptor::BindfulAndStateless, attributes.bufferAddressingMode);
    EXPECT_EQ(NEO::KernelDescriptor::Bindful, attributes.imageAddressingMode);
    EXPECT_EQ(NEO::KernelDescriptor::Bindful, attributes.samplerAddressingMode);
    EXPECT_EQ(0U, attributes.gpuPointerSize);
    EXPECT_EQ(8U, attributes.simdSize);
    EXPECT_EQ(0U, attributes.numLocalIdChannels);

    // Per-dimension attributes: no required workgroup size, and both orderings
    // are expected to be the identity permutation (0, 1, 2).
    for (auto dim = 0U; dim < 3U; ++dim) {
        EXPECT_EQ(0U, attributes.requiredWorkgroupSize[dim]) << "dim: " << dim;
        EXPECT_EQ(dim, attributes.workgroupWalkOrder[dim]) << "dim: " << dim;
        EXPECT_EQ(dim, attributes.workgroupDimensionsOrder[dim]) << "dim: " << dim;
    }

    // --- entryPoints ---
    const auto &entryPoints = desc.entryPoints;
    EXPECT_EQ(0U, entryPoints.skipPerThreadDataLoad);
    EXPECT_EQ(0U, entryPoints.skipSetFFIDGP);
    EXPECT_EQ(0U, entryPoints.systemKernel);

    // --- payloadMappings.dispatchTraits: every offset starts out undefined ---
    const auto &traits = desc.payloadMappings.dispatchTraits;
    for (auto dim = 0U; dim < 3U; ++dim) {
        EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, traits.globalWorkOffset[dim]) << "dim: " << dim;
        EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, traits.globalWorkSize[dim]) << "dim: " << dim;
        EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, traits.localWorkSize[dim]) << "dim: " << dim;
        EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, traits.localWorkSize2[dim]) << "dim: " << dim;
        EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, traits.enqueuedLocalWorkSize[dim]) << "dim: " << dim;
        EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, traits.numWorkGroups[dim]) << "dim: " << dim;
    }
    EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, traits.workDim);

    // --- payloadMappings: binding table, sampler table, explicit args ---
    EXPECT_EQ(NEO::undefined<NEO::SurfaceStateHeapOffset>, desc.payloadMappings.bindingTable.tableOffset);
    EXPECT_EQ(0U, desc.payloadMappings.bindingTable.numEntries);

    EXPECT_EQ(NEO::undefined<NEO::DynamicStateHeapOffset>, desc.payloadMappings.samplerTable.tableOffset);
    EXPECT_EQ(NEO::undefined<NEO::DynamicStateHeapOffset>, desc.payloadMappings.samplerTable.borderColor);
    EXPECT_EQ(0U, desc.payloadMappings.samplerTable.numSamplers);

    EXPECT_EQ(0U, desc.payloadMappings.explicitArgs.size());

    // --- payloadMappings.implicitArgs ---
    const auto &implicitArgs = desc.payloadMappings.implicitArgs;
    EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, implicitArgs.privateMemorySize);
    EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, implicitArgs.maxWorkGroupSize);
    EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, implicitArgs.simdSize);
    EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, implicitArgs.deviceSideEnqueueParentEvent);
    EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, implicitArgs.preferredWkgMultiple);
    EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, implicitArgs.localMemoryStatelessWindowSize);
    EXPECT_EQ(NEO::undefined<NEO::CrossThreadDataOffset>, implicitArgs.localMemoryStatelessWindowStartAddres);

    // --- kernelMetadata ---
    EXPECT_TRUE(desc.kernelMetadata.kernelName.empty());
    EXPECT_TRUE(desc.kernelMetadata.kernelLanguageAttributes.empty());
    EXPECT_TRUE(desc.kernelMetadata.printfStringsMap.empty());
    EXPECT_EQ(0U, desc.kernelMetadata.compiledSubGroupsNumber);
    EXPECT_EQ(0U, desc.kernelMetadata.requiredSubGroupSize);
    EXPECT_EQ(0U, desc.kernelMetadata.requiredThreadGroupDispatchSize);

    // --- external ---
    EXPECT_EQ(nullptr, desc.external.debugData.get());
    EXPECT_EQ(nullptr, desc.external.igcInfoForGtpin);
}
// With purely stateless buffer addressing the attributes are expected to report
// support for buffers bigger than 4GB.
TEST(KernelDescriptorAttributesSupportsBuffersBiggerThan4Gb, GivenPureStatelessBufferAddressingThenReturnTrue) {
    NEO::KernelDescriptor desc;
    auto &attributes = desc.kernelAttributes;

    attributes.bufferAddressingMode = NEO::KernelDescriptor::Stateless;

    EXPECT_TRUE(attributes.supportsBuffersBiggerThan4Gb());
}
// Every buffer addressing mode that involves bindful or bindless access is expected
// to report no support for buffers bigger than 4GB.
TEST(KernelDescriptorAttributesSupportsBuffersBiggerThan4Gb, GivenStatefulBufferAddressingThenReturnFalse) {
    NEO::KernelDescriptor desc;

    // Applies the given buffer addressing mode to the shared descriptor and queries it.
    auto supportsBigBuffersWith = [&desc](auto bufferMode) {
        desc.kernelAttributes.bufferAddressingMode = bufferMode;
        return desc.kernelAttributes.supportsBuffersBiggerThan4Gb();
    };

    EXPECT_FALSE(supportsBigBuffersWith(NEO::KernelDescriptor::Bindful));
    EXPECT_FALSE(supportsBigBuffersWith(NEO::KernelDescriptor::BindfulAndStateless));
    EXPECT_FALSE(supportsBigBuffersWith(NEO::KernelDescriptor::Bindless));
    EXPECT_FALSE(supportsBigBuffersWith(NEO::KernelDescriptor::BindlessAndStateless));
}
// isBindlessAddressingKernel() is expected to return true when either the buffer
// addressing mode is bindless, or the kernel uses images addressed bindlessly.
TEST(KernelDescriptor, GivenBufferOrImageBindlessAddressingWhenIsBindlessAddressingKernelCalledThenTrueIsReturned) {
    NEO::KernelDescriptor desc;

    // Applies the given buffer addressing mode to the shared descriptor and queries it.
    auto isBindlessWithBufferMode = [&desc](auto bufferMode) {
        desc.kernelAttributes.bufferAddressingMode = bufferMode;
        return NEO::KernelDescriptor::isBindlessAddressingKernel(desc);
    };

    // Buffer addressing alone decides the result while no bindless images are in use.
    EXPECT_FALSE(isBindlessWithBufferMode(NEO::KernelDescriptor::Bindful));
    EXPECT_FALSE(isBindlessWithBufferMode(NEO::KernelDescriptor::BindfulAndStateless));
    EXPECT_TRUE(isBindlessWithBufferMode(NEO::KernelDescriptor::Bindless));
    EXPECT_TRUE(isBindlessWithBufferMode(NEO::KernelDescriptor::BindlessAndStateless));

    // No buffer addressing, but images are used and addressed bindlessly.
    auto &attributes = desc.kernelAttributes;
    attributes.bufferAddressingMode = NEO::KernelDescriptor::AddrNone;
    attributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
    attributes.flags.usesImages = true;
    EXPECT_TRUE(NEO::KernelDescriptor::isBindlessAddressingKernel(desc));
}
// Verifies that initBindlessOffsetToSurfaceState() assigns consecutive surface-state
// indices to the arguments that have a defined bindless offset (two explicit args and
// the implicit global-variables surface), skips the arguments whose bindless offset is
// undefined, and does not repopulate the map when called a second time.
TEST(KernelDescriptor, GivenDescriptorWithBindlessArgsWhenInitBindlessOffsetsToSurfaceStateCalledThenMapIsInitializedOnceAndReturnsCorrectSurfaceIndices) {
    NEO::KernelDescriptor desc;

    desc.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    desc.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

    // Pointer argument with a defined bindless offset.
    auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
    argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x40;
    desc.payloadMappings.explicitArgs.push_back(argDescriptor);

    // Pointer argument that only has a stateless offset.
    auto argDescriptor2 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    argDescriptor2.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
    argDescriptor2.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor2.as<NEO::ArgDescPointer>().bindless = NEO::undefined<NEO::CrossThreadDataOffset>;
    argDescriptor2.as<NEO::ArgDescPointer>().stateless = 0x80;
    desc.payloadMappings.explicitArgs.push_back(argDescriptor2);

    // Image argument with a defined bindless offset.
    auto argDescriptor3 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTImage);
    argDescriptor3.as<NEO::ArgDescImage>() = NEO::ArgDescImage();
    argDescriptor3.as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor3.as<NEO::ArgDescImage>().bindless = 0x100;
    desc.payloadMappings.explicitArgs.push_back(argDescriptor3);

    // Image argument without any bindless offset.
    auto argDescriptor4 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTImage);
    argDescriptor4.as<NEO::ArgDescImage>() = NEO::ArgDescImage();
    argDescriptor4.as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor4.as<NEO::ArgDescImage>().bindless = NEO::undefined<NEO::CrossThreadDataOffset>;
    desc.payloadMappings.explicitArgs.push_back(argDescriptor4);

    // Reuse the first pointer descriptor (already copied into explicitArgs) with a new
    // bindless offset as the implicit global-variables surface.
    argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x80;
    desc.payloadMappings.implicitArgs.globalVariablesSurfaceAddress = argDescriptor.as<NEO::ArgDescPointer>();

    desc.initBindlessOffsetToSurfaceState();

    // Lookups go through find() with an explicit end() check rather than operator[] or
    // an unchecked find()->second: operator[] would default-insert a zero entry for a
    // missing key (letting an expected index of 0 pass vacuously and polluting the map),
    // and dereferencing end() is undefined behavior. A failed ASSERT_NE only leaves
    // this helper, so the remaining expectations in the test are still evaluated.
    auto expectSurfaceIndex = [](const auto &offsetToIndex, NEO::CrossThreadDataOffset bindlessOffset, unsigned int expectedIndex) {
        const auto entry = offsetToIndex.find(bindlessOffset);
        ASSERT_NE(offsetToIndex.end(), entry);
        EXPECT_EQ(expectedIndex, entry->second);
    };

    EXPECT_EQ(3u, desc.bindlessArgsMap.size());

    expectSurfaceIndex(desc.bindlessArgsMap, 0x40, 0u);
    expectSurfaceIndex(desc.bindlessArgsMap, 0x100, 1u);
    expectSurfaceIndex(desc.bindlessArgsMap, 0x80, 2u);

    expectSurfaceIndex(desc.getBindlessOffsetToSurfaceState(), 0x40, 0u);
    expectSurfaceIndex(desc.getBindlessOffsetToSurfaceState(), 0x100, 1u);
    expectSurfaceIndex(desc.getBindlessOffsetToSurfaceState(), 0x80, 2u);

    // A second init call after clearing must leave the map empty.
    desc.bindlessArgsMap.clear();
    desc.initBindlessOffsetToSurfaceState();
    EXPECT_EQ(0u, desc.bindlessArgsMap.size());
}
// Verifies that initBindlessOffsetToSurfaceState() assigns consecutive surface-state
// indices first to the explicit arguments with a defined bindless offset and then to
// the implicit global-variables and global-constants surfaces, and that a second call
// does not repopulate the map.
TEST(KernelDescriptor, GivenDescriptorWithBindlessExplicitAndImplicitArgsWhenInitBindlessOffsetsToSurfaceStateCalledThenMapIsInitializedOnceAndReturnsCorrectSurfaceIndices) {
    NEO::KernelDescriptor desc;

    desc.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    desc.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

    // Pointer argument with a defined bindless offset.
    auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
    argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x40;
    desc.payloadMappings.explicitArgs.push_back(argDescriptor);

    // Pointer argument that only has a stateless offset.
    auto argDescriptor2 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    argDescriptor2.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
    argDescriptor2.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor2.as<NEO::ArgDescPointer>().bindless = NEO::undefined<NEO::CrossThreadDataOffset>;
    argDescriptor2.as<NEO::ArgDescPointer>().stateless = 0x80;
    desc.payloadMappings.explicitArgs.push_back(argDescriptor2);

    // Image argument with a defined bindless offset.
    auto argDescriptor3 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTImage);
    argDescriptor3.as<NEO::ArgDescImage>() = NEO::ArgDescImage();
    argDescriptor3.as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor3.as<NEO::ArgDescImage>().bindless = 0x100;
    desc.payloadMappings.explicitArgs.push_back(argDescriptor3);

    // Image argument without any bindless offset.
    auto argDescriptor4 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTImage);
    argDescriptor4.as<NEO::ArgDescImage>() = NEO::ArgDescImage();
    argDescriptor4.as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor4.as<NEO::ArgDescImage>().bindless = NEO::undefined<NEO::CrossThreadDataOffset>;
    desc.payloadMappings.explicitArgs.push_back(argDescriptor4);

    // Implicit surfaces carrying both bindless and stateless offsets.
    desc.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 0x140;
    desc.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless = 0x180;
    desc.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 0x220;
    desc.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless = 0x260;

    desc.initBindlessOffsetToSurfaceState();

    // Lookups go through find() with an explicit end() check rather than operator[] or
    // an unchecked find()->second: operator[] would default-insert a zero entry for a
    // missing key (letting an expected index of 0 pass vacuously and polluting the map),
    // and dereferencing end() is undefined behavior. A failed ASSERT_NE only leaves
    // this helper, so the remaining expectations in the test are still evaluated.
    auto expectSurfaceIndex = [](const auto &offsetToIndex, NEO::CrossThreadDataOffset bindlessOffset, unsigned int expectedIndex) {
        const auto entry = offsetToIndex.find(bindlessOffset);
        ASSERT_NE(offsetToIndex.end(), entry);
        EXPECT_EQ(expectedIndex, entry->second);
    };

    EXPECT_EQ(4u, desc.bindlessArgsMap.size());

    expectSurfaceIndex(desc.bindlessArgsMap, 0x40, 0u);
    expectSurfaceIndex(desc.bindlessArgsMap, 0x100, 1u);
    expectSurfaceIndex(desc.bindlessArgsMap, 0x140, 2u);
    expectSurfaceIndex(desc.bindlessArgsMap, 0x220, 3u);

    expectSurfaceIndex(desc.getBindlessOffsetToSurfaceState(), 0x40, 0u);
    expectSurfaceIndex(desc.getBindlessOffsetToSurfaceState(), 0x100, 1u);
    expectSurfaceIndex(desc.getBindlessOffsetToSurfaceState(), 0x140, 2u);
    expectSurfaceIndex(desc.getBindlessOffsetToSurfaceState(), 0x220, 3u);

    // A second init call after clearing must leave the map empty.
    desc.bindlessArgsMap.clear();
    desc.initBindlessOffsetToSurfaceState();
    EXPECT_EQ(0u, desc.bindlessArgsMap.size());
}
// When no explicit argument has a defined bindless offset (two stateless-only pointers
// and one by-value argument), the bindless-offset map is expected to stay empty.
TEST(KernelDescriptor, GivenDescriptorWithoutStatefulArgsWhenInitBindlessOffsetsToSurfaceStateCalledThenMapOfBindlessOffsetToSurfaceStateIndexIsEmpty) {
    NEO::KernelDescriptor desc;
    desc.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    desc.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

    // First pointer argument: only the stateless offset is defined.
    NEO::ArgDescriptor firstPointerArg(NEO::ArgDescriptor::argTPointer);
    auto &firstPointer = firstPointerArg.as<NEO::ArgDescPointer>();
    firstPointer = NEO::ArgDescPointer();
    firstPointer.bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    firstPointer.bindless = NEO::undefined<NEO::CrossThreadDataOffset>;
    firstPointer.stateless = 0x40;
    desc.payloadMappings.explicitArgs.push_back(firstPointerArg);

    // Second pointer argument: only the stateless offset is defined.
    NEO::ArgDescriptor secondPointerArg(NEO::ArgDescriptor::argTPointer);
    auto &secondPointer = secondPointerArg.as<NEO::ArgDescPointer>();
    secondPointer = NEO::ArgDescPointer();
    secondPointer.bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    secondPointer.bindless = NEO::undefined<NEO::CrossThreadDataOffset>;
    secondPointer.stateless = 0x80;
    desc.payloadMappings.explicitArgs.push_back(secondPointerArg);

    // By-value argument with a single element.
    NEO::ArgDescValue::Element valueElement;
    valueElement.offset = 0x80;
    NEO::ArgDescriptor valueArg(NEO::ArgDescriptor::argTValue);
    valueArg.as<NEO::ArgDescValue>().elements.push_back(valueElement);
    desc.payloadMappings.explicitArgs.push_back(valueArg);

    desc.initBindlessOffsetToSurfaceState();

    EXPECT_EQ(0u, desc.bindlessArgsMap.size());
}
// Exercises getPerThreadDataOffset() for cross-thread data that is equal to, smaller
// than, and larger than the inline data payload.
TEST(KernelDescriptor, GivenDescriptorWhenGettingPerThreadDataOffsetThenItReturnsCorrectValue) {
    NEO::KernelDescriptor desc{};
    auto &attributes = desc.kernelAttributes;

    // Cross-thread data exactly matches the inline data payload.
    attributes.inlineDataPayloadSize = 64u;
    attributes.crossThreadDataSize = 64u;
    EXPECT_EQ(0u, desc.getPerThreadDataOffset());

    // crossThreadData is fully consumed by inlineDataPayload
    attributes.inlineDataPayloadSize = 64u;
    attributes.crossThreadDataSize = 40u;
    EXPECT_EQ(0u, desc.getPerThreadDataOffset());

    // Cross-thread data exceeds the inline data payload by 64 bytes, which is the
    // expected offset.
    attributes.inlineDataPayloadSize = 64u;
    attributes.crossThreadDataSize = 128u;
    EXPECT_EQ(64u, desc.getPerThreadDataOffset());
}
|