File: clone_kernel_tests.cpp

package info (click to toggle)
intel-compute-runtime 25.48.36300.8-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 80,652 kB
  • sloc: cpp: 939,022; lisp: 2,090; sh: 722; makefile: 162; python: 21
file content (521 lines) | stat: -rw-r--r-- 31,325 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
/*
 * Copyright (C) 2018-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/common/utilities/base_object_utils.h"

#include "opencl/source/helpers/sampler_helpers.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/mocks/mock_sampler.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"

#include "CL/cl.h"
#include "gtest/gtest.h"

#include <memory>

using namespace NEO;

class CloneKernelTest : public MultiRootDeviceWithSubDevicesFixture {
  public:
    CloneKernelTest() {
    }

  protected:
    void SetUp() override {
        MultiRootDeviceWithSubDevicesFixture::SetUp();
        pProgram = std::make_unique<MockProgram>(context.get(), false, context->getDevices());

        // define kernel info
        pKernelInfo = std::make_unique<MockKernelInfo>();

        pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.resize(1);

        pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
        pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 72;
        pKernelInfo->setPrivateMemory(0x10, false, 8, 64, 64);
        pKernelInfo->heapInfo.surfaceStateHeapSize = sizeof(surfaceStateHeap);
        pKernelInfo->heapInfo.pSsh = surfaceStateHeap;

        KernelInfoContainer kernelInfos;
        kernelInfos.resize(3);
        kernelInfos[0] = kernelInfos[1] = kernelInfos[2] = pKernelInfo.get();

        KernelVectorType sourceKernels;
        sourceKernels.resize(3);

        KernelVectorType clonedKernels;
        clonedKernels.resize(3);

        for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {

            pSourceKernel[rootDeviceIndex] = new MockKernel(pProgram.get(), *pKernelInfo, *deviceFactory->rootDevices[rootDeviceIndex]);
            ASSERT_EQ(CL_SUCCESS, pSourceKernel[rootDeviceIndex]->initialize());
            sourceKernels[rootDeviceIndex] = pSourceKernel[rootDeviceIndex];

            pClonedKernel[rootDeviceIndex] = new MockKernel(pProgram.get(), *pKernelInfo, *deviceFactory->rootDevices[rootDeviceIndex]);
            ASSERT_EQ(CL_SUCCESS, pClonedKernel[rootDeviceIndex]->initialize());
            clonedKernels[rootDeviceIndex] = pClonedKernel[rootDeviceIndex];
        }

        pSourceMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(sourceKernels, kernelInfos);
        pClonedMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(clonedKernels, kernelInfos);
    }

    void TearDown() override {
        pClonedMultiDeviceKernel.reset();
        pSourceMultiDeviceKernel.reset();
        pKernelInfo.reset();
        pProgram.reset();
        MultiRootDeviceWithSubDevicesFixture::TearDown();
    }

    cl_int retVal = CL_SUCCESS;
    std::unique_ptr<MockProgram> pProgram;
    std::unique_ptr<MultiDeviceKernel> pSourceMultiDeviceKernel;
    std::unique_ptr<MultiDeviceKernel> pClonedMultiDeviceKernel;
    MockKernel *pSourceKernel[3] = {nullptr};
    MockKernel *pClonedKernel[3] = {nullptr};
    std::unique_ptr<MockKernelInfo> pKernelInfo;
    char surfaceStateHeap[128];
};

TEST_F(CloneKernelTest, givenKernelWithPrivateSurfaceWhenCloningKernelThenClonedKernelProgramItsOwnPrivateSurfaceAddress) {
    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        auto pSourcePrivateSurface = pSourceKernel[rootDeviceIndex]->privateSurface;
        auto pClonedPrivateSurface = pClonedKernel[rootDeviceIndex]->privateSurface;
        EXPECT_NE(nullptr, pSourcePrivateSurface);
        EXPECT_NE(nullptr, pClonedPrivateSurface);
        EXPECT_NE(pClonedPrivateSurface, pSourcePrivateSurface);
        {
            auto pSourcePrivateSurfPatchedAddress = reinterpret_cast<uint64_t *>(ptrOffset(pSourceKernel[rootDeviceIndex]->getCrossThreadData(), 64));
            auto pClonedPrivateSurfPatchedAddress = reinterpret_cast<uint64_t *>(ptrOffset(pClonedKernel[rootDeviceIndex]->getCrossThreadData(), 64));

            EXPECT_EQ(pSourcePrivateSurface->getGpuAddressToPatch(), *pSourcePrivateSurfPatchedAddress);
            EXPECT_EQ(pClonedPrivateSurface->getGpuAddressToPatch(), *pClonedPrivateSurfPatchedAddress);
        }

        retVal = pClonedKernel[rootDeviceIndex]->cloneKernel(pSourceKernel[rootDeviceIndex]);
        EXPECT_EQ(CL_SUCCESS, retVal);

        auto pClonedPrivateSurface2 = pClonedKernel[rootDeviceIndex]->privateSurface;
        EXPECT_EQ(pClonedPrivateSurface, pClonedPrivateSurface2);
        {
            auto pClonedPrivateSurfPatchedAddress = reinterpret_cast<uint64_t *>(ptrOffset(pClonedKernel[rootDeviceIndex]->getCrossThreadData(), 64));
            EXPECT_EQ(pClonedPrivateSurface->getGpuAddressToPatch(), *pClonedPrivateSurfPatchedAddress);
        }
    }
}

TEST_F(CloneKernelTest, givenUnsetArgWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgBuffer(0);
    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(Kernel::NONE_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_EQ(nullptr, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
        EXPECT_EQ(nullptr, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
        EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_FALSE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
    }
}

TEST_F(CloneKernelTest, givenArgLocalWhenCloningKernelThenKernelInfoIsCorrect) {
    const size_t slmSize = 0x800;
    pKernelInfo->addArgLocal(0, 0, 1);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgLocal);
        pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgLocal);
    }
    retVal = pSourceMultiDeviceKernel->setArg(0, slmSize, nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(Kernel::SLM_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);

        EXPECT_EQ(alignUp(slmSize, 1024), pClonedKernel[rootDeviceIndex]->slmTotalSize);
    }
}

TEST_F(CloneKernelTest, givenArgBufferWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgBuffer(0, 0x20, sizeof(uint64_t));

    auto buffer = clUniquePtr(Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal));
    cl_mem memObj = buffer.get();

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgBuffer);
        pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgBuffer);
    }

    retVal = pSourceMultiDeviceKernel->setArg(0, sizeof(cl_mem), &memObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(Kernel::BUFFER_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);

        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, *static_cast<const cl_mem *>(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value));
        EXPECT_EQ(pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object, *static_cast<const cl_mem *>(pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value));
        EXPECT_EQ(*static_cast<const cl_mem *>(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value), *static_cast<const cl_mem *>(pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value));

        auto pKernelArg = reinterpret_cast<uint64_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
                                                       pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
        EXPECT_EQ(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), *pKernelArg);
    }
}

TEST_F(CloneKernelTest, givenArgImageWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgImage(0, 0x20);
    auto &metaPayload = pKernelInfo->argAsImg(0).metadataPayload;
    metaPayload.imgWidth = 0x4;
    metaPayload.imgHeight = 0x8;
    metaPayload.imgDepth = 0xc;

    auto image = std::unique_ptr<Image>(Image2dHelperUlt<>::create(context.get()));
    ASSERT_NE(nullptr, image);

    auto rootDeviceIndex = *context->getRootDeviceIndices().begin();
    size_t imageWidth = image->getImageDesc().image_width;
    size_t imageHeight = image->getImageDesc().image_height;
    size_t imageDepth = image->getImageDesc().image_depth;

    cl_mem memObj = image.get();

    pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgImage);
    pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgImage);

    retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_mem), &memObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
    EXPECT_EQ(Kernel::IMAGE_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
    EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
    EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
    EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
    EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);

    auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());
    auto &clonedArg = pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescImage>();

    auto pImgWidth = ptrOffset(crossThreadData, clonedArg.metadataPayload.imgWidth);
    EXPECT_EQ(imageWidth, *pImgWidth);

    auto pImgHeight = ptrOffset(crossThreadData, clonedArg.metadataPayload.imgHeight);
    EXPECT_EQ(imageHeight, *pImgHeight);

    auto pImgDepth = ptrOffset(crossThreadData, clonedArg.metadataPayload.imgDepth);
    EXPECT_EQ(imageDepth, *pImgDepth);
}

TEST_F(CloneKernelTest, givenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect) {
    auto sampler = clUniquePtr<Sampler>(new MockSampler(context.get(),
                                                        true,
                                                        (cl_addressing_mode)CL_ADDRESS_MIRRORED_REPEAT,
                                                        (cl_filter_mode)CL_FILTER_NEAREST));

    pKernelInfo->addArgSampler(0, 0x20, 0x8, 0x10, 0x4);

    cl_sampler samplerObj = sampler.get();
    auto rootDeviceIndex = *context->getRootDeviceIndices().begin();

    pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgSampler);
    pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgSampler);

    retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_sampler), &samplerObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
    EXPECT_EQ(Kernel::SAMPLER_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
    EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
    EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
    EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
    EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
    EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);

    auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());
    const auto &clonedArg = pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescSampler>();

    auto pSnapWa = ptrOffset(crossThreadData, clonedArg.metadataPayload.samplerSnapWa);
    EXPECT_EQ(sampler->getSnapWaValue(), *pSnapWa);

    auto pAddressingMode = ptrOffset(crossThreadData, clonedArg.metadataPayload.samplerAddressingMode);
    EXPECT_EQ(getAddrModeEnum(sampler->addressingMode), *pAddressingMode);

    auto pNormalizedCoords = ptrOffset(crossThreadData, clonedArg.metadataPayload.samplerNormalizedCoords);
    EXPECT_EQ(getNormCoordsEnum(sampler->normalizedCoordinates), *pNormalizedCoords);
    EXPECT_EQ(3, sampler->getRefInternalCount());
}

TEST_F(CloneKernelTest, givenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) {
    char *svmPtr = new char[256];

    pKernelInfo->addArgBuffer(0, 0x20, sizeof(void *));

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        retVal = pSourceKernel[rootDeviceIndex]->setArgSvm(0, 256, svmPtr, nullptr, 0u);
        ASSERT_EQ(CL_SUCCESS, retVal);

        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(Kernel::SVM_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);

        auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
                                    pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
        EXPECT_EQ(svmPtr, *pKernelArg);
    }

    delete[] svmPtr;
}

TEST_F(CloneKernelTest, givenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect) {
    const ClDeviceInfo &devInfo = device1->getDeviceInfo();
    if (devInfo.svmCapabilities == 0) {
        GTEST_SKIP();
    }

    pKernelInfo->addArgBuffer(0, 0x20, sizeof(void *));

    char memory[100] = {};
    MultiGraphicsAllocation multiGraphicsAllocation(3);
    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        auto svmAlloc = new MockGraphicsAllocation(rootDeviceIndex, memory, 100);
        multiGraphicsAllocation.addAllocation(svmAlloc);
    }

    retVal = pSourceMultiDeviceKernel->setArgSvmAlloc(0, memory, &multiGraphicsAllocation, 1u);
    ASSERT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
        EXPECT_EQ(Kernel::SVM_ALLOC_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);

        auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
                                    pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
        EXPECT_EQ(memory, *pKernelArg);
        delete multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
    }
}

TEST_F(CloneKernelTest, givenArgImmediateWhenCloningKernelThenKernelInfoIsCorrect) {
    pKernelInfo->addArgImmediate(0, sizeof(void *), 0x20);

    using TypeParam = unsigned long;
    auto value = (TypeParam)0xAA55AA55UL;

    retVal = pSourceMultiDeviceKernel->setArg(0, sizeof(TypeParam), &value);
    ASSERT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(Kernel::NONE_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);

        auto pKernelArg = (TypeParam *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
                                        pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescValue>().elements[0].offset);
        EXPECT_EQ(value, *pKernelArg);
    }
}

TEST_F(CloneKernelTest, givenExecInfoWhenCloningKernelThenSvmAllocationIsCorrect) {
    void *ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    ASSERT_NE(nullptr, ptrSVM);

    auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
    ASSERT_NE(nullptr, svmData);
    auto &pSvmAllocs = svmData->gpuAllocations;

    pSourceMultiDeviceKernel->setSvmKernelExecInfo(pSvmAllocs);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.size());
        EXPECT_NE(nullptr, pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0));
        EXPECT_EQ(pSvmAllocs.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0));
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.size(), pClonedKernel[rootDeviceIndex]->kernelSvmGfxAllocations.size());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0), pClonedKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0));
    }

    context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
}

TEST_F(CloneKernelTest, givenUnifiedMemoryExecInfoWhenCloningKernelThenUnifiedMemoryAllocationIsCorrect) {
    void *ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    ASSERT_NE(nullptr, ptrSVM);

    auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
    ASSERT_NE(nullptr, svmData);
    auto &pSvmAllocs = svmData->gpuAllocations;

    pSourceMultiDeviceKernel->setUnifiedMemoryExecInfo(pSvmAllocs);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size());
        EXPECT_NE(nullptr, pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0));
        EXPECT_EQ(pSvmAllocs.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0));
    }

    retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size(), pClonedKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size());
        EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0), pClonedKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0));
    }

    context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
}

TEST_F(CloneKernelTest, givenBuiltinSourceKernelWhenCloningThenSetBuiltinFlagToClonedKernel) {
    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        pSourceKernel[rootDeviceIndex]->isBuiltIn = true;
    }
    pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
    for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
        EXPECT_TRUE(pClonedKernel[rootDeviceIndex]->isBuiltIn);
    }
}