File: kernel_slm_tests.cpp

package info (click to toggle)
intel-compute-runtime 20.44.18297-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 34,780 kB
  • sloc: cpp: 379,729; lisp: 4,931; python: 299; sh: 196; makefile: 8
file content (145 lines) | stat: -rw-r--r-- 5,477 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/program/kernel_info.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "test.h"

using namespace NEO;

// Value-parameterized fixture for the SLM / barrier interface-descriptor test.
//
// The test parameter is a uint32_t (the test multiplies it by KB to obtain the
// kernel's static SLM size). SetUp() builds a minimal KernelInfo whose heap and
// patch-token pointers refer to storage owned by this fixture, so that storage
// must stay alive for as long as anything built from `kernelInfo` is in use.
//
// Every member is value-initialized in-class: gtest constructs a fresh fixture
// object per test, so SetUp() only has to fill in the non-zero fields, and no
// consumer of these buffers can ever observe indeterminate bytes.
struct KernelSLMAndBarrierTest : public ClDeviceFixture,
                                 public ::testing::TestWithParam<uint32_t> {
    void SetUp() override {
        ClDeviceFixture::SetUp();
        program = std::make_unique<MockProgram>(toClDeviceVector(*pClDevice));

        // Patch tokens start out zeroed (see the member initializers below);
        // only the fields the test depends on are set here.
        dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData);

        executionEnvironment.CompiledSIMD32 = 1;
        executionEnvironment.LargestCompiledSIMDSize = 32;

        threadPayload.LocalIDXPresent = 1;
        threadPayload.LocalIDYPresent = 1;
        threadPayload.LocalIDZPresent = 1;

        // Point the kernel description at the fixture-owned storage.
        kernelInfo.heapInfo.pKernelHeap = kernelIsa;
        kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
        kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
        kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
        kernelInfo.patchInfo.threadPayload = &threadPayload;
    }
    void TearDown() override {
        ClDeviceFixture::TearDown();
    }

    uint32_t simd = 0;
    uint32_t numChannels = 0;

    std::unique_ptr<MockProgram> program;

    SKernelBinaryHeaderCommon kernelHeader = {};
    SPatchDataParameterStream dataParameterStream = {};
    SPatchExecutionEnvironment executionEnvironment = {};
    SPatchThreadPayload threadPayload = {};
    KernelInfo kernelInfo;

    // Backing storage: kernelIsa is exposed as the kernel heap; the other two
    // arrays are only used for their sizes by the code in this file.
    uint32_t kernelIsa[32] = {};
    uint32_t crossThreadData[32] = {};
    uint32_t perThreadData[8] = {};
};

// Static SLM sizes, in KB, used as the parameter set for KernelSLMAndBarrierTest.
// Immutable: the table is only ever read when the test suite is instantiated.
// Note: 2 KB is not listed, so the "<= 2 KB" expectation branch in the test is
// never exercised by this parameter set.
static constexpr uint32_t slmSizeInKb[] = {1, 4, 8, 16, 32, 64};

// Programs an interface descriptor for a kernel with barriers and a static SLM
// size of GetParam() KB, then checks the descriptor's encoded SLM size, barrier
// enable, denorm mode and binding-table entry count.
HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgrammingSlmThenProgrammingIsCorrect) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    ASSERT_NE(nullptr, pClDevice);
    CommandQueueHw<FamilyType> cmdQ(nullptr, pClDevice, 0, false);

    // define kernel info
    executionEnvironment.HasBarriers = 1;
    kernelInfo.workloadInfo.slmStaticSize = GetParam() * KB;

    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    // After creating Mock Kernel now create Indirect Heap
    auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192);

    uint64_t interfaceDescriptorOffset = indirectHeap.getUsed();

    size_t offsetInterfaceDescriptorData = HardwareCommandsHelper<FamilyType>::sendInterfaceDescriptorData(
        indirectHeap,
        interfaceDescriptorOffset,
        0,
        sizeof(crossThreadData),
        sizeof(perThreadData),
        0,
        0,
        0,
        1,
        kernel,
        4u,
        pDevice->getPreemptionMode(),
        nullptr);

    // Locate the descriptor at heap base + offset. The addition is done on a
    // byte pointer: adding the offset to a uint32_t * would scale it by
    // sizeof(uint32_t) and only hit the right address when the offset is 0.
    // NOTE(review): assumes the returned offset is expressed in bytes, like the
    // getUsed() value passed in above -- confirm against the helper.
    auto *heapBase = reinterpret_cast<uint8_t *>(indirectHeap.getCpuBase());
    auto *pSrcIDData = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(heapBase + offsetInterfaceDescriptorData);

    const auto slmStaticSize = kernelInfo.workloadInfo.slmStaticSize;
    uint32_t expectedSlmSize = 0;

    if (::renderCoreFamily == IGFX_GEN8_CORE) {
        // Expected GEN8 value: 1, 2, 4, 8, 16 for sizes up to 4, 8, 16, 32, 64 KB.
        if (slmStaticSize <= (4 * 1024)) {
            expectedSlmSize = 1;
        } else if (slmStaticSize <= (8 * 1024)) {
            expectedSlmSize = 2;
        } else if (slmStaticSize <= (16 * 1024)) {
            expectedSlmSize = 4;
        } else if (slmStaticSize <= (32 * 1024)) {
            expectedSlmSize = 8;
        } else if (slmStaticSize <= (64 * 1024)) {
            expectedSlmSize = 16;
        }
    } else {
        // Expected value is log2(size in KB) + 1: 1 KB -> 1, 2 KB -> 2, 4 KB -> 3, ...
        if (slmStaticSize <= (1 * 1024)) {
            expectedSlmSize = 1;
        } else if (slmStaticSize <= (2 * 1024)) {
            expectedSlmSize = 2;
        } else if (slmStaticSize <= (4 * 1024)) {
            expectedSlmSize = 3;
        } else if (slmStaticSize <= (8 * 1024)) {
            expectedSlmSize = 4;
        } else if (slmStaticSize <= (16 * 1024)) {
            expectedSlmSize = 5;
        } else if (slmStaticSize <= (32 * 1024)) {
            expectedSlmSize = 6;
        } else if (slmStaticSize <= (64 * 1024)) {
            expectedSlmSize = 7;
        }
    }

    // A zero expectation means the parameter fell outside the table above.
    ASSERT_GT(expectedSlmSize, 0u);
    EXPECT_EQ(expectedSlmSize, pSrcIDData->getSharedLocalMemorySize());
    EXPECT_EQ(!!executionEnvironment.HasBarriers, pSrcIDData->getBarrierEnable());
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, pSrcIDData->getDenormMode());
    EXPECT_EQ(4u, pSrcIDData->getBindingTableEntryCount());
}

// Run KernelSLMAndBarrierTest once for every entry of slmSizeInKb.
INSTANTIATE_TEST_CASE_P(SlmSizes, KernelSLMAndBarrierTest, ::testing::ValuesIn(slmSizeInKb));