File: kernel_info.h

package info (click to toggle)
intel-compute-runtime 25.44.36015.8-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 79,632 kB
  • sloc: cpp: 931,547; lisp: 2,074; sh: 719; makefile: 162; python: 21
file content (106 lines) | stat: -rw-r--r-- 3,748 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/*
 * Copyright (C) 2018-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/program/heap_info.h"
#include "shared/source/utilities/arrayref.h"

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

namespace gtpin {
// Opaque record type; this header only ever refers to it through a pointer,
// so an incomplete declaration is sufficient.
struct igc_info_s;
using igc_info_t = igc_info_s;
} // namespace gtpin

namespace NEO {
// Forward declarations: these types are only named (by pointer/reference)
// in this header, so their full definitions are not required here.
class BuiltinDispatchInfoBuilder;
class Device;
class DispatchInfo;
class GraphicsAllocation;
class Kernel;
class MemoryManager;

struct HardwareInfo;
struct KernelArgumentType;
struct KernelInfo;

// Numerically equal to ln(4) (i.e. 2 * ln 2), rounded to float.
// Declared `inline constexpr` so every translation unit that includes this
// header shares one definition and the value can be used in constant
// expressions (a `static const float` provides neither).
// NOTE(review): the consumers of this ratio are not visible in this header.
inline constexpr float yTilingRatioValue = 1.3862943611198906188344642429164f;

// Bundle of device-level values handed to KernelInfo::apply().
// All fields default to null/zero until the caller fills them in.
struct DeviceInfoKernelPayloadConstants {
    void *slmWindow{nullptr};
    uint32_t slmWindowSize{0U};
    uint32_t computeUnitsUsedForScratch{0U};
    uint32_t maxWorkGroupSize{0U};
};

// Per-kernel record: wraps a KernelDescriptor together with heap data, the
// kernel's GraphicsAllocation pointer and assorted bookkeeping fields.
// Derives from NonCopyableAndNonMovableClass, so instances are neither
// copied nor moved. Out-of-line members are defined outside this header.
struct KernelInfo : NEO::NonCopyableAndNonMovableClass {
  public:
    KernelInfo() = default;
    ~KernelInfo();

    // Returns the stored kernelAllocation pointer (nullptr until one is assigned).
    GraphicsAllocation *getGraphicsAllocation() const { return this->kernelAllocation; }

    // Returns the descriptor of the explicit argument at `index`.
    // An out-of-range index is flagged through DEBUG_BREAK_IF before indexing.
    const ArgDescriptor &getArgDescriptorAt(uint32_t index) const {
        DEBUG_BREAK_IF(index >= kernelDescriptor.payloadMappings.explicitArgs.size());
        return kernelDescriptor.payloadMappings.explicitArgs[index];
    }
    // Returns the full list of explicit argument descriptors.
    const StackVec<ArgDescriptor, 16> &getExplicitArgs() const {
        return kernelDescriptor.payloadMappings.explicitArgs;
    }
    // Returns the extended type metadata of the explicit argument at `index`.
    // An out-of-range index is flagged through DEBUG_BREAK_IF before indexing.
    const ArgTypeMetadataExtended &getExtendedMetadata(uint32_t index) const {
        DEBUG_BREAK_IF(index >= kernelDescriptor.explicitArgsExtendedMetadata.size());
        return kernelDescriptor.explicitArgsExtendedMetadata[index];
    }
    size_t getSamplerStateArrayCount() const;
    size_t getBorderColorOffset() const;
    // Returns kernelAttributes.simdSize from the kernel descriptor.
    unsigned int getMaxSimdSize() const {
        return kernelDescriptor.kernelAttributes.simdSize;
    }
    // Returns the requiresSubgroupIndependentForwardProgress attribute flag.
    bool requiresSubgroupIndependentForwardProgress() const {
        return kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress;
    }
    // Returns the product of the three requiredWorkgroupSize dimensions,
    // or `maxWorkGroupSize` when that product is zero or exceeds `maxWorkGroupSize`.
    size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
        auto requiredWorkGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
        auto requiredWorkGroupSizeY = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
        auto requiredWorkGroupSizeZ = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
        // Widen the first factor so the multiplication is carried out in size_t.
        size_t maxRequiredWorkGroupSize = static_cast<size_t>(requiredWorkGroupSizeX) * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
        if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) {
            maxRequiredWorkGroupSize = maxWorkGroupSize;
        }
        return maxRequiredWorkGroupSize;
    }

    uint32_t getConstantBufferSize() const;
    // NOTE(review): presumably returns the index of the argument called `name`;
    // the not-found value is not visible here - confirm at the definition.
    int32_t getArgNumByName(const char *name) const;

    // NOTE(review): presumably sets up kernelAllocation for `device`; the meaning
    // of the return value and of `internalIsa` should be confirmed at the definition.
    bool createKernelAllocation(const Device &device, bool internalIsa);
    void apply(const DeviceInfoKernelPayloadConstants &constants);

    HeapInfo heapInfo = {};
    std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
    char *crossThreadData = nullptr;
    const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
    uint32_t systemKernelOffset = 0;
    uint64_t kernelId = 0;
    bool isKernelHeapSubstituted = false;
    GraphicsAllocation *kernelAllocation = nullptr; // exposed via getGraphicsAllocation()
    DebugData debugData;
    bool computeMode = false;
    const gtpin::igc_info_t *igcInfoForGtpin = nullptr;

    // Zero-initialized like the other scalar members, so a default-constructed
    // KernelInfo never exposes an indeterminate hash value.
    uint64_t shaderHashCode = 0U;
    KernelDescriptor kernelDescriptor;
};

// Compile-time check that KernelInfo satisfies the NonCopyableAndNonMovable
// constraint (declared outside this file); the build fails if it does not.
static_assert(NEO::NonCopyableAndNonMovable<KernelInfo>);

// Produces one string from the given array of KernelInfo pointers.
// NOTE(review): the definition is not in this header; presumably it joins the
// kernels' names - confirm the separator and ordering at the definition.
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos);

} // namespace NEO