File: kernel_info.h

package info (click to toggle)
intel-compute-runtime 25.48.36300.8-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 80,652 kB
  • sloc: cpp: 939,022; lisp: 2,090; sh: 722; makefile: 162; python: 21
file content (135 lines) | stat: -rw-r--r-- 4,777 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/*
 * Copyright (C) 2018-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/program/heap_info.h"
#include "shared/source/utilities/arrayref.h"

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

namespace gtpin {
// Forward declaration of the igc_info_s record; the type stays incomplete in
// this header, so it can only be used through pointers or references here.
struct igc_info_s;
using igc_info_t = igc_info_s;
} // namespace gtpin

namespace NEO {
// Forward declarations of types that are named in this header without
// needing their full definitions.
struct HardwareInfo;
struct KernelArgumentType;
struct KernelInfo;
class BuiltinDispatchInfoBuilder;
class Device;
class DispatchInfo;
class GraphicsAllocation;
class Kernel;
class MemoryManager;

// Numerically equal to ln(4) (i.e. 2 * ln 2), rounded to float.
// NOTE(review): the name suggests a Y-tiling ratio; its users are not visible
// in this header - confirm the intended meaning against the call sites.
// `inline constexpr` gives a single definition shared by every translation
// unit that includes this header and makes the value usable in constant
// expressions (the previous `static const` created one copy per TU).
inline constexpr float yTilingRatioValue = 1.3862943611198906188344642429164f;

// Plain aggregate of per-device values; it is the parameter type of
// KernelInfo::apply(). Every field defaults to a null/zero value.
// NOTE(review): field meanings below are read off the names - confirm
// against the code that fills this struct in.
struct DeviceInfoKernelPayloadConstants {
    void *slmWindow{nullptr};                // presumably start of the SLM window
    uint32_t slmWindowSize{0U};              // presumably size of that window
    uint32_t computeUnitsUsedForScratch{0U}; // presumably compute-unit count used for scratch
    uint32_t maxWorkGroupSize{0U};           // presumably device work-group size limit
};

struct KernelInfo : NEO::NonCopyableAndNonMovableClass {
  public:
    KernelInfo() = default;
    ~KernelInfo();

    GraphicsAllocation *getGraphicsAllocation() const { return this->kernelAllocation; }

    const ArgDescriptor &getArgDescriptorAt(uint32_t index) const {
        DEBUG_BREAK_IF(index >= kernelDescriptor.payloadMappings.explicitArgs.size());
        return kernelDescriptor.payloadMappings.explicitArgs[index];
    }
    const StackVec<ArgDescriptor, 16> &getExplicitArgs() const {
        return kernelDescriptor.payloadMappings.explicitArgs;
    }
    const ArgTypeMetadataExtended &getExtendedMetadata(uint32_t index) const {
        DEBUG_BREAK_IF(index >= kernelDescriptor.explicitArgsExtendedMetadata.size());
        return kernelDescriptor.explicitArgsExtendedMetadata[index];
    }
    size_t getSamplerStateArrayCount() const;
    size_t getBorderColorOffset() const;
    unsigned int getMaxSimdSize() const {
        return kernelDescriptor.kernelAttributes.simdSize;
    }
    bool requiresSubgroupIndependentForwardProgress() const {
        return kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress;
    }
    size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
        auto requiredWorkGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
        auto requiredWorkGroupSizeY = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
        auto requiredWorkGroupSizeZ = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
        size_t maxRequiredWorkGroupSize = static_cast<size_t>(requiredWorkGroupSizeX) * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
        if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) {
            maxRequiredWorkGroupSize = maxWorkGroupSize;
        }
        return maxRequiredWorkGroupSize;
    }

    uint32_t getConstantBufferSize() const;
    int32_t getArgNumByName(const char *name) const;

    bool createKernelAllocation(const Device &device, bool internalIsa);
    void apply(const DeviceInfoKernelPayloadConstants &constants);

    uint32_t getIsaSize() const;
    GraphicsAllocation *getIsaGraphicsAllocation() const;
    void setIsaPerKernelAllocation(GraphicsAllocation *allocation);

    inline GraphicsAllocation *getIsaParentAllocation() const {
        return isaParentAllocation;
    }
    inline void setIsaParentAllocation(GraphicsAllocation *allocation) {
        isaParentAllocation = allocation;
    }
    inline size_t getIsaOffsetInParentAllocation() const {
        DEBUG_BREAK_IF(this->kernelAllocation != nullptr && 0u != isaSubAllocationOffset);
        return isaSubAllocationOffset;
    }
    inline void setIsaSubAllocationOffset(size_t offset) {
        isaSubAllocationOffset = offset;
    }
    inline void setIsaSubAllocationSize(size_t size) {
        isaSubAllocationSize = size;
    }
    inline size_t getIsaSubAllocationSize() const {
        return isaSubAllocationSize;
    }

    HeapInfo heapInfo = {};
    std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
    char *crossThreadData = nullptr;
    const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
    uint32_t systemKernelOffset = 0;
    uint64_t kernelId = 0;
    bool isKernelHeapSubstituted = false;
    GraphicsAllocation *kernelAllocation = nullptr;
    DebugData debugData;
    bool computeMode = false;
    const gtpin::igc_info_t *igcInfoForGtpin = nullptr;

    uint64_t shaderHashCode;
    KernelDescriptor kernelDescriptor;

  private:
    GraphicsAllocation *isaParentAllocation = nullptr;
    size_t isaSubAllocationOffset = 0lu;
    size_t isaSubAllocationSize = 0lu;
};

// Compile-time check that KernelInfo satisfies NEO::NonCopyableAndNonMovable,
// so an accidental copy/move-enabling change to the struct fails the build.
static_assert(NEO::NonCopyableAndNonMovable<KernelInfo>);

// Declaration only; the definition is not in this header. Takes a view over
// KernelInfo pointers and returns a std::string.
// NOTE(review): presumably joins the kernels' names - the separator and
// ordering are not visible here; confirm against the definition.
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos);

} // namespace NEO