File: hardware_interface.h

package info (click to toggle)
intel-compute-runtime 25.44.36015.8-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 79,632 kB
  • sloc: cpp: 931,547; lisp: 2,074; sh: 719; makefile: 162; python: 21
file content (144 lines) | stat: -rw-r--r-- 4,675 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
/*
 * Copyright (C) 2018-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/command_stream/preemption_mode.h"
#include "shared/source/helpers/vec.h"

#include <cstdint>

template <typename T>
struct Vec3;

namespace NEO {

class CommandQueue;
class CsrDependencies;
class DispatchInfo;
class Event;
class IndirectHeap;
class Kernel;
class LinearStream;
class HwPerfCounter;
class HwTimeStamps;
class TagNodeBase;
class TimestampPacketContainer;
struct KernelOperation;
struct MultiDispatchInfo;
struct TimestampPacketDependencies;
struct KernelInfo;
struct EncodeWalkerArgs;
struct HardwareInfo;
template <class T>
class TagNode;
enum class DebugPauseState : uint32_t;

// Bundle of per-dispatch parameters handed (by non-const reference) to the
// HardwareInterface routines that take a `walkerArgs` argument
// (dispatchWalker, dispatchWalkerCommon, programWalker, dispatchKernelCommands).
//
// Every member has a default initializer: a default-constructed instance has
// zeroed sizes/indices, null pointers, PreemptionMode::Initial and all flags
// cleared. None of the pointers is released by this struct.
struct HardwareInterfaceWalkerArgs {
    // Work sizes, three entries each, zero-filled by default.
    // NOTE(review): presumably one entry per dispatch dimension - confirm.
    size_t globalWorkSizes[3]{};
    size_t localWorkSizes[3]{};

    // Tag nodes; all null by default. hwTimeStamps / hwPerfCounter share their
    // names and type with the parameters of
    // HardwareInterface::dispatchProfilingPerfStartCommands / ...EndCommands.
    TagNodeBase *hwTimeStamps{nullptr};
    TagNodeBase *hwPerfCounter{nullptr};
    TagNodeBase *multiRootDeviceEventStamp{nullptr};

    // Timestamp-packet related objects; null by default.
    TimestampPacketDependencies *timestampPacketDependencies{nullptr};
    TimestampPacketContainer *currentTimestampPacketNodes{nullptr};

    // Read-only views of workgroup count / start vectors; null by default.
    const Vec3<size_t> *numberOfWorkgroups{nullptr};
    const Vec3<size_t> *startOfWorkgroups{nullptr};

    // Null by default.
    // NOTE(review): name suggests this is only set for blocked enqueues - confirm.
    KernelOperation *blockedCommandsData{nullptr};
    Event *event{nullptr};

    // Indices / offsets; zero by default.
    size_t currentDispatchIndex{0};
    size_t offsetInterfaceDescriptorTable{0};

    PreemptionMode preemptionMode{PreemptionMode::Initial};
    uint32_t commandType{0};
    uint32_t interfaceDescriptorIndex{0};

    // Behaviour flags; all off by default.
    bool isMainKernel{false};
    bool relaxedOrderingEnabled{false};
    bool blocking{false};
};

// Holder for a static helper that is not templated on the GfxFamily.
// Only the declaration lives in this header; the definition is elsewhere.
struct HardwareInterfaceHelper {
    // Receives the EncodeWalkerArgs by mutable reference and the KernelInfo by
    // const reference.
    // NOTE(review): presumably fills extension fields of encodeWalkerArgs based
    // on kernelInfo - confirm against the definition.
    static void setEncodeWalkerArgsExt(
        EncodeWalkerArgs &encodeWalkerArgs,
        const KernelInfo &kernelInfo);
};

// Set of static dispatch-related routines parameterized on a GfxFamily.
// The class has no data members and every member function is static; only
// declarations appear here, the definitions are provided elsewhere and must
// match these signatures exactly.
template <typename GfxFamily>
class HardwareInterface {
  public:
    // Walker command type the family designates as its default.
    using DefaultWalkerType = typename GfxFamily::DefaultWalkerType;
    // Interface descriptor type nested in the default walker type.
    using INTERFACE_DESCRIPTOR_DATA = typename DefaultWalkerType::InterfaceDescriptorType;

    // Walker-type-specific dispatch entry point. walkerArgs is taken by
    // non-const reference, so the callee is allowed to modify it.
    template <typename WalkerType>
    static void dispatchWalker(
        CommandQueue &commandQueue,
        const MultiDispatchInfo &multiDispatchInfo,
        const CsrDependencies &csrDependencies,
        HardwareInterfaceWalkerArgs &walkerArgs);

    // Non-template counterpart with the same parameter list as dispatchWalker.
    // NOTE(review): presumably picks the walker type and forwards to
    // dispatchWalker<WalkerType> - confirm against the definition.
    static void dispatchWalkerCommon(
        CommandQueue &commandQueue,
        const MultiDispatchInfo &multiDispatchInfo,
        const CsrDependencies &csrDependencies,
        HardwareInterfaceWalkerArgs &walkerArgs);

    // totalInterfaceDescriptorTableSize is a non-const reference, i.e. an
    // output or in/out value; dsh and commandStream are raw pointers.
    // NOTE(review): offsetInterfaceDescriptorTable is a size_t passed by const
    // reference; by-value would be the usual choice, but the signature has to
    // stay in sync with the out-of-line definitions.
    static void getDefaultDshSpace(
        const size_t &offsetInterfaceDescriptorTable,
        CommandQueue &commandQueue,
        const MultiDispatchInfo &multiDispatchInfo,
        size_t &totalInterfaceDescriptorTableSize,
        IndirectHeap *dsh,
        LinearStream *commandStream);

    // NOTE(review): `enable` is a bool passed by const reference; its exact
    // meaning (e.g. begin/end of a workaround sequence) is not visible here.
    static void dispatchWorkarounds(
        LinearStream *commandStream,
        CommandQueue &commandQueue,
        Kernel &kernel,
        const bool &enable);

    // Start-side profiling / perf-counter hook. Takes the two tag nodes as raw
    // pointers.
    // NOTE(review): whether null nodes are tolerated is not visible here.
    static void dispatchProfilingPerfStartCommands(
        TagNodeBase *hwTimeStamps,
        TagNodeBase *hwPerfCounter,
        LinearStream *commandStream,
        CommandQueue &commandQueue);

    // End-side counterpart of dispatchProfilingPerfStartCommands; identical
    // parameter list.
    static void dispatchProfilingPerfEndCommands(
        TagNodeBase *hwTimeStamps,
        TagNodeBase *hwPerfCounter,
        LinearStream *commandStream,
        CommandQueue &commandQueue);

    // Takes two DebugPauseState values (a confirmation trigger and a wait
    // condition) by value, plus the hardware info by const reference.
    static void dispatchDebugPauseCommands(
        LinearStream *commandStream,
        CommandQueue &commandQueue,
        DebugPauseState confirmationTrigger,
        DebugPauseState waitCondition,
        const HardwareInfo &hwInfo);

    // Per-kernel walker routine for a given WalkerType. The command stream,
    // the three indirect heaps (dsh, ioh, ssh) and walkerArgs are all mutable
    // references; dispatchInfo is read-only.
    template <typename WalkerType>
    static void programWalker(
        LinearStream &commandStream,
        Kernel &kernel,
        CommandQueue &commandQueue,
        IndirectHeap &dsh,
        IndirectHeap &ioh,
        IndirectHeap &ssh,
        const DispatchInfo &dispatchInfo,
        HardwareInterfaceWalkerArgs &walkerArgs);

    // Returns a pointer to a WalkerType.
    // NOTE(review): presumably the space is reserved inside commandStream, so
    // the pointer would be non-owning - confirm against the definition.
    template <typename WalkerType>
    static WalkerType *allocateWalkerSpace(LinearStream &commandStream,
                                           const Kernel &kernel);

    // dsh, ioh and ssh are references to pointers, i.e. the callee can rebind
    // the caller's heap pointers (output parameters).
    static void obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo,
                                    bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh);

    // Per-DispatchInfo routine for a given WalkerType; receives the command
    // stream and the three heaps by mutable reference.
    template <typename WalkerType>
    static void dispatchKernelCommands(CommandQueue &commandQueue, const DispatchInfo &dispatchInfo, LinearStream &commandStream,
                                       IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh,
                                       HardwareInterfaceWalkerArgs &walkerArgs);
};

} // namespace NEO