File: ioctl_helper_xe.h

package info (click to toggle)
intel-compute-runtime 25.48.36300.8-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 80,652 kB
  • sloc: cpp: 939,022; lisp: 2,090; sh: 722; makefile: 162; python: 21
file content (245 lines) | stat: -rw-r--r-- 13,902 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
/*
 * Copyright (C) 2023-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/os_interface/linux/drm_debug.h"
#include "shared/source/os_interface/linux/engine_info.h"
#include "shared/source/os_interface/linux/ioctl_helper.h"
#include "shared/source/os_interface/linux/xe/eudebug/eudebug_interface.h"

#include <bitset>
#include <mutex>
#include <optional>

namespace NEO {

namespace XeDrm {
struct drm_xe_engine_class_instance; // NOLINT(readability-identifier-naming)
struct drm_xe_query_gt_list;         // NOLINT(readability-identifier-naming)
struct drm_xe_query_config;          // NOLINT(readability-identifier-naming)
} // namespace XeDrm

enum class EngineClass : uint16_t;

struct BindInfo {
    uint64_t userptr;
    uint64_t addr;
};

class IoctlHelperXe : public IoctlHelper {
  public:
    using GtIdContainer = StackVec<int, 4>;

    using IoctlHelper::IoctlHelper;
    static std::unique_ptr<IoctlHelperXe> create(Drm &drmArg);
    static bool queryDeviceIdAndRevision(Drm &drm);
    IoctlHelperXe(Drm &drmArg);
    ~IoctlHelperXe() override;
    int ioctl(DrmIoctl request, void *arg) override;
    int ioctl(int fd, DrmIoctl request, void *arg) override;
    bool initialize() override;
    bool isSetPairAvailable() override;
    bool isChunkingAvailable() override;
    bool isVmBindAvailable() override;
    int createGemExt(const MemRegionsVec &memClassInstances, size_t allocSize, uint32_t &handle, uint64_t patIndex, std::optional<uint32_t> vmId, int32_t pairHandle, bool isChunked, uint32_t numOfChunks, std::optional<uint32_t> memPolicyMode, std::optional<std::vector<unsigned long>> memPolicyNodemask, std::optional<bool> isCoherent) override;
    uint32_t createGem(uint64_t size, uint32_t memoryBanks, std::optional<bool> isCoherent) override;
    CacheRegion closAlloc(CacheLevel cacheLevel) override;
    uint16_t closAllocWays(CacheRegion closIndex, uint16_t cacheLevel, uint16_t numWays) override;
    CacheRegion closFree(CacheRegion closIndex) override;
    int waitUserFence(uint32_t ctxId, uint64_t address,
                      uint64_t value, uint32_t dataWidth, int64_t timeout, uint16_t flags,
                      bool userInterrupt, uint32_t externalInterruptId, GraphicsAllocation *allocForInterruptWait) override;
    uint32_t getAtomicAdvise(bool isNonAtomic) override;
    uint32_t getAtomicAccess(AtomicAccessMode mode) override;
    uint64_t getPreferredLocationArgs(MemAdvise memAdviseOp) override;
    uint32_t getPreferredLocationAdvise() override;
    std::optional<MemoryClassInstance> getPreferredLocationRegion(PreferredLocation memoryLocation, uint32_t memoryInstance) override;
    bool setVmBoAdvise(int32_t handle, uint32_t attribute, void *region) override;
    bool setVmSharedSystemMemAdvise(uint64_t handle, const size_t size, const uint32_t attribute, const uint64_t param, const std::vector<uint32_t> &vmIds) override;
    AtomicAccessMode getVmSharedSystemAtomicAttribute(uint64_t handle, const size_t size, const uint32_t vmId) override;
    bool setVmBoAdviseForChunking(int32_t handle, uint64_t start, uint64_t length, uint32_t attribute, void *region) override;
    bool setVmPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override;
    bool setVmSharedSystemMemPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override;
    bool setGemTiling(void *setTiling) override;
    bool getGemTiling(void *setTiling) override;
    uint32_t getDirectSubmissionFlag() override;
    std::unique_ptr<uint8_t[]> prepareVmBindExt(const StackVec<uint32_t, 2> &bindExtHandles, uint64_t cookie) override;
    uint64_t getFlagsForVmBind(bool bindCapture, bool bindImmediate, bool bindMakeResident, bool bindLock, bool readOnlyResource) override;
    virtual std::string xeGetBindFlagNames(int bindFlags);
    int queryDistances(std::vector<QueryItem> &queryItems, std::vector<DistanceInfo> &distanceInfos) override;
    uint16_t getWaitUserFenceSoftFlag() override;
    int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) override;
    bool completionFenceExtensionSupported(const bool isVmBindAvailable) override;
    bool isPageFaultSupported() override;
    std::unique_ptr<uint8_t[]> createVmControlExtRegion(const std::optional<MemoryClassInstance> &regionInstanceClass) override;
    uint32_t getFlagsForVmCreate(bool disableScratch, bool enablePageFault, bool useVmBind) override;
    uint32_t createContextWithAccessCounters(GemContextCreateExt &gcc) override;
    uint32_t createCooperativeContext(GemContextCreateExt &gcc) override;
    void fillVmBindExtSetPat(VmBindExtSetPatT &vmBindExtSetPat, uint64_t patIndex, uint64_t nextExtension) override;
    void fillVmBindExtUserFence(VmBindExtUserFenceT &vmBindExtUserFence, uint64_t fenceAddress, uint64_t fenceValue, uint64_t nextExtension) override;
    void setVmBindUserFence(VmBindParams &vmBind, VmBindExtUserFenceT vmBindUserFence) override;
    std::optional<uint32_t> getVmAdviseAtomicAttribute() override;
    int vmBind(const VmBindParams &vmBindParams) override;
    int vmUnbind(const VmBindParams &vmBindParams) override;
    int getResetStats(ResetStats &resetStats, uint32_t *status, ResetStatsFault *resetStatsFault) override;
    bool isEuStallSupported() override;
    uint32_t getEuStallFdParameter() override;
    bool perfOpenEuStallStream(uint32_t euStallFdParameter, uint32_t &samplingPeriodNs, uint64_t engineInstance, uint64_t notifyNReports, uint64_t gpuTimeStampfrequency, int32_t *stream) override;
    bool perfDisableEuStallStream(int32_t *stream) override;
    MOCKABLE_VIRTUAL int perfOpenIoctl(DrmIoctl request, void *arg);
    unsigned int getIoctlRequestValuePerf(DrmIoctl ioctlRequest) const;
    UuidRegisterResult registerUuid(const std::string &uuid, uint32_t uuidClass, uint64_t ptr, uint64_t size) override;
    UuidRegisterResult registerStringClassUuid(const std::string &uuid, uint64_t ptr, uint64_t size) override;
    int unregisterUuid(uint32_t handle) override;
    bool isContextDebugSupported() override;
    int setContextDebugFlag(uint32_t drmContextId) override;
    bool isDebugAttachAvailable() override;
    int getEuDebugSysFsEnable() override;
    unsigned int getIoctlRequestValue(DrmIoctl ioctlRequest) const override;
    unsigned int getIoctlRequestValueDebugger(DrmIoctl ioctlRequest) const;
    int getDrmParamValue(DrmParam drmParam) const override;
    int getDrmParamValueBase(DrmParam drmParam) const override;
    std::string getIoctlString(DrmIoctl ioctlRequest) const override;
    int createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_t drmVmId, uint32_t deviceIndex, bool allocateInterrupt) override;
    std::string getDrmParamString(DrmParam param) const override;
    bool getTopologyDataAndMap(HardwareInfo &hwInfo, DrmQueryTopologyData &topologyData, TopologyMap &topologyMap) override;
    std::string getFileForMaxGpuFrequency() const override;
    std::string getFileForMaxGpuFrequencyOfSubDevice(int subDeviceId) const override;
    std::string getFileForMaxMemoryFrequencyOfSubDevice(int subDeviceId) const override;
    void configureCcsMode(std::vector<std::string> &files, const std::string expectedPrefix, uint32_t ccsMode,
                          std::vector<std::tuple<std::string, uint32_t>> &deviceCcsModeVec) override;
    bool getFabricLatency(uint32_t fabricId, uint32_t &latency, uint32_t &bandwidth) override;
    bool requiresUserFenceSetup(bool bind) const override;
    std::unique_ptr<EngineInfo> createEngineInfo(bool isSysmanEnabled) override;
    std::unique_ptr<MemoryInfo> createMemoryInfo() override;
    size_t getLocalMemoryRegionsSize(const MemoryInfo *memoryInfo, uint32_t subDevicesCount, uint32_t deviceBitfield) const override;
    void setupIpVersion() override;

    bool setGpuCpuTimes(TimeStampData *pGpuCpuTime, OSTime *osTime) override;
    bool getFdFromVmExport(uint32_t vmId, uint32_t flags, int32_t *fd) override;
    bool isImmediateVmBindRequired() const override;
    void fillExecObject(ExecObject &execObject, uint32_t handle, uint64_t gpuAddress, uint32_t drmContextId, bool bindInfo, bool isMarkedForCapture) override;
    void logExecObject(const ExecObject &execObject, std::stringstream &logger, size_t size) override;
    void fillExecBuffer(ExecBuffer &execBuffer, uintptr_t buffersPtr, uint32_t bufferCount, uint32_t startOffset, uint32_t size, uint64_t flags, uint32_t drmContextId) override;
    void logExecBuffer(const ExecBuffer &execBuffer, std::stringstream &logger) override;
    bool setDomainCpu(uint32_t handle, bool writeEnable) override;
    uint16_t getCpuCachingMode(std::optional<bool> isCoherent, bool allocationInSystemMemory) const;
    void addDebugMetadata(DrmResourceClass type, uint64_t *offset, uint64_t size);
    uint32_t registerResource(DrmResourceClass classType, const void *data, size_t size) override;
    void unregisterResource(uint32_t handle) override;
    void insertEngineToContextParams(ContextParamEngines<> &contextParamEngines, uint32_t engineId, const EngineClassInstance *engineClassInstance, uint32_t tileId, bool hasVirtualEngines) override;
    void registerBOBindHandle(Drm *drm, DrmAllocation *drmAllocation) override;
    bool resourceRegistrationEnabled() override { return true; }
    bool isPreemptionSupported() override { return true; }
    bool isTimestampsRefreshEnabled() override { return true; }
    uint32_t getTileIdFromGtId(uint32_t gtId) const override {
        return gtIdToTileId[gtId];
    }
    uint32_t getGtIdFromTileId(uint32_t tileId, uint16_t engineClass) const override;
    bool makeResidentBeforeLockNeeded() const override;
    bool isSmallBarConfigAllowed() const override { return false; }
    void *pciBarrierMmap() override;
    bool retrieveMmapOffsetForBufferObject(BufferObject &bo, uint64_t flags, uint64_t &offset) override;
    bool is2MBSizeAlignmentRequired(AllocationType allocationType) const override;

  protected:
    static constexpr uint32_t maxContextSetProperties = 4;

    virtual const char *xeGetClassName(int className) const;
    const char *xeGetBindOperationName(int bindOperation);
    const char *xeGetAdviseOperationName(int adviseOperation);

    const char *xeGetengineClassName(uint32_t engineClass);
    template <typename DataType>
    std::vector<DataType> queryData(uint32_t queryId);
    virtual int xeWaitUserFence(uint32_t ctxId, uint16_t op, uint64_t addr, uint64_t value, int64_t timeout, bool userInterrupt, uint32_t externalInterruptId, GraphicsAllocation *allocForInterruptWait);
    void setupXeWaitUserFenceStruct(void *arg, uint32_t ctxId, uint16_t op, uint64_t addr, uint64_t value, int64_t timeout);
    int xeVmBind(const VmBindParams &vmBindParams, bool bindOp);
    void xeShowBindTable();
    void updateBindInfo(uint64_t userPtr);
    int debuggerOpenIoctl(DrmIoctl request, void *arg);
    int debuggerMetadataCreateIoctl(DrmIoctl request, void *arg);
    int debuggerMetadataDestroyIoctl(DrmIoctl request, void *arg);
    int getEudebugExtProperty();
    uint64_t getEudebugExtPropertyValue();
    virtual bool isMediaEngine(uint16_t engineClass) const { return false; }
    virtual bool isMediaGt(uint16_t gtType) const;
    virtual void setContextPropertiesForRootDeviceContext(const OsContextLinux &osContext, uint32_t deviceIndex, void *extProperties, uint32_t &extIndexInOut){};

    virtual bool isPrimaryContext(const OsContextLinux &osContext, uint32_t deviceIndex);
    virtual uint32_t getPrimaryContextId(const OsContextLinux &osContext, uint32_t deviceIndex, size_t contextIndex);
    virtual uint64_t getPrimaryContextProperties() const;

    struct UserFenceExtension {
        static constexpr uint32_t tagValue = 0x123987;
        uint32_t tag;
        uint64_t addr;
        uint64_t value;
    };

    uint16_t getDefaultEngineClass(const aub_stream::EngineType &defaultEngineType);
    void setOptionalContextProperties(const OsContextLinux &osContext, Drm &drm, void *extProperties, uint32_t &extIndexInOut);
    virtual void setContextProperties(const OsContextLinux &osContext, uint32_t deviceIndex, void *extProperties, uint32_t &extIndexInOut);
    virtual void applyContextFlags(void *execQueueCreate, bool allocateInterrupt);

    struct GtIpVersion {
        uint16_t major;
        uint16_t minor;
        uint16_t revision;
    };
    bool queryHwIpVersion(GtIpVersion &gtIpVersion);

    bool isLowLatencyHintAvailable = false;
    int maxExecQueuePriority = 0;
    std::mutex xeLock;
    std::mutex gemCloseLock;
    std::vector<BindInfo> bindInfo;
    std::vector<uint32_t> hwconfig;
    std::vector<XeDrm::drm_xe_engine_class_instance> contextParamEngine;

    std::vector<uint64_t> queryGtListData;
    constexpr static int invalidIndex = -1;
    GtIdContainer gtIdToTileId;
    GtIdContainer tileIdToGtId;
    GtIdContainer mediaGtIdToTileId;
    GtIdContainer tileIdToMediaGtId;
    XeDrm::drm_xe_query_gt_list *xeGtListData = nullptr;

    std::unique_ptr<XeDrm::drm_xe_engine_class_instance> defaultEngine;
    struct DebugMetadata {
        DrmResourceClass type;
        uint64_t offset;
        uint64_t size;
        bool isCookie;
    };

    template <typename... XeLogArgs>
    void xeLog(XeLogArgs &&...args) const;

    struct ExecObjectXe {
        uint64_t gpuAddress;
        uint32_t handle;
    };

    struct ExecBufferXe {
        ExecObjectXe *execObject;
        uint64_t startOffset;
        uint32_t drmContextId;
    };

    std::unique_ptr<EuDebugInterface> euDebugInterface;
};

template <typename... XeLogArgs>
void IoctlHelperXe::xeLog(XeLogArgs &&...args) const {
    if (debugManager.flags.PrintXeLogs.get()) {
        PRINT_DEBUG_STRING(debugManager.flags.PrintXeLogs.get(), stderr, args...);
    }
}

} // namespace NEO