// File: device_queue.h
// Package: intel-compute-runtime 20.44.18297-1 (area: main; suite: bullseye)
// Package size: 34,780 kB; sloc: cpp 379,729; lisp 4,931; python 299; sh 196; makefile 8
// File content: 139 lines, 5,291 bytes, mode -rw-r--r--
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/helpers/hw_info.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/graphics_allocation.h"

#include "opencl/source/api/cl_types.h"
#include "opencl/source/execution_model/device_enqueue.h"
#include "opencl/source/helpers/base_object.h"

namespace NEO {
class ClDevice;
class CommandQueue;
class Context;
class Device;
class Kernel;
class Event;
struct MultiDispatchInfo;
class SchedulerKernel;
struct HwTimeStamps;
template <class T>
struct TagNode;

template <>
struct OpenCLObjectMapper<_device_queue> {
    typedef class DeviceQueue DerivedType;
};

class DeviceQueue : public BaseObject<_device_queue> {
  public:
    static const cl_ulong objectMagic = 0x1734547890087154LL;

    DeviceQueue() {
        for (uint32_t i = 0; i < IndirectHeap::NUM_TYPES; i++) {
            heaps[i] = nullptr;
        }
        offsetDsh = 0;
    }
    DeviceQueue(Context *context, ClDevice *device, cl_queue_properties &properties);
    ~DeviceQueue() override;

    Device &getDevice();
    Context &getContext() { return *context; }
    cl_uint getQueueSize() { return queueSize; }
    cl_command_queue_properties getCommandQueueProperties() const { return commandQueueProperties; }
    const std::vector<uint64_t> &getPropertiesVector() const { return propertiesVector; }
    GraphicsAllocation *getQueueBuffer() { return queueBuffer; }
    GraphicsAllocation *getEventPoolBuffer() { return eventPoolBuffer; }
    GraphicsAllocation *getSlbBuffer() { return slbBuffer; }
    GraphicsAllocation *getStackBuffer() { return stackBuffer; }
    GraphicsAllocation *getQueueStorageBuffer() { return queueStorageBuffer; }
    GraphicsAllocation *getDshBuffer() { return dshBuffer; }
    GraphicsAllocation *getDebugQueue() { return debugQueue; }

    bool isProfilingEnabled() {
        return !!(commandQueueProperties & CL_QUEUE_PROFILING_ENABLE);
    }

    static DeviceQueue *create(Context *context,
                               ClDevice *device,
                               const cl_queue_properties &properties,
                               cl_int &errcodeRet);

    cl_int getCommandQueueInfo(cl_command_queue_info paramName,
                               size_t paramValueSize, void *paramValue,
                               size_t *paramValueSizeRet);

    void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp, bool isCcsUsed);

    virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed);
    virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount);

    MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
        auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
        auto igilCriticalSection = const_cast<volatile uint *>(&igilCmdQueue->m_controls.m_CriticalSection);
        return *igilCriticalSection == ExecutionModelCriticalSection::Free;
    }

    virtual void resetDeviceQueue();
    virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed);
    virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);

    void acquireEMCriticalSection() {
        if (DebugManager.flags.EnableNullHardware.get()) {
            return;
        }
        auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
        igilCmdQueue->m_controls.m_CriticalSection = ExecutionModelCriticalSection::Taken;
    }

    uint32_t getDshOffset() const {
        return offsetDsh;
    }

    enum ExecutionModelCriticalSection {
        Free = 0,
        Taken = 1
    };

    static const uint32_t numberOfIDTables = 2;
    static const uint32_t interfaceDescriptorEntries = 64;
    static const uint32_t colorCalcStateSize = 192;
    static const uint32_t schedulerIDIndex = 62;
    static const uint32_t numberOfDeviceEnqueues;

  protected:
    void storeProperties(const cl_queue_properties *properties);
    void allocateResources();
    void initDeviceQueue();

    Context *context = nullptr;
    ClDevice *device = nullptr;
    cl_command_queue_properties commandQueueProperties = 0;
    std::vector<uint64_t> propertiesVector;
    cl_uint queueSize = 0;

    GraphicsAllocation *queueBuffer = nullptr;
    GraphicsAllocation *eventPoolBuffer = nullptr;
    GraphicsAllocation *slbBuffer = nullptr;
    GraphicsAllocation *stackBuffer = nullptr;
    GraphicsAllocation *queueStorageBuffer = nullptr;
    GraphicsAllocation *dshBuffer = nullptr;
    GraphicsAllocation *debugQueue = nullptr;

    DebugDataBuffer *debugData = nullptr;

    IndirectHeap *heaps[IndirectHeap::NUM_TYPES];
    uint32_t offsetDsh;
};

typedef DeviceQueue *(*DeviceQueueCreateFunc)(
    Context *context, ClDevice *device, cl_queue_properties &properties);
} // namespace NEO