File: direct_submission_controller.h

package info (click to toggle)
intel-compute-runtime 25.35.35096.9-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 79,324 kB
  • sloc: cpp: 926,243; lisp: 3,433; sh: 715; makefile: 162; python: 21
file content (129 lines) | stat: -rw-r--r-- 4,382 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*
 * Copyright (C) 2019-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/command_stream/queue_throttle.h"
#include "shared/source/command_stream/task_count_helper.h"
#include "shared/source/helpers/device_bitfield.h"

#include <array>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <optional>
#include <queue>
#include <unordered_map>

namespace NEO {
// Forward declarations. Within this header CommandStreamReceiver, Thread and
// ProductHelper appear only behind a pointer, a std::unique_ptr or a reference,
// so their full definitions are not required here. MemoryManager is not
// referenced by any declaration visible in this header.
class MemoryManager;
class CommandStreamReceiver;
class Thread;
class ProductHelper;

// Clock aliases. SteadyClock is the type behind getCpuTimestamp(),
// timeSinceLastCheck and lastTerminateCpuTimestamp; HighResolutionClock is the
// type behind lastHangCheckTime (all in DirectSubmissionController).
using SteadyClock = std::chrono::steady_clock;
using HighResolutionClock = std::chrono::high_resolution_clock;

// One queued "wait for paging fence" request. Elements of this type are held in
// DirectSubmissionController::pagingFenceRequests, and the two fields mirror
// the parameters of DirectSubmissionController::enqueueWaitForPagingFence().
//
// Both members have default initializers so that a default-initialized
// instance never carries an indeterminate pointer or value. The struct remains
// an aggregate (C++14 and later), so `WaitForPagingFenceRequest{csr, value}`
// continues to work unchanged.
struct WaitForPagingFenceRequest {
    CommandStreamReceiver *csr = nullptr; // CSR the request refers to; NOTE(review): presumably non-owning - confirm
    uint64_t pagingFenceValue = 0;        // paging fence value to wait for
};

// Result type of DirectSubmissionController::timeoutElapsed().
// The numeric values below are exactly the ones the compiler assigns
// implicitly; they are written out for readability only.
enum class TimeoutElapsedMode {
    notElapsed = 0,   // no timeout has elapsed
    bcsOnly = 1,      // NOTE(review): presumably only the copy-engine (BCS) timeout has elapsed - confirm in timeoutElapsed()
    fullyElapsed = 2, // the full timeout has elapsed
};

class DirectSubmissionController {
  public:
    static constexpr size_t defaultTimeout = 5'000;
    static constexpr size_t timeToPollTagUpdateNS = 20'000;
    DirectSubmissionController();
    virtual ~DirectSubmissionController();

    void registerDirectSubmission(CommandStreamReceiver *csr);
    void unregisterDirectSubmission(CommandStreamReceiver *csr);

    void startThread();
    void startControlling();
    void stopThread();

    static bool isSupported();

    void enqueueWaitForPagingFence(CommandStreamReceiver *csr, uint64_t pagingFenceValue);
    void drainPagingFenceQueue();

  protected:
    struct DirectSubmissionState {
        DirectSubmissionState(DirectSubmissionState &&other) noexcept {
            isStopped = other.isStopped.load();
            taskCount = other.taskCount.load();
        }
        DirectSubmissionState &operator=(const DirectSubmissionState &other) {
            if (this == &other) {
                return *this;
            }
            this->isStopped = other.isStopped.load();
            this->taskCount = other.taskCount.load();
            return *this;
        }

        DirectSubmissionState() = default;
        ~DirectSubmissionState() = default;

        DirectSubmissionState(const DirectSubmissionState &other) = delete;
        DirectSubmissionState &operator=(DirectSubmissionState &&other) = delete;

        std::atomic_bool isStopped{true};
        std::atomic<TaskCountType> taskCount{0};
    };

    static void *controlDirectSubmissionsState(void *self);
    void checkNewSubmissions();
    bool isDirectSubmissionIdle(CommandStreamReceiver *csr, std::unique_lock<std::recursive_mutex> &csrLock);
    bool isCopyEngineOnDeviceIdle(uint32_t rootDeviceIndex, std::optional<TaskCountType> &bcsTaskCount);
    MOCKABLE_VIRTUAL bool sleep(std::unique_lock<std::mutex> &lock);
    MOCKABLE_VIRTUAL SteadyClock::time_point getCpuTimestamp();
    MOCKABLE_VIRTUAL void overrideDirectSubmissionTimeouts(const ProductHelper &productHelper);

    void recalculateTimeout();
    void applyTimeoutForAcLineStatusAndThrottle(bool acLineConnected);
    void updateLastSubmittedThrottle(QueueThrottle throttle);
    size_t getTimeoutParamsMapKey(QueueThrottle throttle, bool acLineStatus);

    void handlePagingFenceRequests(std::unique_lock<std::mutex> &lock, bool checkForNewSubmissions);
    MOCKABLE_VIRTUAL TimeoutElapsedMode timeoutElapsed();
    std::chrono::microseconds getSleepValue() const { return std::chrono::microseconds(this->timeout / this->bcsTimeoutDivisor); }

    uint32_t maxCcsCount = 1u;
    std::array<uint32_t, DeviceBitfield().size()> ccsCount = {};
    std::unordered_map<CommandStreamReceiver *, DirectSubmissionState> directSubmissions;
    std::mutex directSubmissionsMutex;

    std::unique_ptr<Thread> directSubmissionControllingThread;
    std::atomic_bool keepControlling = true;
    std::atomic_bool runControlling = false;

    SteadyClock::time_point timeSinceLastCheck{};
    SteadyClock::time_point lastTerminateCpuTimestamp{};
    HighResolutionClock::time_point lastHangCheckTime{};
    std::chrono::microseconds maxTimeout{defaultTimeout};
    std::chrono::microseconds timeout{defaultTimeout};
    int32_t timeoutDivisor = 1;
    int32_t bcsTimeoutDivisor = 1;
    QueueThrottle lowestThrottleSubmitted = QueueThrottle::HIGH;
    bool isCsrIdleDetectionEnabled = false;

    std::condition_variable condVar;
    std::mutex condVarMutex;

    std::queue<WaitForPagingFenceRequest> pagingFenceRequests;
};
} // namespace NEO