File: queue_state.h

package info (click to toggle)
vulkan-validationlayers 1.4.321.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 47,412 kB
  • sloc: cpp: 594,175; python: 11,321; sh: 24; makefile: 20; xml: 14
file content (251 lines) | stat: -rw-r--r-- 10,584 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
/* Copyright (c) 2015-2025 The Khronos Group Inc.
 * Copyright (c) 2015-2025 Valve Corporation
 * Copyright (c) 2015-2025 LunarG, Inc.
 * Copyright (C) 2015-2024 Google Inc.
 * Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once
#include "state_tracker/state_object.h"
#include "state_tracker/fence_state.h"
#include "state_tracker/semaphore_state.h"
#include <chrono>
#include <condition_variable>
#include <cstdlib>
#include <deque>
#include <future>
#include <string>
#include <thread>
#include <vector>
#include "error_message/error_location.h"
#include "chassis/dispatch_object.h"
#include "vk_layer_config.h"

namespace vvl {

// Forward declarations, so that the declarations below can name these types
// before (or without) their full definitions being visible.
// Queue and QueueSubState are defined further down in this header.
class CommandBuffer;
class DeviceState;
class Image;
class Queue;
class QueueSubState;

// A command buffer that is part of a queue submission, paired with the initial
// label stack tracked for that particular submission.
struct CommandBufferSubmission {
    std::shared_ptr<vvl::CommandBuffer> cb;
    // Specifically made for GPU-AV, because it has a unique problem: error reporting is done *after*
    // command buffer submissions, not at Pre/PostCall time.
    // Contrary to sync-val, GPU-AV cannot just look at `GetQueueState()->cmdbuf_label_stack`
    // to construct an initial label stack. Sync-val can do that because validation and error reporting are done
    // *before* a command buffer list is submitted: validation is performed one command buffer at a time,
    // and `GetQueueState()->cmdbuf_label_stack` is updated between those validations.
    // By the time GPU-AV starts doing error reporting, i.e. when command buffers have completed,
    // the label stack info stored in the Queue state is lost.
    // => GPU-AV needs to track this initial label stack per command buffer submission.
    std::vector<std::string> initial_label_stack;

    // Both parameters are sink arguments: they are taken by value and moved into the members.
    CommandBufferSubmission(std::shared_ptr<vvl::CommandBuffer> cb, std::vector<std::string> initial_label_stack)
        : cb(std::move(cb)), initial_label_stack(std::move(initial_label_stack)) {}

    // All four copy/move operations are defaulted so that their exception specifications are
    // deduced from the members. A user-provided move constructor that is not noexcept makes
    // std::vector fall back to copying elements when it reallocates (this type is copyable),
    // and declaring only copy assignment suppresses the implicit move assignment, which turns
    // every assignment from an rvalue into a deep copy.
    CommandBufferSubmission(const CommandBufferSubmission &) = default;
    CommandBufferSubmission(CommandBufferSubmission &&) = default;
    CommandBufferSubmission &operator=(const CommandBufferSubmission &) = default;
    CommandBufferSubmission &operator=(CommandBufferSubmission &&) = default;
};

// Data recorded for one submission to a queue: the command buffers, the semaphores
// to wait on and to signal, an optional fence, optional present information, and the
// promise/future pair stored alongside them.
struct QueueSubmission {
    // `completed` is declared before `waiter`, so the promise already exists when
    // its future is obtained here. Keep that declaration order.
    QueueSubmission(const Location &loc_) : loc(loc_), completed(), waiter(completed.get_future()) {}

    bool is_last_submission = false;
    std::vector<vvl::CommandBufferSubmission> cb_submissions;

    std::vector<SemaphoreInfo> wait_semaphores;
    std::vector<SemaphoreInfo> signal_semaphores;
    std::shared_ptr<Fence> fence;
    bool has_external_fence{false};
    // Swapchain handle if this submission represents QueuePresent request
    VkSwapchainKHR swapchain{VK_NULL_HANDLE};
    std::shared_ptr<const vvl::Image> swapchain_image;

    // Built from the Location handed to the constructor.
    LocationCapture loc;
    uint64_t seq = 0;
    uint32_t perf_submit_pass = 0;
    std::promise<void> completed;
    // Shared future obtained from `completed` in the constructor.
    std::shared_future<void> waiter;

    // Appends a CommandBufferSubmission built in place from the two (moved) arguments.
    void AddCommandBuffer(std::shared_ptr<vvl::CommandBuffer> cb_state, std::vector<std::string> initial_label_stack) {
        cb_submissions.emplace_back(std::move(cb_state), std::move(initial_label_stack));
    }

    // Appends an entry built from (semaphore_state, value) to signal_semaphores.
    void AddSignalSemaphore(std::shared_ptr<Semaphore> &&semaphore_state, uint64_t value) {
        signal_semaphores.emplace_back(std::move(semaphore_state), value);
    }

    // Appends an entry built from (semaphore_state, value) to wait_semaphores.
    void AddWaitSemaphore(std::shared_ptr<Semaphore> &&semaphore_state, uint64_t value) {
        wait_semaphores.emplace_back(std::move(semaphore_state), value);
    }

    // Takes over fence_state as this submission's fence, replacing any previous one.
    void AddFence(std::shared_ptr<Fence> &&fence_state) {
        fence = std::move(fence_state);
    }

    // Defined out of line.
    void EndUse();
    void BeginUse();
};

// This timeout is for all queue threads to update their state after we know
// (via being in a PostRecord call) that a fence, semaphore or wait for idle has completed.
//
// NOTE 2025-07-07: we have not had bugs related to timeouts for quite some time.
// At the same time, the low timeout value (10 seconds) was a source of confusion during
// debugging when the program was waiting on a breakpoint for longer than the timeout value.
//
// Set an infinite value for debug builds. For release builds stay on the safe side and use
// a larger but still waitable value. In the future, after more testing, we might want to use
// an infinite timeout in all cases (or it's possible that a non-threaded solution will happen first,
// as part of improving submit time validation).
//
// The timeout can be overridden with the VK_QUEUE_THREAD_DEFAULT_TIMEOUT environment variable,
// which is read as a whole number of seconds. An empty, non-numeric, zero or negative value
// is ignored and the build default is used; a value above vvl::kU32Max is clamped to vvl::kU32Max.
//
// Returns the steady_clock time point that lies the selected number of seconds after "now".
static inline std::chrono::time_point<std::chrono::steady_clock> GetCondWaitTimeout() {
#ifndef NDEBUG
    // infinite value for debug builds
    const uint64_t default_timeout_seconds = vvl::kU32Max;
#else
    // large timeout for release builds but still waitable in case of issue
    const uint64_t default_timeout_seconds = 120;
#endif
    // Largest override we accept: the same value that is used as "infinite" above.
    const uint64_t max_timeout_seconds = vvl::kU32Max;

    uint64_t timeout_seconds = default_timeout_seconds;

    const std::string envvar_timeout_str = GetEnvironment("VK_QUEUE_THREAD_DEFAULT_TIMEOUT");
    if (!envvar_timeout_str.empty()) {
        // strtoll returns 0 when no number can be parsed and saturates on overflow, so unlike
        // atoi it has a defined result for every input string.
        const long long envvar_timeout_value = std::strtoll(envvar_timeout_str.c_str(), nullptr, 10);
        // A negative value must not reach the unsigned timeout: it would wrap around and end up
        // as a deadline in the past. Zero keeps its previous meaning of "use the default".
        if (envvar_timeout_value > 0) {
            const uint64_t requested_seconds = static_cast<uint64_t>(envvar_timeout_value);
            timeout_seconds = requested_seconds < max_timeout_seconds ? requested_seconds : max_timeout_seconds;
        }
    }
    return std::chrono::steady_clock::now() + std::chrono::seconds(timeout_seconds);
}

// Return type of Queue::PreSubmit(): a pair of sequence numbers, both 0 unless set.
struct PreSubmitResult {
    uint64_t last_submission_seq{0};
    uint64_t submission_seq{0};
};

// State object for a VkQueue. Holds the queue's creation parameters, the debug label
// tracking state, and the list of submissions together with the thread, mutex and
// condition variable used to process them.
class Queue : public StateObject, public SubStateManager<QueueSubState> {
  public:
    // Stores the creation parameters and registers the handle with the StateObject base
    // as kVulkanObjectTypeQueue. Only a reference to dev_data is kept.
    Queue(DeviceState &dev_data, VkQueue handle, uint32_t family_index, uint32_t queue_index, VkDeviceQueueCreateFlags flags,
          const VkQueueFamilyProperties &queueFamilyProperties)
        : StateObject(handle, kVulkanObjectTypeQueue),
          queue_family_index(family_index),
          queue_index(queue_index),
          create_flags(flags),
          queue_family_properties(queueFamilyProperties),
          dev_data_(dev_data) {}

    // The destructor runs Destroy().
    ~Queue() { Destroy(); }
    void Destroy() override;

    // The stored handle cast to VkQueue.
    VkQueue VkHandle() const { return handle_.Cast<VkQueue>(); }

    // called from the various PreCallRecordQueueSubmit() methods
    PreSubmitResult PreSubmit(std::vector<QueueSubmission> &&submissions);
    // called from the various PostCallRecordQueueSubmit() methods
    void PostSubmit();

    // Tell the queue thread that submissions up to and including the submission with
    // sequence number until_seq have finished. kU64Max means to finish all submissions.
    void Notify(uint64_t until_seq = kU64Max);

    // Wait for the queue thread to finish processing submissions with sequence numbers
    // up to and including until_seq. kU64Max means to finish all submissions.
    void Wait(const Location &loc, uint64_t until_seq = kU64Max);

    // Helper that combines Notify and Wait
    void NotifyAndWait(const Location &loc, uint64_t until_seq = kU64Max);

    // Find a timeline wait that does not have a resolving signal submitted yet.
    // Check submissions up to and including until_seq.
    std::optional<SemaphoreInfo> FindTimelineWaitWithoutResolvingSignal(uint64_t until_seq) const;

    // VVL needs help to retire submissions on a present-only queue that does not use explicit host synchronization
    void UpdatePresentOnlyQueueProgress(const DeviceState &device_state);

    // Queue family index. As queueFamilyIndex parameter in vkGetDeviceQueue.
    const uint32_t queue_family_index;

    // Index of the queue within a queue family. As queueIndex parameter in vkGetDeviceQueue.
    const uint32_t queue_index;

    // Creation flags and queue family properties, as passed to the constructor.
    const VkDeviceQueueCreateFlags create_flags;
    const VkQueueFamilyProperties queue_family_properties;

    // Track command buffer label stack across all command buffers submitted to this queue.
    // Access to this variable relies on external queue synchronization.
    std::vector<std::string> cmdbuf_label_stack;

    // Track the last closed label. It is used in the error messages to help locate unbalanced vkCmdEndDebugUtilsLabelEXT command.
    // Access to this variable relies on external queue synchronization.
    std::string last_closed_cmdbuf_label;

    // Stop per-queue label tracking after the first label mismatch error.
    // Access to this variable relies on external queue synchronization.
    bool found_unbalanced_cmdbuf_label = false;

    // If at any point this queue was used for specific queue operations
    bool is_used_for_presentation = false;     // QueuePresent
    bool is_used_for_regular_submits = false;  // QueueSubmit and QueueBindSparse

    // Lock() locks lock_ and returns the owning guard; the mutex is released when the guard is destroyed.
    using LockGuard = std::unique_lock<std::mutex>;
    LockGuard Lock() const { return LockGuard(lock_); }

    // Read-only view of the submission list. This accessor does not take lock_ itself.
    // NOTE(review): given the locking rule stated on the private members below, callers
    // presumably need to hold Lock() while using the result - confirm.
    const std::deque<QueueSubmission> &Submissions() { return submissions_; }

  protected:
    // called from the various PostCallRecordQueueSubmit() methods
    void PostSubmit(QueueSubmission &submission);

    // called when the worker thread decides a submission has finished executing
    void Retire(QueueSubmission &submission);

  private:
    uint32_t timeline_wait_count_ = 0;

    // Defined out of line.
    // NOTE(review): ThreadFunc is presumably the entry point of thread_ - confirm.
    void ThreadFunc();
    QueueSubmission *NextSubmission();

    DeviceState &dev_data_;

    // state related to submitting to the queue, all data members must
    // be accessed with lock_ held
    // NOTE(review): seq_ is atomic, presumably so that it can also be read without lock_ - confirm.
    std::unique_ptr<std::thread> thread_;
    std::deque<QueueSubmission> submissions_;
    std::atomic<uint64_t> seq_{0};
    uint64_t request_seq_{0};
    bool exit_thread_{false};
    // mutable so that the const member Lock() can lock it
    mutable std::mutex lock_;
    // condition to wake up the queue's thread
    std::condition_variable cond_;
};

// Polymorphic, non-copyable base class for sub-state objects tied to a vvl::Queue.
// Every hook has an empty default body, so a derived class overrides only what it needs.
// The hook names mirror Queue's PreSubmit/PostSubmit/Retire; where they are invoked
// is not visible in this header.
class QueueSubState {
  public:
    // The queue this sub-state belongs to; bound at construction.
    Queue &base;

    explicit QueueSubState(Queue &q) : base(q) {}
    virtual ~QueueSubState() = default;

    // Copying is disabled.
    QueueSubState(const QueueSubState &) = delete;
    QueueSubState &operator=(const QueueSubState &) = delete;

    // Overridable hooks; all of them do nothing by default.
    virtual void Destroy() {}
    virtual void PreSubmit(std::vector<QueueSubmission> &submissions) {}
    virtual void PostSubmit(std::deque<QueueSubmission> &submissions_) {}
    virtual void Retire(QueueSubmission &submission) {}

    // Both accessors simply forward to the owning queue.
    VulkanTypedHandle Handle() const {
        return base.Handle();
    }
    VkQueue VkHandle() const {
        return base.VkHandle();
    }
};
}  // namespace vvl