1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
|
/* Copyright (c) 2015-2025 The Khronos Group Inc.
* Copyright (c) 2015-2025 Valve Corporation
* Copyright (c) 2015-2025 LunarG, Inc.
* Copyright (C) 2015-2024 Google Inc.
* Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "state_tracker/state_object.h"
#include "state_tracker/fence_state.h"
#include "state_tracker/semaphore_state.h"
#include <chrono>
#include <condition_variable>
#include <cstdlib>
#include <deque>
#include <future>
#include <string>
#include <thread>
#include <vector>
#include "error_message/error_location.h"
#include "chassis/dispatch_object.h"
#include "vk_layer_config.h"
namespace vvl {
class CommandBuffer;
class DeviceState;
class Image;
class Queue;
class QueueSubState;
// A command buffer that is part of a queue submission, together with the label stack
// that was in effect when the command buffer was submitted.
struct CommandBufferSubmission {
    std::shared_ptr<vvl::CommandBuffer> cb;

    // Specifically made for GPU-AV, which has a unique problem: error reporting is done *after*
    // command buffer submissions, not at Pre/PostCall time.
    // Contrary to sync val, GPU-AV cannot just look at `GetQueueState()->cmdbuf_label_stack`
    // to construct an initial label stack. Sync val can do that because validation and error reporting are done
    // *before* a command buffer list is submitted: validation is performed one command buffer at a time,
    // and `GetQueueState()->cmdbuf_label_stack` is updated between those validations.
    // By the time GPU-AV starts doing error reporting (once command buffers have completed),
    // the label stack info stored in the Queue state is lost.
    // => GPU-AV needs to track this initial label stack per command buffer submission.
    std::vector<std::string> initial_label_stack;

    // Both arguments are taken by value and moved into place (sink parameters).
    CommandBufferSubmission(std::shared_ptr<vvl::CommandBuffer> cb, std::vector<std::string> initial_label_stack)
        : cb(std::move(cb)), initial_label_stack(std::move(initial_label_stack)) {}

    // Move operations are noexcept on purpose: std::vector only relocates its elements by moving
    // when the move constructor cannot throw. Otherwise it falls back to the copy constructor,
    // which here means a shared_ptr refcount bump plus a deep copy of every label string.
    CommandBufferSubmission(CommandBufferSubmission &&other) noexcept = default;
    CommandBufferSubmission &operator=(CommandBufferSubmission &&other) noexcept = default;

    CommandBufferSubmission(const CommandBufferSubmission &) = default;
    CommandBufferSubmission &operator=(const CommandBufferSubmission &other) = default;
};
// Everything recorded for a single submission to a queue: its command buffers, the semaphores
// it waits on and signals, an optional fence, and (for QueuePresent) the swapchain involved.
struct QueueSubmission {
    // `completed` is declared before `waiter`, so the promise is already constructed
    // when its future is taken here.
    QueueSubmission(const Location &loc_) : loc(loc_), completed(), waiter(completed.get_future()) {}

    bool is_last_submission = false;
    std::vector<vvl::CommandBufferSubmission> cb_submissions;
    std::vector<SemaphoreInfo> wait_semaphores;
    std::vector<SemaphoreInfo> signal_semaphores;
    std::shared_ptr<Fence> fence;
    bool has_external_fence = false;

    // Swapchain handle if this submission represents a QueuePresent request
    VkSwapchainKHR swapchain = VK_NULL_HANDLE;
    std::shared_ptr<const vvl::Image> swapchain_image;

    // Location captured at construction time.
    LocationCapture loc;
    // NOTE(review): presumably the per-queue sequence number assigned in Queue::PreSubmit - confirm.
    uint64_t seq = 0;
    uint32_t perf_submit_pass = 0;

    // Promise / shared future pair; `waiter` is the future of `completed`.
    std::promise<void> completed;
    std::shared_future<void> waiter;

    // Appends a command buffer together with its initial label stack.
    void AddCommandBuffer(std::shared_ptr<vvl::CommandBuffer> cb_state, std::vector<std::string> initial_label_stack) {
        cb_submissions.emplace_back(std::move(cb_state), std::move(initial_label_stack));
    }

    // Appends a (semaphore, value) entry to the list of semaphores this submission waits on.
    void AddWaitSemaphore(std::shared_ptr<Semaphore> &&semaphore_state, uint64_t value) {
        wait_semaphores.emplace_back(std::move(semaphore_state), value);
    }

    // Appends a (semaphore, value) entry to the list of semaphores this submission signals.
    void AddSignalSemaphore(std::shared_ptr<Semaphore> &&semaphore_state, uint64_t value) {
        signal_semaphores.emplace_back(std::move(semaphore_state), value);
    }

    // Takes ownership of the fence state associated with this submission.
    void AddFence(std::shared_ptr<Fence> &&fence_state) {
        fence = std::move(fence_state);
    }

    // Defined out of line.
    void BeginUse();
    void EndUse();
};
// This timeout is for all queue threads to update their state after we know
// (via being in a PostRecord call) that a fence, semaphore or wait for idle has completed.
//
// NOTE 2025-07-07: we have not had bugs related to timeouts for quite some time.
// At the same time, a low timeout value (10 seconds) was a source of confusion during
// debugging when the program was stopped at a breakpoint for longer than the timeout.
//
// Debug builds use an effectively infinite value. Release builds stay on the safe side and use
// a larger but still waitable value. In the future, after more testing, we might want to use
// an infinite timeout in all cases (or it's possible that a non-threaded solution will happen first,
// as part of improving submit time validation).
//
// The timeout can be overridden with the VK_QUEUE_THREAD_DEFAULT_TIMEOUT environment variable
// (a number of seconds). Values that do not parse to a positive integer are ignored and the
// build default is used; values larger than kU32Max are clamped to kU32Max.
//
// Returns the steady_clock time point at which a wait started "now" should give up.
static inline std::chrono::time_point<std::chrono::steady_clock> GetCondWaitTimeout() {
    const std::string envvar_timeout_str = GetEnvironment("VK_QUEUE_THREAD_DEFAULT_TIMEOUT");

    // std::strtoll (unlike std::atoi) has defined behavior for out-of-range input: it saturates.
    // Like atoi, it returns 0 when no number can be parsed and ignores trailing characters.
    const long long envvar_timeout_value =
        envvar_timeout_str.empty() ? 0 : std::strtoll(envvar_timeout_str.c_str(), nullptr, 10);

    uint64_t timeout_seconds = 0;
    if (envvar_timeout_value > 0) {
        // Clamp so that the conversion to a steady_clock duration below cannot overflow.
        const uint64_t requested_seconds = static_cast<uint64_t>(envvar_timeout_value);
        timeout_seconds = (requested_seconds < vvl::kU32Max) ? requested_seconds : vvl::kU32Max;
    } else {
        // Zero, negative, unparsable or unset: fall back to the build default.
#ifndef NDEBUG
        // infinite value for debug builds
        timeout_seconds = vvl::kU32Max;
#else
        // large timeout for release builds but still waitable in case of issue
        timeout_seconds = 120;
#endif
    }
    return std::chrono::steady_clock::now() + std::chrono::seconds(timeout_seconds);
}
// Return value of Queue::PreSubmit(): a pair of submission sequence numbers, both 0 by default.
// NOTE(review): the exact meaning of each field is set by PreSubmit's implementation,
// which is not visible in this header - confirm before relying on it.
struct PreSubmitResult {
    uint64_t last_submission_seq{0};
    uint64_t submission_seq{0};
};
// State object tracking a VkQueue: its creation parameters, the submissions queued on it
// (submissions_) and the per-queue thread (thread_, ThreadFunc) that is woken through cond_.
// Most member functions are only declared here; their definitions are not part of this header.
class Queue : public StateObject, public SubStateManager<QueueSubState> {
public:
// Records the queue's handle, family/queue indices, creation flags and family properties.
// The DeviceState reference is stored, not copied.
Queue(DeviceState &dev_data, VkQueue handle, uint32_t family_index, uint32_t queue_index, VkDeviceQueueCreateFlags flags,
const VkQueueFamilyProperties &queueFamilyProperties)
: StateObject(handle, kVulkanObjectTypeQueue),
queue_family_index(family_index),
queue_index(queue_index),
create_flags(flags),
queue_family_properties(queueFamilyProperties),
dev_data_(dev_data) {}
// Destruction goes through Destroy().
~Queue() { Destroy(); }
void Destroy() override;
// The tracked handle, cast back to VkQueue.
VkQueue VkHandle() const { return handle_.Cast<VkQueue>(); }
// called from the various PreCallRecordQueueSubmit() methods
PreSubmitResult PreSubmit(std::vector<QueueSubmission> &&submissions);
// called from the various PostCallRecordQueueSubmit() methods
void PostSubmit();
// Tell the queue thread that submissions up to and including the submission with
// sequence number until_seq have finished. kU64Max means to finish all submissions.
void Notify(uint64_t until_seq = kU64Max);
// Wait for the queue thread to finish processing submissions with sequence numbers
// up to and including until_seq. kU64Max means to finish all submissions.
void Wait(const Location &loc, uint64_t until_seq = kU64Max);
// Helper that combines Notify and Wait
void NotifyAndWait(const Location &loc, uint64_t until_seq = kU64Max);
// Find a timeline wait that does not have a resolving signal submitted yet.
// Check submissions up to and including until_seq.
std::optional<SemaphoreInfo> FindTimelineWaitWithoutResolvingSignal(uint64_t until_seq) const;
// VVL needs help to retire submissions on a present-only queue that does not use explicit host synchronization
void UpdatePresentOnlyQueueProgress(const DeviceState &device_state);
// Queue family index. As queueFamilyIndex parameter in vkGetDeviceQueue.
const uint32_t queue_family_index;
// Index of the queue within a queue family. As queueIndex parameter in vkGetDeviceQueue.
const uint32_t queue_index;
const VkDeviceQueueCreateFlags create_flags;
const VkQueueFamilyProperties queue_family_properties;
// Track command buffer label stack across all command buffers submitted to this queue.
// Access to this variable relies on external queue synchronization.
std::vector<std::string> cmdbuf_label_stack;
// Track the last closed label. It is used in the error messages to help locate unbalanced vkCmdEndDebugUtilsLabelEXT command.
// Access to this variable relies on external queue synchronization.
std::string last_closed_cmdbuf_label;
// Stop per-queue label tracking after the first label mismatch error.
// Access to this variable relies on external queue synchronization.
bool found_unbalanced_cmdbuf_label = false;
// If at any point this queue was used for specific queue operations
bool is_used_for_presentation = false; // QueuePresent
bool is_used_for_regular_submits = false; // QueueSubmit and QueueBindSparse
using LockGuard = std::unique_lock<std::mutex>;
// Acquires lock_ and returns the owning guard; the mutex is released when the guard is destroyed.
LockGuard Lock() const { return LockGuard(lock_); }
// Read-only view of the submission queue. This accessor does not take lock_ itself.
// NOTE(review): presumably callers are expected to hold Lock() while using the result - confirm.
const std::deque<QueueSubmission> &Submissions() { return submissions_; }
protected:
// called from the various PostCallRecordQueueSubmit() methods
void PostSubmit(QueueSubmission &submission);
// called when the worker thread decides a submission has finished executing
void Retire(QueueSubmission &submission);
private:
uint32_t timeline_wait_count_ = 0;
// Entry point of the queue's thread.
void ThreadFunc();
QueueSubmission *NextSubmission();
DeviceState &dev_data_;
// state related to submitting to the queue, all data members must
// be accessed with lock_ held
std::unique_ptr<std::thread> thread_;
std::deque<QueueSubmission> submissions_;
// NOTE(review): seq_ is atomic in addition to the lock_ rule above, presumably so it can be
// read without taking the lock - confirm against the implementation.
std::atomic<uint64_t> seq_{0};
uint64_t request_seq_{0};
bool exit_thread_{false};
mutable std::mutex lock_;
// condition to wake up the queue's thread
std::condition_variable cond_;
};
// Base class for objects that attach extra state to a vvl::Queue (Queue is declared as a
// SubStateManager<QueueSubState>). Every hook has an empty default implementation, so a
// derived class overrides only what it needs.
// NOTE(review): the hooks are presumably invoked by the owning Queue from its same-named
// methods; the call sites are not visible in this header.
class QueueSubState {
  public:
    explicit QueueSubState(Queue &q) : base(q) {}
    virtual ~QueueSubState() = default;

    // Not copyable.
    QueueSubState(const QueueSubState &) = delete;
    QueueSubState &operator=(const QueueSubState &) = delete;

    // Overridable hooks; no-ops by default.
    virtual void Destroy() {}
    virtual void PreSubmit(std::vector<QueueSubmission> &submissions) {}
    virtual void PostSubmit(std::deque<QueueSubmission> &submissions_) {}
    virtual void Retire(QueueSubmission &submission) {}

    // Convenience accessors forwarding to the queue this sub-state belongs to.
    VulkanTypedHandle Handle() const { return base.Handle(); }
    VkQueue VkHandle() const { return base.VkHandle(); }

    // The queue this sub-state is attached to.
    Queue &base;
};
} // namespace vvl
|