1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
|
/* Copyright (c) 2015-2025 The Khronos Group Inc.
* Copyright (c) 2015-2025 Valve Corporation
* Copyright (c) 2015-2025 LunarG, Inc.
* Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
* Modifications Copyright (C) 2022 RasterGrid Kft.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "best_practices/best_practices_validation.h"
#include "best_practices/bp_state.h"
#include <vulkan/utility/vk_format_utils.h>
// Returns the raw 32-bit words of `clear_value`, keeping only the channels that `format` actually has.
// Channels missing from the format are forced to zero so that garbage in unused components
// does not produce distinct entries in the clear color cache.
static std::array<uint32_t, 4> GetRawClearColor(VkFormat format, const VkClearColorValue& clear_value) {
    // Channel presence in R, G, B, A order, matching the layout of VkClearColorValue
    const bool channel_present[4] = {
        vkuFormatHasRed(format),
        vkuFormatHasGreen(format),
        vkuFormatHasBlue(format),
        vkuFormatHasAlpha(format),
    };

    std::array<uint32_t, 4> raw_words{};
    for (uint32_t channel = 0; channel < raw_words.size(); ++channel) {
        raw_words[channel] = channel_present[channel] ? clear_value.uint32[channel] : 0;
    }
    return raw_words;
}
// Returns true when every channel present in `format` holds the bit pattern of 0.0f,
// or when every present channel holds the bit pattern of 1.0f.
// Channels the format does not have are ignored in both comparisons.
static bool IsClearColorZeroOrOne(VkFormat format, const std::array<uint32_t, 4> clear_color) {
    static_assert(sizeof(float) == sizeof(uint32_t), "Mismatching float <-> uint32 sizes");

    // Reinterpret a float as its raw 32-bit representation without violating aliasing rules
    const auto bits_of = [](float value) {
        uint32_t bits{};
        memcpy(&bits, &value, sizeof(value));
        return bits;
    };
    const uint32_t one_bits = bits_of(1.0f);
    const uint32_t zero_bits = bits_of(0.0f);

    // Channel presence in R, G, B, A order
    const bool channel_present[4] = {
        vkuFormatHasRed(format),
        vkuFormatHasGreen(format),
        vkuFormatHasBlue(format),
        vkuFormatHasAlpha(format),
    };

    bool all_ones = true;
    bool all_zeros = true;
    for (uint32_t channel = 0; channel < clear_color.size(); ++channel) {
        if (!channel_present[channel]) {
            continue;
        }
        all_ones = all_ones && (clear_color[channel] == one_bits);
        all_zeros = all_zeros && (clear_color[channel] == zero_bits);
    }
    return all_ones || all_zeros;
}
// Runs the Z-cull direction check on the command buffer's current Z-cull scope.
// Nothing is checked (and false is returned) while depth testing is disabled.
bool BestPractices::ValidateZcullScope(const bp_state::CommandBufferSubState& cb_state, const Location& loc) const {
    assert(VendorCheckEnabled(kBPVendorNVIDIA));

    if (!cb_state.nv.depth_test_enable) {
        return false;
    }

    const auto& zcull_scope = cb_state.nv.zcull_scope;
    return ValidateZcull(cb_state, zcull_scope.image, zcull_scope.range, loc);
}
// Warns when the draws recorded against `image` mix LESS and GREATER depth compare directions.
//
// For each subresource in `subresource_range`, the recorded number of "less" and "greater" draws
// is turned into a percentage of the total. If both percentages exceed
// kZcullDirectionBalanceRatioNVIDIA for any subresource, a single performance warning is logged.
//
// cb_state:          command buffer sub-state holding the per-image Z-cull tracking data
// image:             depth image to validate; returns false if no tracking data exists for it
// subresource_range: layers/levels of `image` to inspect
// loc:               location used when reporting the warning
//
// Returns the accumulated "skip" value from the logging call (false when nothing is reported).
bool BestPractices::ValidateZcull(const bp_state::CommandBufferSubState& cb_state, VkImage image,
                                  const VkImageSubresourceRange& subresource_range, const Location& loc) const {
    bool skip = false;

    const auto image_it = cb_state.nv.zcull_per_image.find(image);
    if (image_it == cb_state.nv.zcull_per_image.end()) {
        // No Z-cull data was tracked for this image in this command buffer
        return skip;
    }
    const auto& tree = image_it->second;

    auto image_state = Get<vvl::Image>(image);
    ASSERT_AND_RETURN_SKIP(image_state);

    const char* good_mode = nullptr;
    const char* bad_mode = nullptr;
    bool is_balanced = false;

    ForEachSubresource(*image_state, subresource_range, [&](uint32_t layer, uint32_t level) {
        if (is_balanced) {
            // One offending subresource is enough; only the first one found is reported
            return;
        }
        const auto& resource = tree.GetState(layer, level);
        const uint64_t num_draws = resource.num_less_draws + resource.num_greater_draws;
        if (num_draws == 0) {
            // Avoid dividing by zero for subresources that were never drawn to
            return;
        }

        // Integer percentages of draws in each direction
        const uint64_t less_ratio = (resource.num_less_draws * 100) / num_draws;
        const uint64_t greater_ratio = (resource.num_greater_draws * 100) / num_draws;

        if ((less_ratio > kZcullDirectionBalanceRatioNVIDIA) && (greater_ratio > kZcullDirectionBalanceRatioNVIDIA)) {
            is_balanced = true;
            // The more frequently used direction is reported as the primary one
            if (greater_ratio > less_ratio) {
                good_mode = "GREATER";
                bad_mode = "LESS";
            } else {
                good_mode = "LESS";
                bad_mode = "GREATER";
            }
        }
    });

    if (is_balanced) {
        // Report the image that was actually validated. It is not necessarily the image of the
        // command buffer's current Z-cull scope, since callers can pass any tracked image.
        skip |= LogPerformanceWarning(
            "BestPractices-NVIDIA-Zcull-LessGreaterRatio", cb_state.Handle(), loc,
            "%s Depth attachment %s is primarily rendered with depth compare op %s, but some draws use %s. "
            "Z-cull is disabled for the least used direction, which harms depth testing performance. "
            "The Z-cull direction can be reset by clearing the depth attachment, transitioning from VK_IMAGE_LAYOUT_UNDEFINED, "
            "using VK_ATTACHMENT_LOAD_OP_DONT_CARE, or using VK_ATTACHMENT_STORE_OP_DONT_CARE.",
            VendorSpecificTag(kBPVendorNVIDIA), FormatHandle(image).c_str(), good_mode, bad_mode);
    }
    return skip;
}
// Formats that the clear color checks in this file treat as compressible with a custom
// (neither all-0.0f nor all-1.0f) clear color.
// The order matters: the last entry is used to format the list shown in warning messages.
static constexpr std::array<VkFormat, 12> kCustomClearColorCompressedFormatsNVIDIA = {
    VK_FORMAT_R8G8B8A8_UNORM,
    VK_FORMAT_B8G8R8A8_UNORM,
    VK_FORMAT_A8B8G8R8_UNORM_PACK32,
    VK_FORMAT_A2R10G10B10_UNORM_PACK32,
    VK_FORMAT_A2B10G10R10_UNORM_PACK32,
    VK_FORMAT_R16G16B16A16_UNORM,
    VK_FORMAT_R16G16B16A16_SNORM,
    VK_FORMAT_R16G16B16A16_UINT,
    VK_FORMAT_R16G16B16A16_SINT,
    VK_FORMAT_R16G16B16A16_SFLOAT,
    VK_FORMAT_R32G32B32A32_SFLOAT,
    VK_FORMAT_B10G11R11_UFLOAT_PACK32,
};
// Remembers a custom clear color so later clears using the same color are considered registered.
// Colors that are all-zero or all-one, and formats outside the compressible list, are not tracked.
// At most kMaxRecommendedNumberOfClearColorsNVIDIA distinct colors are stored.
void BestPractices::RecordClearColor(VkFormat format, const VkClearColorValue& clear_value) {
    assert(VendorCheckEnabled(kBPVendorNVIDIA));

    const std::array<uint32_t, 4> raw_color = GetRawClearColor(format, clear_value);

    // All-zero and all-one colors are always compressed, so there is nothing to track
    if (IsClearColorZeroOrOne(format, raw_color)) {
        return;
    }

    // Custom colors only matter for formats that can be compressed with them
    bool format_is_compressible = false;
    for (const VkFormat candidate : kCustomClearColorCompressedFormatsNVIDIA) {
        if (candidate == format) {
            format_is_compressible = true;
            break;
        }
    }
    if (!format_is_compressible) {
        return;
    }

    // Register the color, unless the table is already full
    WriteLockGuard guard{clear_colors_lock_};
    if (clear_colors_.size() >= kMaxRecommendedNumberOfClearColorsNVIDIA) {
        return;
    }
    clear_colors_.insert(raw_color);
}
// Warns when a color clear is not expected to be compressed.
//
// No warning is produced for all-zero or all-one clear colors. Otherwise:
//  - if `format` is not in kCustomClearColorCompressedFormatsNVIDIA, a warning listing the
//    compressible formats is logged;
//  - if `format` is in the list, a warning is logged only when the color is not already registered
//    and the registered color table has no room left for a new entry.
//
// Returns the value produced by the logging call, or false when nothing is reported.
bool BestPractices::ValidateClearColor(VkCommandBuffer commandBuffer, VkFormat format, const VkClearColorValue& clear_value,
                                       const Location& loc) const {
    assert(VendorCheckEnabled(kBPVendorNVIDIA));

    const std::array<uint32_t, 4> raw_color = GetRawClearColor(format, clear_value);
    if (IsClearColorZeroOrOne(format, raw_color)) {
        return false;
    }

    bool format_is_compressible = false;
    for (const VkFormat candidate : kCustomClearColorCompressedFormatsNVIDIA) {
        if (candidate == format) {
            format_is_compressible = true;
            break;
        }
    }

    if (!format_is_compressible) {
        // Build a human readable "A, B, ..., or Z" list of the formats that would be compressed
        const VkFormat last_format = kCustomClearColorCompressedFormatsNVIDIA.back();
        std::string supported_formats;
        for (const VkFormat candidate : kCustomClearColorCompressedFormatsNVIDIA) {
            const bool is_last = (candidate == last_format);
            if (is_last) {
                supported_formats += "or ";
            }
            supported_formats += string_VkFormat(candidate);
            if (!is_last) {
                supported_formats += ", ";
            }
        }

        return LogPerformanceWarning("BestPractices-NVIDIA-ClearColor-NotCompressed", commandBuffer, loc,
                                     "%s Clearing image with format %s without a 1.0f or 0.0f clear color. "
                                     "The clear will not get compressed in the GPU, harming performance. "
                                     "This can be fixed using a clear color of VkClearColorValue{0.0f, 0.0f, 0.0f, 0.0f}, or "
                                     "VkClearColorValue{1.0f, 1.0f, 1.0f, 1.0f}. Alternatively, use %s.",
                                     VendorSpecificTag(kBPVendorNVIDIA), string_VkFormat(format), supported_formats.c_str());
    }

    // Compressible format: the color is fine if it is already known, or if it is new and
    // there is still room to register it.
    bool color_is_accepted = false;
    {
        ReadLockGuard guard{clear_colors_lock_};
        color_is_accepted = (clear_colors_.find(raw_color) != clear_colors_.end()) ||
                            (clear_colors_.size() < kMaxRecommendedNumberOfClearColorsNVIDIA);
    }
    if (color_is_accepted) {
        return false;
    }

    // Print the four components using the interpretation that matches the format's numeric type
    const auto join_components = [](const auto& components) {
        return std::to_string(components[0]) + ", " + std::to_string(components[1]) + ", " + std::to_string(components[2]) +
               ", " + std::to_string(components[3]);
    };

    std::string color_text;
    if (vkuFormatIsUINT(format)) {
        color_text = join_components(clear_value.uint32);
    } else if (vkuFormatIsSINT(format)) {
        color_text = join_components(clear_value.int32);
    } else {
        color_text = join_components(clear_value.float32);
    }

    return LogPerformanceWarning(
        "BestPractices-NVIDIA-ClearColor-NotCompressed", commandBuffer, loc,
        "%s Clearing image with unregistered VkClearColorValue{%s}. "
        "This clear will not get compressed in the GPU, harming performance. "
        "The clear color is not registered because too many unique colors have been used. "
        "Select a discrete set of clear colors and stick to those. "
        "VkClearColorValue{0, 0, 0, 0} and VkClearColorValue{1.0f, 1.0f, 1.0f, 1.0f} are always registered.",
        VendorSpecificTag(kBPVendorNVIDIA), color_text.c_str());
}
|