File: bp_image.cpp

package info (click to toggle)
vulkan-validationlayers 1.4.321.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 47,412 kB
  • sloc: cpp: 594,175; python: 11,321; sh: 24; makefile: 20; xml: 14
file content (296 lines) | stat: -rw-r--r-- 19,120 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
/* Copyright (c) 2015-2025 The Khronos Group Inc.
 * Copyright (c) 2015-2025 Valve Corporation
 * Copyright (c) 2015-2025 LunarG, Inc.
 * Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
 * Modifications Copyright (C) 2022 RasterGrid Kft.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "best_practices/best_practices_validation.h"
#include "best_practices/bp_state.h"
#include "state_tracker/queue_state.h"

// Best-practices checks for vkCreateImage, run before the call is dispatched.
//
// Reads *pCreateInfo and emits, in order: generic warnings (exclusive sharing combined with several queue families,
// EXTENDED_USAGE without MUTABLE_FORMAT), then performance warnings that are each gated on VendorCheckEnabled() for
// Arm, IMG, AMD and NVIDIA. pAllocator and pImage are not inspected.
//
// Returns the OR of the results of every Log*() call that was made.
bool BestPractices::PreCallValidateCreateImage(VkDevice device, const VkImageCreateInfo* pCreateInfo,
                                               const VkAllocationCallbacks* pAllocator, VkImage* pImage,
                                               const ErrorObject& error_obj) const {
    const VkImageCreateInfo& info = *pCreateInfo;
    const auto& loc = error_obj.location;
    const bool multisampled = info.samples > VK_SAMPLE_COUNT_1_BIT;
    const bool mutable_format = (info.flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) != 0;
    bool skip = false;

    // Exclusive sharing requested, yet more than one queue family index was supplied.
    if (info.sharingMode == VK_SHARING_MODE_EXCLUSIVE && info.queueFamilyIndexCount > 1) {
        skip |= LogWarning("BestPractices-vkCreateImage-sharing-mode-exclusive", device,
                           loc.dot(Field::pCreateInfo).dot(Field::sharingMode),
                           "is VK_SHARING_MODE_EXCLUSIVE while specifying multiple queues "
                           "(queueFamilyIndexCount of %" PRIu32 ").",
                           info.queueFamilyIndexCount);
    }

    // EXTENDED_USAGE is flagged whenever MUTABLE_FORMAT is not set alongside it.
    const bool extended_usage = (info.flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) != 0;
    if (extended_usage && !mutable_format) {
        skip |= LogWarning("BestPractices-vkCreateImage-CreateFlags", device, loc.dot(Field::pCreateInfo).dot(Field::flags),
                           "has VK_IMAGE_CREATE_EXTENDED_USAGE_BIT set, but not "
                           "VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, therefore image views created from this image will have to use the "
                           "same format and VK_IMAGE_CREATE_EXTENDED_USAGE_BIT will not have any effect.");
    }

    // --- Arm / IMG: multisampled images without the transient-attachment usage bit ---
    if (VendorCheckEnabled(kBPVendorArm) || VendorCheckEnabled(kBPVendorIMG)) {
        const bool transient = (info.usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) != 0;
        if (multisampled && !transient) {
            skip |= LogPerformanceWarning(
                "BestPractices-vkCreateImage-non-transient-ms-image", device, loc,
                "%s %s Trying to create a multisampled image, but pCreateInfo->usage did not have "
                "VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT set. Multisampled images may be resolved on-chip, "
                "and do not need to be backed by physical storage. "
                "TRANSIENT_ATTACHMENT allows tiled GPUs to not back the multisampled image with physical memory.",
                VendorSpecificTag(kBPVendorArm), VendorSpecificTag(kBPVendorIMG));
        }
    }

    // --- Arm: sample count above the vendor's efficient maximum ---
    if (VendorCheckEnabled(kBPVendorArm)) {
        if (info.samples > kMaxEfficientSamplesArm) {
            skip |= LogPerformanceWarning(
                "BestPractices-Arm-vkCreateImage-too-large-sample-count", device, loc,
                "%s Trying to create an image with %u samples. "
                "The hardware revision may not have full throughput for framebuffers with more than %u samples.",
                VendorSpecificTag(kBPVendorArm), static_cast<uint32_t>(info.samples), kMaxEfficientSamplesArm);
        }
    }

    // --- IMG: sample count above the vendor's efficient maximum ---
    if (VendorCheckEnabled(kBPVendorIMG)) {
        if (info.samples > kMaxEfficientSamplesImg) {
            skip |= LogPerformanceWarning(
                "BestPractices-IMG-vkCreateImage-too-large-sample-count", device, loc,
                "%s Trying to create an image with %u samples. "
                "The device may not have full support for true multisampling for images with more than %u samples. "
                "XT devices support up to 8 samples, XE up to 4 samples.",
                VendorSpecificTag(kBPVendorIMG), static_cast<uint32_t>(info.samples), kMaxEfficientSamplesImg);
        }
    }

    // --- IMG: PVRTC formats ---
    if (VendorCheckEnabled(kBPVendorIMG)) {
        bool is_pvrtc = false;
        switch (info.format) {
            case VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG:
            case VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG:
            case VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG:
            case VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG:
            case VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG:
            case VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG:
            case VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG:
            case VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG:
                is_pvrtc = true;
                break;
            default:
                break;
        }
        if (is_pvrtc) {
            skip |= LogPerformanceWarning("BestPractices-IMG-Texture-Format-PVRTC-Outdated", device, loc,
                                          "%s Trying to create an image with a PVRTC format. Both PVRTC1 and PVRTC2 "
                                          "are slower than standard image formats on PowerVR GPUs, prefer ETC, BC, ASTC, etc.",
                                          VendorSpecificTag(kBPVendorIMG));
        }
    }

    // --- AMD: render-target usage combined with concurrent sharing, mutable format or storage usage ---
    if (VendorCheckEnabled(kBPVendorAMD)) {
        const bool render_target =
            (info.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) != 0;
        const bool storage = (info.usage & VK_IMAGE_USAGE_STORAGE_BIT) != 0;

        if (render_target && info.sharingMode == VK_SHARING_MODE_CONCURRENT) {
            skip |= LogPerformanceWarning("BestPractices-AMD-vkImage-AvoidConcurrentRenderTargets", device, loc,
                                          "%s Trying to create an image as a render target with VK_SHARING_MODE_CONCURRENT. "
                                          "Using a SHARING_MODE_CONCURRENT "
                                          "is not recommended with color and depth targets",
                                          VendorSpecificTag(kBPVendorAMD));
        }

        if ((render_target || storage) && mutable_format) {
            skip |=
                LogPerformanceWarning("BestPractices-AMD-vkImage-DontUseMutableRenderTargets", device, loc,
                                      "%s Trying to create an image as a render target with VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT. "
                                      "Using a MUTABLE_FORMAT is not recommended with color, depth, and storage targets",
                                      VendorSpecificTag(kBPVendorAMD));
        }

        if (render_target && storage) {
            skip |=
                LogPerformanceWarning("BestPractices-AMD-vkImage-DontUseStorageRenderTargets", device, loc,
                                      "%s Trying to create an image as a render target with VK_IMAGE_USAGE_STORAGE_BIT. Using a "
                                      "VK_IMAGE_USAGE_STORAGE_BIT is not recommended with color and depth targets",
                                      VendorSpecificTag(kBPVendorAMD));
        }
    }

    // --- NVIDIA: linear tiling and 32-bit depth formats ---
    if (VendorCheckEnabled(kBPVendorNVIDIA)) {
        if (info.tiling == VK_IMAGE_TILING_LINEAR) {
            skip |= LogPerformanceWarning("BestPractices-NVIDIA-CreateImage-TilingLinear", device, loc,
                                          "%s Trying to create an image with tiling VK_IMAGE_TILING_LINEAR. "
                                          "Use VK_IMAGE_TILING_OPTIMAL instead.",
                                          VendorSpecificTag(kBPVendorNVIDIA));
        }

        const bool depth32 = info.format == VK_FORMAT_D32_SFLOAT || info.format == VK_FORMAT_D32_SFLOAT_S8_UINT;
        if (depth32) {
            skip |=
                LogPerformanceWarning("BestPractices-NVIDIA-CreateImage-Depth32Format", device, loc,
                                      "%s Trying to create an image with a 32-bit depth format. Use VK_FORMAT_D24_UNORM_S8_UINT or "
                                      "VK_FORMAT_D16_UNORM instead, unless the extra precision is needed.",
                                      VendorSpecificTag(kBPVendorNVIDIA));
        }
    }

    return skip;
}

// Queues best-practices validation for the image that backs `image_view`.
//
// Forwards to the VkImageSubresourceRange overload of QueueValidateImage(), passing the view's
// normalized_subresource_range. Does nothing when the view has no image state attached.
//
//   funcs      - callback list handed through to QueueValidateImage().
//   loc        - location handed through for any message logged later.
//   image_view - view whose underlying image subresources are being used.
//   usage      - how those subresources are used.
void BestPractices::QueueValidateImageView(QueueCallbacks& funcs, const Location& loc, const vvl::ImageView& image_view,
                                           IMAGE_SUBRESOURCE_USAGE_BP usage) {
    // No extra shared_ptr copy is taken here: the image is only borrowed for the duration of the call below.
    if (!image_view.image_state) {
        return;
    }
    QueueValidateImage(funcs, loc, *image_view.image_state, usage, image_view.normalized_subresource_range);
}

// Queues per-subresource validation for every (array layer, mip level) pair selected by `subresource_range`.
//
// Both the layer count and the level count of the range are clamped to what the image has left past the respective
// base, so an oversized count (such as a VK_REMAINING_* value) resolves to "everything up to the end of the image".
//
//   funcs             - callback list that receives one entry per selected subresource.
//   loc               - location forwarded to the per-subresource overload.
//   image_state       - image whose subresources are being used.
//   usage             - how the subresources are used.
//   subresource_range - base/count of array layers and mip levels to cover.
void BestPractices::QueueValidateImage(QueueCallbacks& funcs, const Location& loc, vvl::Image& image_state,
                                       IMAGE_SUBRESOURCE_USAGE_BP usage, const VkImageSubresourceRange& subresource_range) {
    const auto& create_info = image_state.create_info;

    // If we're viewing a 3D slice, ignore base array layer.
    // The entire 3D subresource is accessed as one atomic unit.
    const uint32_t base_array_layer = create_info.imageType == VK_IMAGE_TYPE_3D ? 0 : subresource_range.baseArrayLayer;
    const uint32_t base_mip_level = subresource_range.baseMipLevel;

    // A base at or past the end leaves nothing to track; bailing out also keeps the unsigned subtractions below
    // from wrapping around to a huge count.
    if (base_array_layer >= create_info.arrayLayers || base_mip_level >= create_info.mipLevels) {
        return;
    }

    const uint32_t max_layers = create_info.arrayLayers - base_array_layer;
    const uint32_t array_layers = std::min(subresource_range.layerCount, max_layers);
    const uint32_t max_levels = create_info.mipLevels - base_mip_level;
    // Clamp the *requested* level count; clamping the image's total mip count instead would always yield max_levels
    // and silently widen the range to every remaining mip level.
    const uint32_t mip_levels = std::min(subresource_range.levelCount, max_levels);

    for (uint32_t layer = 0; layer < array_layers; layer++) {
        for (uint32_t level = 0; level < mip_levels; level++) {
            QueueValidateImage(funcs, loc, image_state, usage, layer + base_array_layer, level + base_mip_level);
        }
    }
}

// Queues per-subresource validation for each array layer selected by `subresource_layers`, all at the single mip
// level named by subresource_layers.mipLevel. The layer count is clamped to the number of layers the image has from
// baseArrayLayer onwards.
void BestPractices::QueueValidateImage(QueueCallbacks& funcs, const Location& loc, vvl::Image& image_state,
                                       IMAGE_SUBRESOURCE_USAGE_BP usage, const VkImageSubresourceLayers& subresource_layers) {
    const uint32_t first_layer = subresource_layers.baseArrayLayer;
    const uint32_t mip_level = subresource_layers.mipLevel;

    const uint32_t layers_available = image_state.create_info.arrayLayers - first_layer;
    const uint32_t layer_count = std::min(subresource_layers.layerCount, layers_available);

    uint32_t offset = 0;
    while (offset < layer_count) {
        QueueValidateImage(funcs, loc, image_state, usage, offset + first_layer, mip_level);
        ++offset;
    }
}

// Appends to `funcs` a callback that runs ValidateImageInQueue() for one subresource (array_layer, mip_level) of
// `image_state`. The callback always returns false.
//
// NOTE(review): the callback holds `image_state` by reference and `this` by pointer, so both must still be alive
// whenever the callback is invoked — confirm that callers guarantee this.
void BestPractices::QueueValidateImage(QueueCallbacks& funcs, const Location& loc, vvl::Image& image_state,
                                       IMAGE_SUBRESOURCE_USAGE_BP usage, uint32_t array_layer, uint32_t mip_level) {
    auto deferred_check = [this, loc, &image_state, usage, array_layer, mip_level](const vvl::Queue& queue_state,
                                                                                   const vvl::CommandBuffer& cb_state) -> bool {
        ValidateImageInQueue(queue_state, cb_state, loc, image_state, usage, array_layer, mip_level);
        return false;
    };
    funcs.emplace_back(std::move(deferred_check));
}

void BestPractices::ValidateImageInQueueArmImg(const Location& loc, vvl::Image& image_state, IMAGE_SUBRESOURCE_USAGE_BP last_usage,
                                               IMAGE_SUBRESOURCE_USAGE_BP usage, uint32_t array_layer, uint32_t mip_level) {
    // Swapchain images are implicitly read so clear after store is expected.
    if (usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_CLEARED && last_usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_STORED &&
        !image_state.IsSwapchainImage()) {
        LogPerformanceWarning(
            "BestPractices-RenderPass-redundant-store", device, loc,
            "%s %s Subresource (arrayLayer: %u, mipLevel: %u) of image was cleared as part of LOAD_OP_CLEAR, but last time "
            "image was used, it was written to with STORE_OP_STORE. "
            "Storing to the image is probably redundant in this case, and wastes bandwidth on tile-based "
            "architectures.",
            VendorSpecificTag(kBPVendorArm), VendorSpecificTag(kBPVendorIMG), array_layer, mip_level);
    } else if (usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_CLEARED && last_usage == IMAGE_SUBRESOURCE_USAGE_BP::CLEARED) {
        LogPerformanceWarning(
            "BestPractices-RenderPass-redundant-clear", device, loc,
            "%s %s Subresource (arrayLayer: %u, mipLevel: %u) of image was cleared as part of LOAD_OP_CLEAR, but last time "
            "image was used, it was written to with vkCmdClear*Image(). "
            "Clearing the image with vkCmdClear*Image() is probably redundant in this case, and wastes bandwidth on "
            "tile-based architectures.",
            VendorSpecificTag(kBPVendorArm), VendorSpecificTag(kBPVendorIMG), array_layer, mip_level);
    } else if (usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_READ_TO_TILE &&
               (last_usage == IMAGE_SUBRESOURCE_USAGE_BP::BLIT_WRITE || last_usage == IMAGE_SUBRESOURCE_USAGE_BP::CLEARED ||
                last_usage == IMAGE_SUBRESOURCE_USAGE_BP::COPY_WRITE || last_usage == IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_WRITE)) {
        const char* last_cmd = nullptr;
        const char* vuid = nullptr;
        const char* suggestion = nullptr;

        switch (last_usage) {
            case IMAGE_SUBRESOURCE_USAGE_BP::BLIT_WRITE:
                vuid = "BestPractices-RenderPass-blitimage-loadopload";
                last_cmd = "vkCmdBlitImage";
                suggestion =
                    "The blit is probably redundant in this case, and wastes bandwidth on tile-based architectures. "
                    "Rather than blitting, just render the source image in a fragment shader in this render pass, "
                    "which avoids the memory roundtrip.";
                break;
            case IMAGE_SUBRESOURCE_USAGE_BP::CLEARED:
                vuid = "BestPractices-RenderPass-inefficient-clear";
                last_cmd = "vkCmdClear*Image";
                suggestion =
                    "Clearing the image with vkCmdClear*Image() is probably redundant in this case, and wastes bandwidth on "
                    "tile-based architectures. "
                    "Use LOAD_OP_CLEAR instead to clear the image for free.";
                break;
            case IMAGE_SUBRESOURCE_USAGE_BP::COPY_WRITE:
                vuid = "BestPractices-RenderPass-copyimage-loadopload";
                last_cmd = "vkCmdCopy*Image";
                suggestion =
                    "The copy is probably redundant in this case, and wastes bandwidth on tile-based architectures. "
                    "Rather than copying, just render the source image in a fragment shader in this render pass, "
                    "which avoids the memory roundtrip.";
                break;
            case IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_WRITE:
                vuid = "BestPractices-RenderPass-resolveimage-loadopload";
                last_cmd = "vkCmdResolveImage";
                suggestion =
                    "The resolve is probably redundant in this case, and wastes a lot of bandwidth on tile-based architectures. "
                    "Rather than resolving, and then loading, try to keep rendering in the same render pass, "
                    "which avoids the memory roundtrip.";
                break;
            default:
                break;
        }

        LogPerformanceWarning(
            vuid, device, loc,
            "%s %s Subresource (arrayLayer: %u, mipLevel: %u) of image was loaded to tile as part of LOAD_OP_LOAD, but last "
            "time image was used, it was written to with %s. %s",
            VendorSpecificTag(kBPVendorArm), VendorSpecificTag(kBPVendorIMG), array_layer, mip_level, last_cmd, suggestion);
    }
}

// Records `usage` of one subresource (array_layer, mip_level) of `image_state` on queue `qs`, then checks the
// transition from the previously recorded usage.
//
// The usage is recorded first via the image's best-practices sub-state (UpdateUsage), whose return value is treated
// as the previous usage. Three checks follow:
//   1. a potentially-reading usage on a different queue family than the previous one, for a VK_SHARING_MODE_EXCLUSIVE
//      image that has been used before;
//   2. LOAD_OP_LOAD right after the subresource was discarded by a render pass;
//   3. the Arm/IMG specific checks, when either vendor check is enabled.
// `cbs` is not used by this function.
void BestPractices::ValidateImageInQueue(const vvl::Queue& qs, const vvl::CommandBuffer& cbs, const Location& loc,
                                         vvl::Image& image_state, IMAGE_SUBRESOURCE_USAGE_BP usage, uint32_t array_layer,
                                         uint32_t mip_level) {
    using Usage = IMAGE_SUBRESOURCE_USAGE_BP;

    const auto current_family = qs.queue_family_index;
    auto& bp_image = bp_state::SubState(image_state);
    const auto previous = bp_image.UpdateUsage(array_layer, mip_level, usage, current_family);

    // Concurrent sharing usage of image with exclusive sharing mode
    const bool exclusive = image_state.create_info.sharingMode == VK_SHARING_MODE_EXCLUSIVE;
    const bool family_changed = previous.queue_family_index != current_family;
    // UNDEFINED means this is the first use/acquisition of the subresource.
    const bool used_before = previous.type != Usage::UNDEFINED;
    // Only usages that might read the subresource are reported; a write-only usage is not.
    const bool may_read = usage == Usage::RENDER_PASS_READ_TO_TILE || usage == Usage::BLIT_READ || usage == Usage::COPY_READ ||
                          usage == Usage::DESCRIPTOR_ACCESS || usage == Usage::RESOLVE_READ;

    if (exclusive && family_changed && used_before && may_read) {
        LogWarning("BestPractices-ConcurrentUsageOfExclusiveImage", image_state.Handle(), loc,
                   "Subresource (arrayLayer: %" PRIu32 ", mipLevel: %" PRIu32
                   ") of image is used on queue family index %" PRIu32 " after being used on queue family index %" PRIu32
                   ", but has VK_SHARING_MODE_EXCLUSIVE, and has not been acquired and released with a ownership transfer "
                   "operation",
                   array_layer, mip_level, current_family, previous.queue_family_index);
    }

    // When image was discarded with StoreOpDontCare but is now being read with LoadOpLoad
    const bool load_after_discard = usage == Usage::RENDER_PASS_READ_TO_TILE && previous.type == Usage::RENDER_PASS_DISCARDED;
    if (load_after_discard) {
        LogWarning("BestPractices-StoreOpDontCareThenLoadOpLoad", device, loc,
                   "Trying to load an attachment with LOAD_OP_LOAD that was previously stored with STORE_OP_DONT_CARE. This may "
                   "result in undefined behaviour.");
    }

    if (VendorCheckEnabled(kBPVendorArm) || VendorCheckEnabled(kBPVendorIMG)) {
        ValidateImageInQueueArmImg(loc, image_state, previous.type, usage, array_layer, mip_level);
    }
}

// Called for a newly created image state: builds a bp_state::ImageSubState for it and installs that sub-state on the
// image under this object's container_type.
void BestPractices::Created(vvl::Image& image_state) {
    auto bp_sub_state = std::make_unique<bp_state::ImageSubState>(image_state);
    image_state.SetSubState(container_type, std::move(bp_sub_state));
}