File: bp_constants.h

package info (click to toggle)
vulkan-validationlayers 1.4.321.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 47,412 kB
  • sloc: cpp: 594,175; python: 11,321; sh: 24; makefile: 20; xml: 14
file content (136 lines) | stat: -rw-r--r-- 5,838 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/* Copyright (c) 2025 The Khronos Group Inc.
 * Copyright (c) 2025 Valve Corporation
 * Copyright (c) 2025 LunarG, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once
#include <stdint.h>
#include <chrono>
#include <vulkan/vulkan.h>

// Upper bound on the number of instanced vertex buffers an application should use
static const uint32_t kMaxInstancedVertexBuffers = 1u;

// Smallest size a single buffer or image should have when it consumes an entire VkDeviceMemory allocation.
// This threshold is deliberately separate from minDeviceAllocationSize: a 256K allocation can still be
// sensibly suballocated from, whereas spending a whole allocation on one image or buffer only makes sense
// when that allocation is very large (1 MiB here).
static const VkDeviceSize kMinDedicatedAllocationSize = 1024ull * 1024ull;

// AMD best practices
// Note: the values below are initial ballpark numbers for good performance.
// They are expected to be adjusted as more data on layer usage is collected.

// Minimum recommended command buffer size (avoid small command buffers)
static const uint32_t kMinRecommendedCommandBufferSizeAMD = 10u;
// Minimum recommended number of draws in a secondary command buffer (avoid small secondary command buffers)
static const uint32_t kMinRecommendedDrawsInSecondaryCommandBufferSizeAMD = 10u;
// Ideally there is only one fence per frame, which gives 3 for triple buffering
static const uint32_t kMaxRecommendedFenceObjectsSizeAMD = 3u;
// Maximum recommended number of semaphore objects (avoid excessive semaphores)
static const uint32_t kMaxRecommendedSemaphoreObjectsSizeAMD = 10u;
// Maximum recommended number of barriers (avoid excessive barriers)
static const uint32_t kMaxRecommendedBarriersSizeAMD = 500u;
// Maximum recommended number of pipelines (avoid excessive pipelines)
static const uint32_t kMaxRecommendedNumberOfPSOAMD = 5000u;
// Dynamic states incur a cost, and it is unlikely that the user needs all of them enabled at the same time
static const uint32_t kDynamicStatesWarningLimitAMD = 7u;
// Warning limit for the pipeline layout size; too many dynamic descriptor sets can make the layout large
static const uint32_t kPipelineLayoutSizeWarningLimitAMD = 13u;
// Warning limit used to detect excessive submission to a queue
static const uint32_t kNumberOfSubmissionWarningLimitAMD = 20u;
// Warning limit used to check that there is enough work per vertex stream change
static const float kVertexStreamToDrawRatioWarningLimitAMD = 0.8f;
// Warning limit used to check that there is enough work per pipeline change
static const float kDrawsPerPipelineRatioWarningLimitAMD = 5.0f;
// Warning limit used to check that command buffers are used with an appropriately sized pool
static const float kCmdBufferToCmdPoolRatioWarningLimitAMD = 0.1f;
// NVIDIA: pipeline layout size that still allows fast descriptor reads on modern NVIDIA devices
static const uint32_t kPipelineLayoutFastDescriptorSpaceNVIDIA = 256u;
// Time threshold (5 seconds) used when flagging memory allocations that could have been reused
static const std::chrono::seconds kAllocateMemoryReuseTimeThresholdNVIDIA{5};
// Number of switches in tessellation, geometry, and mesh shader state before a message is signalled
static const uint32_t kNumBindPipelineTessGeometryMeshSwitchesThresholdNVIDIA = 4u;
// Ratio at which the Z-cull direction starts to be considered balanced
static const int kZcullDirectionBalanceRatioNVIDIA = 20;
// Maximum recommended number of custom clear colors
static const size_t kMaxRecommendedNumberOfClearColorsNVIDIA = 16u;

// Number of small indexed draw calls in a command buffer before a warning is issued
static const uint32_t kMaxSmallIndexedDrawcalls = 10u;

// Index-count threshold that defines a small indexed draw call
static const int kSmallIndexedDrawcallIndices = 10;

// Minimum number of vertices/indices a draw must have to be taken into account by the
// depth pre-pass checks for Arm Mali GPUs
static const int kDepthPrePassMinDrawCountArm = 500;

// Minimum number of draw calls required to trigger depth pre-pass warnings for Arm Mali GPUs
static const int kDepthPrePassNumDrawCallsArm = 20;

// Maximum sample count (4 samples) that still gets full throughput on Arm Mali GPUs
static const VkSampleCountFlagBits kMaxEfficientSamplesArm = VK_SAMPLE_COUNT_4_BIT;

// Work group dimensions are generally best aligned to 4 on Arm Mali architectures
static const uint32_t kThreadGroupDispatchCountAlignmentArm = 4u;

// Largest number of threads that can efficiently be part of one compute workgroup when thread group barriers are used
static const uint32_t kMaxEfficientWorkGroupThreadCountArm = 64u;

// Minimum number of vertices/indices a draw needs to have before it is considered in
// depth pre-pass warnings on PowerVR
static const int kDepthPrePassMinDrawCountIMG = 300;

// Minimum number of draw calls matching the above criteria before a depth pre-pass warning
// is triggered on PowerVR
static const int kDepthPrePassNumDrawCallsIMG = 10;

// Maximum sample count (4 samples) on PowerVR before a warning is shown
static const VkSampleCountFlagBits kMaxEfficientSamplesImg = VK_SAMPLE_COUNT_4_BIT;

// Bundle of five C strings, one per special-use category named by the member.
// NOTE(review): going by the struct name these are presumably VUID strings, and the member
// names look like the Vulkan registry "specialuse" categories - confirm against the users
// of this struct. Member order matters for positional aggregate initialization.
struct SpecialUseVUIDs {
    const char *cadsupport;    // string for the "cadsupport" category
    const char *d3demulation;  // string for the "d3demulation" category
    const char *devtools;      // string for the "devtools" category
    const char *debugging;     // string for the "debugging" category
    const char *glemulation;   // string for the "glemulation" category
};

// Vendor selector bits; every enumerator occupies its own bit so values can be OR-ed together.
typedef enum {
    kBPVendorArm = 1 << 0,
    kBPVendorAMD = 1 << 1,
    kBPVendorIMG = 1 << 2,
    kBPVendorNVIDIA = 1 << 3,
} BPVendorFlagBits;
// Flags type for vendor masks. NOTE(review): presumably holds an OR-combination of
// BPVendorFlagBits values, following the Vulkan FlagBits/Flags pairing - confirm against callers.
typedef VkFlags BPVendorFlags;

// Kinds of usage recorded for an image subresource.
// Enumerator values are sequential, starting at 0 with UNDEFINED.
enum IMAGE_SUBRESOURCE_USAGE_BP {
    UNDEFINED = 0,  // The subresource has never been used
    RENDER_PASS_CLEARED = 1,
    RENDER_PASS_READ_TO_TILE = 2,
    CLEARED = 3,
    DESCRIPTOR_ACCESS = 4,
    RENDER_PASS_STORED = 5,
    RENDER_PASS_DISCARDED = 6,
    BLIT_READ = 7,
    BLIT_WRITE = 8,
    RESOLVE_READ = 9,
    RESOLVE_WRITE = 10,
    COPY_READ = 11,
    COPY_WRITE = 12
};

// Z-cull direction state; Unknown is the first enumerator and therefore the zero value.
// NOTE(review): Less/Greater presumably refer to the depth comparison direction - confirm against users.
enum class ZcullDirection {
    Unknown = 0,
    Less = 1,
    Greater = 2,
};