File: blas.comp

package info (click to toggle)
vulkan-validationlayers 1.4.341.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 54,356 kB
  • sloc: cpp: 675,478; python: 12,311; sh: 24; makefile: 24; xml: 14
file content (85 lines) | stat: -rw-r--r-- 3,337 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// Copyright (c) 2022-2026 The Khronos Group Inc.
// Copyright (c) 2022-2026 Valve Corporation
// Copyright (c) 2022-2026 LunarG, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require

#include "common.h"
#include "build_acceleration_structures.h"

// Push-constant block for this shader. Its single member `pc` carries the
// parameters read by the functions below.
layout(push_constant, scalar) uniform PushConstants {
    BLASValidationShaderPushData pc;
};

// CPU will try to dispatch `primitive_count` threads.
// NOTE(review): main() bounds-checks gl_GlobalInvocationID.x against
// 3 * pc.primitive_count, so the dispatch presumably covers one invocation per
// index rather than per primitive - confirm against the CPU side.
// Workgroup size: 64 x 1 x 1 invocations.
layout(
    local_size_x = 64,
    local_size_y = 1,
    local_size_z = 1
) in;

// Buffer-reference types wrapping an unsized array, one per element width
// (8-, 16- and 32-bit unsigned integers). LoadIndex/StoreIndex construct them
// from an address to access the index data.
layout(buffer_reference, scalar) buffer ArrayU8 {
    uint8_t array[];
};
layout(buffer_reference, scalar) buffer ArrayU16 {
    uint16_t array[];
};
layout(buffer_reference, scalar) buffer ArrayU32 {
    uint array[];
};

// Numeric values mirrored from the Vulkan VkIndexType enum, listed in
// ascending order. They are compared against pc.index_type.
const uint VK_INDEX_TYPE_UINT16 = 0u;
const uint VK_INDEX_TYPE_UINT32 = 1u;
const uint VK_INDEX_TYPE_NONE_KHR = 1000165000u;
const uint VK_INDEX_TYPE_UINT8 = 1000265000u;

// Reads element `i` of the index array located at
// pc.index_data + pc.primitive_offset, interpreting the elements according to
// pc.index_type, and returns the value widened to uint.
// Returns 0 when pc.index_type is none of UINT8, UINT16 or UINT32.
uint LoadIndex(uint i) {
    if (pc.index_type == VK_INDEX_TYPE_UINT8) {
        ArrayU8 indices_u8 = ArrayU8(pc.index_data + pc.primitive_offset);
        return uint(indices_u8.array[i]);
    }
    if (pc.index_type == VK_INDEX_TYPE_UINT16) {
        ArrayU16 indices_u16 = ArrayU16(pc.index_data + pc.primitive_offset);
        return uint(indices_u16.array[i]);
    }
    if (pc.index_type == VK_INDEX_TYPE_UINT32) {
        ArrayU32 indices_u32 = ArrayU32(pc.index_data + pc.primitive_offset);
        return indices_u32.array[i];
    }
    // Unrecognized index type: nothing to fetch.
    return 0;
}

// Writes `value` to element `i` of the index array located at
// pc.index_data + pc.primitive_offset, narrowing it to the element width
// selected by pc.index_type. Does nothing when pc.index_type is none of
// UINT8, UINT16 or UINT32.
void StoreIndex(uint i, uint value) {
    if (pc.index_type == VK_INDEX_TYPE_UINT8) {
        ArrayU8 indices_u8 = ArrayU8(pc.index_data + pc.primitive_offset);
        indices_u8.array[i] = uint8_t(value);
        return;
    }
    if (pc.index_type == VK_INDEX_TYPE_UINT16) {
        ArrayU16 indices_u16 = ArrayU16(pc.index_data + pc.primitive_offset);
        indices_u16.array[i] = uint16_t(value);
        return;
    }
    if (pc.index_type == VK_INDEX_TYPE_UINT32) {
        ArrayU32 indices_u32 = ArrayU32(pc.index_data + pc.primitive_offset);
        indices_u32.array[i] = value;
        return;
    }
}


// Entry point. Each invocation checks one entry of the index array: it loads
// index number gl_GlobalInvocationID.x and logs a GPU-AV error when
// pc.first_vertex + index exceeds pc.max_vertex.
void main() {
    const uint gid = gl_GlobalInvocationID.x;

    // The index array holds 3 * pc.primitive_count entries; invocations past
    // the end have nothing to check.
    if (gid >= (3 * pc.primitive_count)) {
        return;
    }
    const uint fetched_index = LoadIndex(gid);

    // Mathematically the same test as `pc.max_vertex < pc.first_vertex + fetched_index`,
    // rearranged so no unsigned addition can wrap around: with the sum form, a huge index
    // (e.g. 0xFFFFFFFF with first_vertex >= 1) wraps to a small value and escapes detection.
    // The subtraction is only relied upon when first_vertex <= max_vertex, so it cannot underflow.
    const bool first_vertex_past_max = pc.first_vertex > pc.max_vertex;
    if (first_vertex_past_max || (fetched_index > (pc.max_vertex - pc.first_vertex))) {
        // In practice an invalid index does not cause a device loss, so don't bother changing its value.
        // Should someone add this back, a write barrier needs to be added on the CPU side.
        // StoreIndex(gid, 0);
        GpuavLogError4(kErrorGroup_GpuPreBuildAccelerationStructures, kErrorSubCode_PreBuildAccelerationStructures_MaxFetchedIndex, fetched_index, gid, pc.error_info_i, 0);
    }
}