1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
#version 450 core
// GL_EXT_bfloat16 is the only extension marked `require`; the others below are
// requested with `enable`.
#extension GL_EXT_bfloat16 : require
#extension GL_KHR_cooperative_matrix : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_scalar_block_layout : enable
// Compute workgroup size is 1 x 1 x 1.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
// Constant bfloat16 scalar and a two-component vector; the vector is built
// from the single value 20.
const bfloat16_t bc10 = bfloat16_t(10);
const bf16vec2 b2c20 = bf16vec2(20);
// Specialization constant (constant_id = 1) whose default is bfloat16_t(10).
layout(constant_id = 1) const bfloat16_t bsc10 = bfloat16_t(10);
// Struct with a single bfloat16 member, and a constant instance built from bc10.
struct S { bfloat16_t b; };
const S sc = S(bc10);
// Two-element constant array; both elements are initialized from bc10.
const bfloat16_t bca[2] = {bc10, bc10};
// Shared array of ten bfloat16 values; no visible code references it.
shared bfloat16_t bfs[10];
// Buffer block declared with the `scalar` layout qualifier. It holds, in
// order, a 3-component vector, a 2-component vector and a scalar, all
// bfloat16. The visible code only reads these members through `buf`.
layout(scalar) buffer B {
bf16vec3 b3;
bf16vec2 b2;
bfloat16_t b1;
} buf;
// Identity helper: returns the bfloat16 argument unchanged.
bfloat16_t funcbf16(bfloat16_t x)
{
    bfloat16_t result = x;
    return result;
}
// Converts a float32_t argument to bfloat16 with the explicit constructor
// and returns the converted value.
bfloat16_t funcf32(float32_t x)
{
    bfloat16_t converted = bfloat16_t(x);
    return converted;
}
// Converts a float64_t argument to bfloat16 with the explicit constructor
// and returns the converted value.
bfloat16_t funcf64(float64_t x)
{
    bfloat16_t converted = bfloat16_t(x);
    return converted;
}
// Shader entry point. Walks through bfloat16 construction, explicit and
// implicit conversions, buffer loads, dot(), cooperative-matrix construction,
// helper-function calls and the bfloat16 <-> 16-bit integer "BitsTo" built-ins.
// Nothing here writes to `buf`; every result lands in a local or is discarded.
void main()
{
// Construction from float literals and from a float variable. The bare
// constructor expression on its own line discards its result.
float f = 2.0;
bfloat16_t b = bfloat16_t(1.0);
bfloat16_t(f);
bf16vec2 b2 = bf16vec2(f);
// One local of each explicitly sized integer and float type, used as
// conversion sources and destinations below.
uint8_t u8 = uint8_t(5);
uint16_t u16 = uint16_t(5);
uint32_t u32 = 5;
uint64_t u64 = 5;
int8_t i8 = int8_t(6);
int16_t i16 = int16_t(6);
int32_t i32 = 6;
int64_t i64 = 6;
bfloat16_t bf16 = bfloat16_t(7);
float16_t f16 = float16_t(7);
float32_t f32 = 7;
float64_t f64 = 7;
// Explicit constructor conversions from every type above to bfloat16.
b = bfloat16_t(u8);
b = bfloat16_t(u16);
b = bfloat16_t(u32);
b = bfloat16_t(u64);
b = bfloat16_t(i8);
b = bfloat16_t(i16);
b = bfloat16_t(i32);
b = bfloat16_t(i64);
b = bfloat16_t(bf16);
b = bfloat16_t(f16);
b = bfloat16_t(f32);
b = bfloat16_t(f64);
// Explicit constructor conversions from bfloat16 back to every type above.
u8 = uint8_t(b);
u16 = uint16_t(b);
u32 = uint32_t(b);
u64 = uint64_t(b);
i8 = int8_t(b);
i16 = int16_t(b);
i32 = int32_t(b);
i64 = int64_t(b);
bf16 = bfloat16_t(b);
f16 = float16_t(b);
f32 = float32_t(b);
f64 = float64_t(b);
// Assignments with no explicit constructor: bfloat16 into float32_t and
// float64_t variables.
f32 = b;
f64 = b;
// Loads of the scalar, 2-component and 3-component members of the buffer block.
b = buf.b1;
b2 = buf.b2;
bf16vec3 b3 = buf.b3;
// dot() on bfloat16 vectors; the result is discarded.
dot(b2, b2);
// 16x16 subgroup-scope use-A cooperative matrix of bfloat16 built from the
// value 3.0, then a float matrix of the same shape constructed from it.
coopmat<bfloat16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> cmA = coopmat<bfloat16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(3.0);
coopmat<float, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> cmAf = coopmat<float, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(cmA);
// Helper calls with a bfloat16 argument. funcf32 and funcf64 declare
// float32_t / float64_t parameters, so no explicit conversion is written at
// the call site. All return values are discarded.
funcbf16(b);
funcf32(b);
funcf64(b);
// bfloat16BitsToIntEXT on 1 to 4 components. The 3- and 4-component inputs
// are assembled from b2 and b.
int16_t i16_1 = bfloat16BitsToIntEXT(b);
i16vec2 i16_2 = bfloat16BitsToIntEXT(b2);
i16vec3 i16_3 = bfloat16BitsToIntEXT(bf16vec3(b2, b));
i16vec4 i16_4 = bfloat16BitsToIntEXT(bf16vec4(b2, b2));
// Same pattern with bfloat16BitsToUintEXT and unsigned 16-bit results.
uint16_t u16_1 = bfloat16BitsToUintEXT(b);
u16vec2 u16_2 = bfloat16BitsToUintEXT(b2);
u16vec3 u16_3 = bfloat16BitsToUintEXT(bf16vec3(b2, b));
u16vec4 u16_4 = bfloat16BitsToUintEXT(bf16vec4(b2, b2));
// Reverse direction: intBitsToBFloat16EXT on the signed results above.
bfloat16_t b16_1 = intBitsToBFloat16EXT(i16_1);
bf16vec2 b16_2 = intBitsToBFloat16EXT(i16_2);
bf16vec3 b16_3 = intBitsToBFloat16EXT(i16_3);
bf16vec4 b16_4 = intBitsToBFloat16EXT(i16_4);
// Reverse direction: uintBitsToBFloat16EXT on the unsigned results above,
// overwriting the same locals.
b16_1 = uintBitsToBFloat16EXT(u16_1);
b16_2 = uintBitsToBFloat16EXT(u16_2);
b16_3 = uintBitsToBFloat16EXT(u16_3);
b16_4 = uintBitsToBFloat16EXT(u16_4);
}
// Exercises dynamic indexing, swizzles, a struct member store and selection
// on bfloat16 data.
//   v - array of two bf16vec4; declared without out/inout, so the store into
//       it below is not visible to the caller.
//   i - used as array index, as component index, and as the selector.
// Returns v[0].x when i == 2, otherwise v[1].y.
bfloat16_t func2(bf16vec4 v[2], int i)
{
    bfloat16_t picked;

    // Truncate each vec4 to a vec2, then read one component: dynamically
    // indexed first, then via swizzle. The second store replaces the first.
    picked = (bf16vec2(v[0]))[i];
    picked = (bf16vec2(v[1])).y;

    // Copy a dynamically indexed component into .z of the same element.
    v[i].z = v[i][i];

    S holder;
    holder.b = picked;

    // Keep the current value unless i is zero, in which case use 2.
    if (i == 0) {
        picked = bfloat16_t(2);
    }

    // The final selection overwrites whatever was computed above.
    picked = (i == 2) ? v[0].x : v[1].y;
    return picked;
}
|