File: spv.coopmat.comp

package info (click to toggle)
glslang 16.2.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 51,712 kB
  • sloc: cpp: 92,305; yacc: 4,320; sh: 603; python: 305; ansic: 94; javascript: 74; makefile: 17
file content (119 lines) | stat: -rw-r--r-- 3,500 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_NV_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#extension GL_EXT_buffer_reference : enable

// Workgroup size: 64 x 1 x 1 invocations.
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// X is an ordinary compile-time constant. Y is specialization constant 0
// (default 2), so Z = X*Y depends on a specialization constant as well.
const int X = 8;
layout(constant_id = 0) const int Y = 2;
const int Z = X*Y;

// Global cooperative matrices (a single one and an array of three).
// Template arguments per GL_NV_cooperative_matrix: component bit width,
// scope, rows, columns. The row count here is the specialization-dependent Z.
fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC;
fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC2[3];

// Arrays whose sizes come from a cooperative matrix's length(), queried once
// on a plain variable and once on an array element.
int arr[mC.length()];
int arr2[mC2[1].length()];

// Specialization constant 1 (default 3.0).
// NOTE(review): main() declares a local that is also named F.
layout(constant_id = 1) const float F = 3.0;

// Constant cooperative matrices, each built from a single scalar argument.
// mD uses a float literal with a Z-row type; mD2 uses an integer literal.
const fcoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = fcoopmatNV<32, gl_ScopeSubgroup, Z, 8>(0.0);
const fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> mD2 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(1);

// Plain three-int struct and a constant instance built with its constructor.
// Neither S nor s is referenced anywhere else in the visible shader.
struct S { int a; int b; int c; };

const S s = S(12, 23, 34);

// Buffer block of 32-bit floats, declared with the buffer_reference layout
// qualifier (GL_EXT_buffer_reference) and the instance name `block`.
//   y - fixed-size array of 1024*1024 floats
//   x - unsized array (last member)
// NOTE(review): uses the same set/binding (0, 0) as Block16 below.
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
    float y[1024*1024];
    float x[];
} block;

// Buffer block of 16-bit floats, instance name `block16`.
//   arr - 256 x 256 two-dimensional array; main() passes single rows of it
//   y   - fixed-size array of 1024*1024 elements
//   x   - unsized array
//   b   - member of the buffer_reference type Block declared above
// NOTE(review): the unsized member x is followed by b rather than being the
// final member; kept exactly as written.
layout(set = 0, binding = 0) coherent buffer Block16 {
    float16_t arr[256][256];
    float16_t y[1024*1024];
    float16_t x[];

    Block b;
} block16;

// Helpers that take a cooperative matrix by value and return its negation.
// f16 handles the 16-bit 8x8 type, f32 the 32-bit 8x8 type; both are called
// from main().
fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> f16(fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> m) { return -m; }
fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> f32(fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> m) { return -m; }

// Specialization constant 2 (default 1), used below both as the matrix
// dimensions and as the array dimensions of scm.
layout(constant_id = 2) const int SC = 1;
fcoopmatNV<16, gl_ScopeSubgroup, SC, SC> scm[SC][SC];

// sized for fcoopmatNV<16, gl_ScopeSubgroup, 16, 16>
// (presumably 16*16 elements * 2 bytes each / 16 bytes per uvec4 = 32 entries)
shared uvec4 shmatrix[16*16*2/16];

// Entry point. Runs through a sequence of cooperative-matrix operations:
// construction, arithmetic, conversion, component indexing, load/store
// against several kinds of backing storage, multiply-add, length(), function
// calls and compound assignment. Nothing is written to an output other than
// through the coopMatStoreNV calls.
void main()
{
    // The column count is given as a parenthesised constant expression
    // (evaluates to 8) rather than a literal.
    fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> m = fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)>(0.0);

    // Matrix +, -, unary negation, and scalar multiplication on either side.
    m = m + m;
    m = m - m;
    m = -m;
    m = 2.0*m;
    m = m*2.0;

    // Construct a 16-bit matrix from the 32-bit matrix of the same dimensions.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> m2 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(m);

    // Read one component and write another through the index operator.
    float x = m[1];
    m[0] = x;

    // Load/store through the unsized arrays. Arguments after the buffer are
    // element = 16, stride = 128, colMajor = false.
    //   block.x     - float array in the buffer_reference block instance
    //   block16.x   - float16_t array
    //   block16.b.x - float array reached through the Block member b
    coopMatLoadNV(m, block.x, 16, 128, false);
    coopMatStoreNV(m, block.x, 16, 128, false);
    coopMatLoadNV(m2, block16.x, 16, 128, false);
    coopMatStoreNV(m2, block16.x, 16, 128, false);
    coopMatLoadNV(m, block16.b.x, 16, 128, false);
    coopMatStoreNV(m, block16.b.x, 16, 128, false);

    // Multiply-add: A (16-bit, 16x8) and B (16-bit, 8x8) with a 32-bit 16x8
    // addend C and 32-bit 16x8 result D.
    // NOTE(review): A, B and C are never assigned before this call.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> A;
    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> B;
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> C;
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> D;
    D = coopMatMulAddNV(A, B, C);

    // length() of a local matrix stored in an int.
    int l = D.length();

    // Declared but not referenced again in this function.
    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> E;

    // Both dimensions are the specialization-dependent Z. This local F hides
    // the global specialization constant F for the rest of main().
    fcoopmatNV<16, gl_ScopeSubgroup, Z, Z> F = fcoopmatNV<16, gl_ScopeSubgroup, Z, Z>(0.0);

    // Array of five matrices; write component 0 of element 3.
    fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
    a[3][0] = 1.0;

    // Read a component of the global constant matrix mD.
    float md1 = mD[1];

    // Index directly into the result of a compound assignment.
    // NOTE(review): 1234 is larger than the 16*8 = 128 elements of the whole
    // matrix; presumably only the indexing code path matters here - confirm.
    md1 += (m += m)[1234];

    // Whole-matrix assignment between elements of the global array.
    mC2[1] = mC2[2];

    // Same load/store calls as above, but through the fixed-size arrays y.
    coopMatLoadNV(m, block.y, 16, 128, false);
    coopMatStoreNV(m, block.y, 16, 128, false);
    coopMatLoadNV(m2, block16.y, 16, 128, false);
    coopMatStoreNV(m2, block16.y, 16, 128, false);

    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> p1;
    fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> p2;

    // Pass matrices to, and receive them from, the user-defined functions.
    // NOTE(review): p1 and p2 have no value assigned before these calls.
    p1 = f16(p1);
    p2 = f32(p2);

    // Re-assign from scalar constructors.
    p1 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(0.0);
    p2 = fcoopmatNV<32, gl_ScopeSubgroup, 8, 8>(0.0);

    // Compound divide of a matrix by itself.
    p1 /= p1;

    // Compound multiply by a scalar: an explicit float16_t for the 16-bit
    // matrix, a float literal for the 32-bit matrix.
    p1 *= float16_t(2.0);
    p2 *= 4.0;

    // Load/store through the shared uvec4 array (element = 1, stride = 2).
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> ms;
    coopMatLoadNV(ms, shmatrix, 1, 2, false);
    coopMatStoreNV(ms, shmatrix, 1, 2, false);

    // Load/store through single rows of the two-dimensional array
    // block16.arr: row 8 for the load, row 16 for the store.
    coopMatLoadNV(ms, block16.arr[8], 1, 2, false);
    coopMatStoreNV(ms, block16.arr[16], 1, 2, false);

}