File: spv.coopmat2_constructor.comp

package info (click to toggle)
glslang 16.1.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 51,084 kB
  • sloc: cpp: 90,714; yacc: 4,243; sh: 603; python: 305; ansic: 94; javascript: 74; makefile: 17
file content (77 lines) | stat: -rw-r--r-- 3,439 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_NV_cooperative_matrix2 : enable

// Compute workgroup size: 64 invocations in x, 1 in y, 1 in z.
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Buffer block (instance name Buf) holding a single unsized array of
// float16_t. No layout qualifiers are given. In this file Buf.x is only
// referenced by the commented-out tensor load/store calls in main().
buffer BufType {
   float16_t x[];
} Buf;

// Returns y*64 + x.
// NOTE(review): presumably a linear index for a 64-wide row-major layout --
// confirm. In this file it is only referenced by the commented-out tensor
// load/store calls in main().
uint32_t addr(const in uint32_t x, const in uint32_t y) {
    return y*64+x;
}

// Returns the constant 124. In this file it is only referenced by the
// commented-out tensor load/store calls in main().
uint32_t foo() { return 124; }

// Per-element callback: returns max(x, 0). The row and col parameters are
// accepted but not used. Passed to coopMatPerElementNV in main().
float16_t relu(const in uint32_t row, const in uint32_t col, const in float16_t x) { return max(x, float16_t(0)); }

// Per-element callback taking one extra operand: returns x + y. The row and
// col parameters are accepted but not used. Passed to coopMatPerElementNV in
// main() together with float16_t(1.0) as the extra argument.
float16_t add(const in uint32_t row, const in uint32_t col, const in float16_t x, const in float16_t y) { return x+y; }

// Binary combine callback: returns a + b. Passed to coopMatReduceNV in main()
// for the row, column, and row-and-column reduce modes.
float16_t combineSum(const in float16_t a, const in float16_t b) { return a + b; }
// Binary combine callback: returns max(a, b). Passed to coopMatReduceNV in
// main() for the 2x2 reduce mode.
float16_t combineMax(const in float16_t a, const in float16_t b) { return max(a, b); }

// Specialization constant (constant_id 0) with default value 32. Used as both
// dimensions of the coopmat types declared at the end of main().
layout(constant_id = 0) const uint32_t Dim = 32;

// Shader entry point. Declares workgroup-scope cooperative matrices of
// several component types, uses, and dimensions, and runs them through
// conversion constructors and the coopMatTransposeNV / coopMatReduceNV /
// coopMatPerElementNV built-ins. Nothing is loaded from or stored to Buf;
// all matrices are declared without initializers.
void main()
{
    // Three float16_t matrices with dimension arguments 64, 32 -- one for
    // each matrix use (A, B, Accumulator).
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA> A;
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseB> B;
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseAccumulator> Acc;

    // Constructors that change only the matrix use: Accumulator -> A and
    // Accumulator -> B, with identical component type and dimensions.
    A = coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Acc);
    B = coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseB>(Acc);

    // Use-B matrix whose dimension arguments (32, 64) are swapped relative
    // to Acc's (64, 32).
    coopmat<float16_t, gl_ScopeWorkgroup, 32, 64, gl_MatrixUseB> tr;

    // First argument is tr, second is the source Acc.
    coopMatTransposeNV(tr, Acc);

    // Reductions with Acc as both first and second argument, once per
    // row / column / row-and-column reduce mode, combining with combineSum.
    coopMatReduceNV(Acc, Acc, gl_CooperativeMatrixReduceRowNV, combineSum);
    coopMatReduceNV(Acc, Acc, gl_CooperativeMatrixReduceColumnNV, combineSum);
    coopMatReduceNV(Acc, Acc, gl_CooperativeMatrixReduceRowAndColumnNV, combineSum);

    // 2x2 reduce mode: Acc2x2's dimension arguments (32, 16) are half of
    // Acc's (64, 32) in each position; combines with combineMax.
    coopmat<float16_t, gl_ScopeWorkgroup, 32, 16, gl_MatrixUseAccumulator> Acc2x2;
    coopMatReduceNV(Acc2x2, Acc, gl_CooperativeMatrixReduce2x2NV, combineMax);

    // Disabled tensor load/store calls; these are the only references to
    // Buf, foo() and addr() in this file.
    //coopMatLoadTensorNV(A, Buf.x, foo(), addr);
    //coopMatStoreTensorNV(A, Buf.x, foo(), addr);

    // Per-element calls with Acc as both first and second argument: first
    // with relu (no extra operand), then with add and one extra operand.
    coopMatPerElementNV(Acc, Acc, relu);
    coopMatPerElementNV(Acc, Acc, add, float16_t(1.0));

    // Accumulator matrices with identical dimension arguments in four
    // component types: float16_t, float32_t, uint32_t and int32_t.
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseAccumulator> Accf16;
    coopmat<float32_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseAccumulator> Accf32;
    coopmat<uint32_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseAccumulator> Accu32;
    coopmat<int32_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseAccumulator> Accs32;

    // Constructors that change both component type and use: each of the four
    // accumulators above is converted to uint8_t, int8_t and float16_t
    // use-A matrices. These are bare expression statements; the constructed
    // values are not assigned to anything.

    // Source: uint32_t accumulator.
    coopmat<uint8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accu32);
    coopmat<int8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accu32);
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accu32);

    // Source: int32_t accumulator.
    coopmat<uint8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accs32);
    coopmat<int8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accs32);
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accs32);

    // Source: float16_t accumulator (the last line keeps the component type
    // and changes only the use).
    coopmat<uint8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf16);
    coopmat<int8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf16);
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf16);

    // Source: float32_t accumulator.
    coopmat<uint8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf32);
    coopmat<int8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf32);
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf32);

    // Matrices whose dimensions are the specialization constant Dim rather
    // than literals. li is declared but never used. mijm1 is assigned a
    // matrix constructed from the scalar expression -1.0/0.0.
    // NOTE(review): the division by a literal zero is presumably intended to
    // produce negative infinity -- confirm.
    coopmat<float, gl_ScopeWorkgroup, Dim, Dim, gl_MatrixUseAccumulator> li, mijm1;
    mijm1 = coopmat<float, gl_ScopeWorkgroup, Dim, Dim, gl_MatrixUseAccumulator>(-1.0/0.0);
}