File: spv.coopmat2_tensor.comp

package info (click to toggle)
glslang 16.2.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 51,712 kB
  • sloc: cpp: 92,305; yacc: 4,320; sh: 603; python: 305; ansic: 94; javascript: 74; makefile: 17
file content (77 lines) | stat: -rw-r--r-- 2,412 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_NV_cooperative_matrix2 : enable
#extension GL_EXT_buffer_reference : enable

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Storage block supplying the float16_t memory that main() passes as the
// buffer argument of coopMatLoadTensorNV / coopMatStoreTensorNV.
buffer BufType {
   // Fixed-size 2D array; main() passes single rows of it (arr[8], arr[16]).
   float16_t arr[256][256];
   // Runtime-sized array; main() passes it whole (Buf.x).
   float16_t x[];
} Buf;

// Buffer-reference type holding a single float16_t, declared with 2-byte
// reference alignment. It is the type of decode()'s first parameter.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer fp16Buf {
    float16_t f;
};


// Function passed by name as the trailing argument of several
// coopMatLoadTensorNV calls in main().
//
//   b            - buffer reference whose float16_t member is returned.
//   blockCoords  - two-element uint32_t array; not used by this implementation.
//   coordInBlock - two-element uint32_t array; not used by this implementation.
//
// Returns b.f unchanged.
float16_t decode(const in fp16Buf b, const in uint32_t blockCoords[2], const in uint32_t coordInBlock[2])
{
    return b.f;
}

// Element type of the runtime-sized array in SBuf: a struct wrapping one
// f16vec2.
struct S {
   f16vec2 x;
};


// std430 storage block at binding 0 containing a runtime-sized array of S.
// main() passes sbuf.s as the buffer argument of one coopMatLoadTensorNV call,
// i.e. a load whose source array has a struct element type rather than float16_t.
layout(std430, binding  = 0) buffer SBuf {
    S s[];
} sbuf;

// Specialization constants (constant_id 0 and 1), both defaulting to
// gl_CooperativeMatrixClampModeConstantNV. They differ only in type
// (uint32_t vs. int32_t). main() uses each one as the second template
// argument of a tensorLayoutNV, and additionally uses Clamp as the first
// argument of a tensorViewNV / createTensorViewNV.
layout(constant_id = 0) const uint32_t Clamp = gl_CooperativeMatrixClampModeConstantNV;
layout(constant_id = 1) const int32_t Clamp_signed = gl_CooperativeMatrixClampModeConstantNV;

// Entry point. Declares tensor layout / tensor view objects in several
// template forms and issues coopMatLoadTensorNV / coopMatStoreTensorNV calls
// with different argument combinations. No result is read back or written to
// an output other than through the store calls themselves.
// NOTE(review): as a compiler test input, statement order and identifier
// names presumably affect the expected output — confirm before restructuring.
void main()
{
    // 64x32 float16_t workgroup-scope matrix with use A; the target of most
    // loads and the source of all stores below.
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA> A;

    // Tensor layouts declared with one and with two template arguments; the
    // create call repeats the same values as the type.
    // NOTE(review): per GL_NV_cooperative_matrix2 the arguments are presumably
    // (dimension count, clamp mode) — confirm against the extension spec.
    tensorLayoutNV<2> t = createTensorLayoutNV(2);
    tensorLayoutNV<3, 1> t2 = createTensorLayoutNV(3, 1);

    // Each setter returns a new layout value that is assigned back to t.
    t = setTensorLayoutBlockSizeNV(t, 4, 8);
    t = setTensorLayoutDimensionNV(t, 256, 512);
    // NOTE(review): the four values are presumably (offset, span) pairs, one
    // pair per dimension — confirm against the extension spec.
    t = sliceTensorLayoutNV(t, 128, 32, 256, 32);

    // Tensor view with a single template argument (5), configured with five
    // dimension values, five stride values, and a four-value clip.
    tensorViewNV<5> v = createTensorViewNV(5);
    v = setTensorViewDimensionsNV(v, 10, 11, 12, 13, 14);
    v = setTensorViewStrideNV(v, 10, 11, 12, 13, 15);
    v = setTensorViewClipNV(v, 0, 16, 0, 16);

    // Tensor views declared with additional template arguments: a bool, and a
    // bool followed by two integers.
    tensorViewNV<5, true> v2 = createTensorViewNV(5, true);
    tensorViewNV<2, true, 1, 0> v3 = createTensorViewNV(2, true, 1, 0);

    // Load/store with a layout only.
    coopMatLoadTensorNV(A, Buf.x, 0, t);
    coopMatStoreTensorNV(A, Buf.x, 0, t);

    // Load/store with a layout and a view.
    coopMatLoadTensorNV(A, Buf.x, 0, t, v);
    coopMatStoreTensorNV(A, Buf.x, 0, t, v);

    // Loads that additionally pass the decode function, without and with a view.
    coopMatLoadTensorNV(A, Buf.x, 0, t, decode);
    coopMatLoadTensorNV(A, Buf.x, 0, t, v, decode);

    // Same full form, but sourcing from the struct-element array sbuf.s and
    // passing 1 instead of 0 as the third argument.
    coopMatLoadTensorNV(A, sbuf.s, 1, t, v, decode);

    // Layout whose second template argument is the uint32_t specialization
    // constant Clamp.
    tensorLayoutNV<2, Clamp> tc = createTensorLayoutNV(2, Clamp);

    // Load into an element of an array of cooperative matrices.
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA> Arr[2];
    coopMatLoadTensorNV(Arr[1], Buf.x, 0, t);

    // Load/store using a single row of the fixed-size 2D array as the buffer
    // argument.
    coopMatLoadTensorNV(A, Buf.arr[8], 0, t);
    coopMatStoreTensorNV(A, Buf.arr[16], 0, t);

    // Layout using the signed (int32_t) specialization constant, and a view
    // whose first template argument is the specialization constant Clamp.
    tensorLayoutNV<2, Clamp_signed> tc_s = createTensorLayoutNV(2, Clamp_signed);
    tensorViewNV<Clamp, true> v4 = createTensorViewNV(Clamp, true);
}