File: IntelExtensions12.hlsl

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (179 lines) | stat: -rw-r--r-- 6,212 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
/*========================== begin_copyright_notice ============================

Copyright (C) 2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

//
// Intel extension buffer structure, generic interface for
//   0-operand extensions (e.g. sync opcodes)
//   1-operand unary operations (e.g. render target writes)
//   2-operand binary operations (future extensions)
//   3-operand ternary operations (future extensions)
//
struct IntelExtensionStruct
{
    uint   opcode;  // opcode to execute (e.g. INTEL_EXT_UINT64_ATOMIC)
    uint   rid; // resource ID
    uint   sid; // sampler ID

    float4 src0f;   // float source operand 0
    float4 src1f;   // float source operand 1
    float4 src2f;   // float source operand 2
    float4 dst0f;   // float destination operand

    uint4  src0u;   // uint source operand 0 (the uint64 atomics store the address in .xy)
    uint4  src1u;   // uint source operand 1 (the uint64 atomics store the value(s) here)
    uint4  src2u;   // uint source operand 2 (the uint64 atomics store the INTEL_EXT_ATOMIC_* selector in .x)
    uint4  dst0u;   // uint destination operand (the uint64 atomics return .xy)

    // Padding so the element is 864 bytes in total:
    // 3 * 4 (uints) + 4 * 16 (float4) + 4 * 16 (uint4) + 181 * 4 (pad) = 864.
    // NOTE(review): presumably this size must match what the driver expects -- do not resize without confirming.
    float  pad[181]; // total length 864
};

//
// extension opcodes
//

// Define the RW buffer used for Intel extensions.
// The application should bind a null resource; operations will be ignored.
// If the application needs to use a slot other than u63, it needs to
// define INTEL_SHADER_EXT_UAV_SLOT as an unused slot. This should be
// defined before including this file in the shader, as:
// #define INTEL_SHADER_EXT_UAV_SLOT u8
//
// The extension functions in this file call IncrementCounter() on this buffer
// and use the returned value as the element index for their request.

#ifdef INTEL_SHADER_EXT_UAV_SLOT
RWStructuredBuffer<IntelExtensionStruct> g_IntelExt : register( INTEL_SHADER_EXT_UAV_SLOT );
#else
RWStructuredBuffer<IntelExtensionStruct> g_IntelExt : register( u63 );
#endif

// Extension opcode: value written to IntelExtensionStruct::opcode by the
// uint64 typed atomic functions in this file.
#define INTEL_EXT_UINT64_ATOMIC           24

// Atomic operation selectors: value written to IntelExtensionStruct::src2u.x
// by the uint64 typed atomic functions to pick the operation to perform.
#define INTEL_EXT_ATOMIC_ADD          0
#define INTEL_EXT_ATOMIC_MIN          1
#define INTEL_EXT_ATOMIC_MAX          2
#define INTEL_EXT_ATOMIC_CMPXCHG      3
#define INTEL_EXT_ATOMIC_XCHG         4
#define INTEL_EXT_ATOMIC_AND          5
#define INTEL_EXT_ATOMIC_OR           6
#define INTEL_EXT_ATOMIC_XOR          7


//
// Initialize the Intel HLSL extensions.
// Call this before using any other extension function in this file.
// It stores a four-dword signature into src0u of element 0 of g_IntelExt.
//
void IntelExt_Init()
{
    // Read as little-endian bytes, the dwords spell "intc" "hlsl" "extn" "0012".
    g_IntelExt[0].src0u = uint4(0x63746e69, 0x6c736c68, 0x6e747865, 0x32313030);
}


// uint64 typed atomics
//
// Interlocked max
// Encodes a 64-bit atomic max request in g_IntelExt and returns the dst0u.xy
// field of the same element.
//   uav     - texture the atomic targets; referenced here only by the dummy store
//   address - stored to src0u.xy (presumably the texel coordinate in uav -- TODO confirm)
//   value   - stored to src1u.xy (64-bit operand as two dwords; which half is low is not shown here)
// NOTE(review): the access sequence below looks order-sensitive (presumably it is
// pattern-matched by the Intel shader compiler) -- confirm before reordering.
uint2 IntelExt_InterlockedMaxUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    // Despite its name, 'opcode' holds the element index returned by the buffer counter.
    uint opcode = g_IntelExt.IncrementCounter();
    uav[uint2(opcode, opcode)] = uint2(0, 0); //dummy instruction to get the resource handle
    g_IntelExt[opcode].opcode = INTEL_EXT_UINT64_ATOMIC;
    g_IntelExt[opcode].src0u.xy = address;
    g_IntelExt[opcode].src1u.xy = value;
    g_IntelExt[opcode].src2u.x = INTEL_EXT_ATOMIC_MAX; // selects the max operation

    return g_IntelExt[opcode].dst0u.xy;
}

// Interlocked min
// Encodes a 64-bit atomic min request in g_IntelExt and returns the dst0u.xy
// field of the same element.
//   uav     - texture the atomic targets; referenced here only by the dummy store
//   address - stored to src0u.xy (presumably the texel coordinate in uav -- TODO confirm)
//   value   - stored to src1u.xy (64-bit operand as two dwords; which half is low is not shown here)
// NOTE(review): the access sequence below looks order-sensitive (presumably it is
// pattern-matched by the Intel shader compiler) -- confirm before reordering.
uint2 IntelExt_InterlockedMinUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    // Despite its name, 'opcode' holds the element index returned by the buffer counter.
    uint opcode = g_IntelExt.IncrementCounter();
    uav[uint2(opcode, opcode)] = uint2(0, 0); //dummy instruction to get the resource handle
    g_IntelExt[opcode].opcode = INTEL_EXT_UINT64_ATOMIC;
    g_IntelExt[opcode].src0u.xy = address;
    g_IntelExt[opcode].src1u.xy = value;
    g_IntelExt[opcode].src2u.x = INTEL_EXT_ATOMIC_MIN; // selects the min operation

    return g_IntelExt[opcode].dst0u.xy;
}

// Interlocked and
// Encodes a 64-bit atomic AND request in g_IntelExt and returns the dst0u.xy
// field of the same element.
//   uav     - texture the atomic targets; referenced here only by the dummy store
//   address - stored to src0u.xy (presumably the texel coordinate in uav -- TODO confirm)
//   value   - stored to src1u.xy (64-bit operand as two dwords; which half is low is not shown here)
// NOTE(review): the access sequence below looks order-sensitive (presumably it is
// pattern-matched by the Intel shader compiler) -- confirm before reordering.
uint2 IntelExt_InterlockedAndUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    // Despite its name, 'opcode' holds the element index returned by the buffer counter.
    uint opcode = g_IntelExt.IncrementCounter();
    uav[uint2(opcode, opcode)] = uint2(0, 0); //dummy instruction to get the resource handle
    g_IntelExt[opcode].opcode = INTEL_EXT_UINT64_ATOMIC;
    g_IntelExt[opcode].src0u.xy = address;
    g_IntelExt[opcode].src1u.xy = value;
    g_IntelExt[opcode].src2u.x = INTEL_EXT_ATOMIC_AND; // selects the AND operation

    return g_IntelExt[opcode].dst0u.xy;
}

// Interlocked or
// Encodes a 64-bit atomic OR request in g_IntelExt and returns the dst0u.xy
// field of the same element.
//   uav     - texture the atomic targets; referenced here only by the dummy store
//   address - stored to src0u.xy (presumably the texel coordinate in uav -- TODO confirm)
//   value   - stored to src1u.xy (64-bit operand as two dwords; which half is low is not shown here)
// NOTE(review): the access sequence below looks order-sensitive (presumably it is
// pattern-matched by the Intel shader compiler) -- confirm before reordering.
uint2 IntelExt_InterlockedOrUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    // Despite its name, 'opcode' holds the element index returned by the buffer counter.
    uint opcode = g_IntelExt.IncrementCounter();
    uav[uint2(opcode, opcode)] = uint2(0, 0); //dummy instruction to get the resource handle
    g_IntelExt[opcode].opcode = INTEL_EXT_UINT64_ATOMIC;
    g_IntelExt[opcode].src0u.xy = address;
    g_IntelExt[opcode].src1u.xy = value;
    g_IntelExt[opcode].src2u.x = INTEL_EXT_ATOMIC_OR; // selects the OR operation

    return g_IntelExt[opcode].dst0u.xy;
}

// Interlocked add
// Encodes a 64-bit atomic add request in g_IntelExt and returns the dst0u.xy
// field of the same element.
//   uav     - texture the atomic targets; referenced here only by the dummy store
//   address - stored to src0u.xy (presumably the texel coordinate in uav -- TODO confirm)
//   value   - stored to src1u.xy (64-bit operand as two dwords; which half is low is not shown here)
// NOTE(review): the access sequence below looks order-sensitive (presumably it is
// pattern-matched by the Intel shader compiler) -- confirm before reordering.
uint2 IntelExt_InterlockedAddUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    // Despite its name, 'opcode' holds the element index returned by the buffer counter.
    uint opcode = g_IntelExt.IncrementCounter();
    uav[uint2(opcode, opcode)] = uint2(0, 0); //dummy instruction to get the resource handle
    g_IntelExt[opcode].opcode = INTEL_EXT_UINT64_ATOMIC;
    g_IntelExt[opcode].src0u.xy = address;
    g_IntelExt[opcode].src1u.xy = value;
    g_IntelExt[opcode].src2u.x = INTEL_EXT_ATOMIC_ADD; // selects the add operation

    return g_IntelExt[opcode].dst0u.xy;
}

// Interlocked xor
// Encodes a 64-bit atomic XOR request in g_IntelExt and returns the dst0u.xy
// field of the same element.
//   uav     - texture the atomic targets; referenced here only by the dummy store
//   address - stored to src0u.xy (presumably the texel coordinate in uav -- TODO confirm)
//   value   - stored to src1u.xy (64-bit operand as two dwords; which half is low is not shown here)
// NOTE(review): the access sequence below looks order-sensitive (presumably it is
// pattern-matched by the Intel shader compiler) -- confirm before reordering.
uint2 IntelExt_InterlockedXorUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    // Despite its name, 'opcode' holds the element index returned by the buffer counter.
    uint opcode = g_IntelExt.IncrementCounter();
    uav[uint2(opcode, opcode)] = uint2(0, 0); //dummy instruction to get the resource handle
    g_IntelExt[opcode].opcode = INTEL_EXT_UINT64_ATOMIC;
    g_IntelExt[opcode].src0u.xy = address;
    g_IntelExt[opcode].src1u.xy = value;
    g_IntelExt[opcode].src2u.x = INTEL_EXT_ATOMIC_XOR; // selects the XOR operation

    return g_IntelExt[opcode].dst0u.xy;
}

// Interlocked exchange
// Encodes a 64-bit atomic exchange request in g_IntelExt and returns the
// dst0u.xy field of the same element.
//   uav     - texture the atomic targets; referenced here only by the dummy store
//   address - stored to src0u.xy (presumably the texel coordinate in uav -- TODO confirm)
//   value   - stored to src1u.xy (64-bit operand as two dwords; which half is low is not shown here)
// NOTE(review): the access sequence below looks order-sensitive (presumably it is
// pattern-matched by the Intel shader compiler) -- confirm before reordering.
uint2 IntelExt_InterlockedExchangeUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    // Despite its name, 'opcode' holds the element index returned by the buffer counter.
    uint opcode = g_IntelExt.IncrementCounter();
    uav[uint2(opcode, opcode)] = uint2(0, 0); //dummy instruction to get the resource handle
    g_IntelExt[opcode].opcode = INTEL_EXT_UINT64_ATOMIC;
    g_IntelExt[opcode].src0u.xy = address;
    g_IntelExt[opcode].src1u.xy = value;
    g_IntelExt[opcode].src2u.x = INTEL_EXT_ATOMIC_XCHG; // selects the exchange operation

    return g_IntelExt[opcode].dst0u.xy;
}

// Interlocked compare exchange
// Encodes a 64-bit atomic compare-exchange request in g_IntelExt and returns
// the dst0u.xy field of the same element.
//   uav        - texture the atomic targets; referenced here only by the dummy store
//   address    - stored to src0u.xy (presumably the texel coordinate in uav -- TODO confirm)
//   cmp_value  - comparand, stored to src1u.xy
//   xchg_value - replacement value, stored to src1u.zw
// Each 64-bit quantity is passed as two dwords; which half is low is not shown here.
// NOTE(review): the access sequence below looks order-sensitive (presumably it is
// pattern-matched by the Intel shader compiler) -- confirm before reordering.
uint2 IntelExt_InterlockedCompareExchangeUint64(RWTexture2D<uint2> uav, uint2 address, uint2 cmp_value, uint2 xchg_value)
{
    // Despite its name, 'opcode' holds the element index returned by the buffer counter.
    uint opcode = g_IntelExt.IncrementCounter();
    uav[uint2(opcode, opcode)] = uint2(0, 0); //dummy instruction to get the resource handle
    g_IntelExt[opcode].opcode = INTEL_EXT_UINT64_ATOMIC;
    g_IntelExt[opcode].src0u.xy = address;
    // Both operands share src1u: comparand in .xy, replacement in .zw.
    g_IntelExt[opcode].src1u.xy = cmp_value;
    g_IntelExt[opcode].src1u.zw = xchg_value;
    g_IntelExt[opcode].src2u.x = INTEL_EXT_ATOMIC_CMPXCHG; // selects the compare-exchange operation

    return g_IntelExt[opcode].dst0u.xy;
}