File: IBiF_Dot_Product.cl

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (181 lines) | stat: -rw-r--r-- 11,827 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/*========================== begin_copyright_notice ============================

Copyright (C) 2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifdef cl_khr_integer_dot_product
//===-  IBiF_Dot_Product.cl -===============================================//
//
// This file defines OpenCL dot product functions.
// These functions are part of the KHR cl_khr_integer_dot_product extension.
//
//===--------------------------------------------------------------------===//

// Supported Caps related to the cl_khr_integer_dot_product extension
// Each macro below is only tested with #ifdef further down in this file and
// switches on one group of dot()/dot_acc_sat() instantiations.
// Gates the signed x signed overloads.
#define __opencl_c_dot_product_signed
// Gates the mixed signed/unsigned overloads.
#define __opencl_c_dot_product_mixed_signedness
// Gates the dot_acc_sat() overloads.
#define __opencl_c_integer_dot_product_saturation_accumulation
// Gates the overloads that take int/uint scalar operands plus a trailing
// 'packed' argument.
#define __opencl_c_integer_dot_product_input_4x8bit_packed

// Currently only CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR is supported.

// Generates the public overload dot(TYPE_ARG1, TYPE_ARG2) -> TYPE.
// The generated function does no arithmetic itself: it passes a and b, in
// that order, to __builtin_spirv_Op<TYPE_SUFFIX>DotKHR_<ARG_SUFFIX>_<ARG_SUFFIX>
// and returns whatever that builtin returns.
#define DEFN_INTEL_DOT_PRODUCT(TYPE, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, ARG_SUFFIX) \
    INLINE TYPE OVERLOADABLE dot(TYPE_ARG1 a, TYPE_ARG2 b) \
    { \
        const TYPE result = \
            __builtin_spirv_Op##TYPE_SUFFIX##DotKHR_##ARG_SUFFIX##_##ARG_SUFFIX(a, b); \
        return result; \
    }

// Generates the public overload dot_acc_sat(TYPE_ARG1, TYPE_ARG2, TYPE) -> TYPE.
// The generated function forwards a, b and acc unchanged to
// __builtin_spirv_Op<TYPE_SUFFIX>DotAccSatKHR_<ARG_SUFFIX>_<ARG_SUFFIX>_<ACC_SUFFIX>
// and returns that builtin's result.
#define DEFN_INTEL_DOT_PRODUCT_SAT(TYPE, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, ARG_SUFFIX, ACC_SUFFIX) \
    INLINE TYPE OVERLOADABLE dot_acc_sat(TYPE_ARG1 a, TYPE_ARG2 b, TYPE acc) \
    { \
        const TYPE result = \
            __builtin_spirv_Op##TYPE_SUFFIX##DotAccSatKHR_##ARG_SUFFIX##_##ARG_SUFFIX##_##ACC_SUFFIX(a, b, acc); \
        return result; \
    }

// Generates the definition of
//   TYPE __builtin_spirv_Op<TYPE_SUFFIX>DotKHR_<ARG_SUFFIX>_<ARG_SUFFIX>(TYPE_ARG1 a, TYPE_ARG2 b)
// Each operand is written into a union through the member of its own type and
// read back through the int member; the two ints are then passed to
// __builtin_IB_dp4a_<TYPE_SUFFIX_IB> with an accumulator of 0.
// NOTE(review): the reinterpretation presumes TYPE_ARG1 and TYPE_ARG2 are the
// same size as int -- confirm for every instantiation.
// The last union member is terminated with ';' as the language grammar
// requires (it was previously missing and only accepted as an extension).
#define DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(TYPE, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, ARG_SUFFIX, TYPE_SUFFIX_IB)   \
TYPE __builtin_spirv_Op##TYPE_SUFFIX##DotKHR_##ARG_SUFFIX##_##ARG_SUFFIX(TYPE_ARG1 a, TYPE_ARG2 b)                  \
{                                                                                                                   \
    union { int _i; TYPE_ARG1 _arg1; TYPE_ARG2 _arg2; } a1, a2;                                                     \
    a1._arg1 = a;                                                                                                   \
    a2._arg2 = b;                                                                                                   \
    return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, a1._i, a2._i);                                                     \
}

// Generates the definition of
//   TYPE __builtin_spirv_Op<TYPE_SUFFIX>DotAccSatKHR_<ARG_SUFFIX>_<ARG_SUFFIX>_<ACC_SUFFIX>(TYPE_ARG1 a, TYPE_ARG2 b, TYPE acc)
// Each operand is written into a union through the member of its own type and
// read back through the int member; the two ints are then passed to
// __builtin_IB_dp4a_<TYPE_SUFFIX_IB> with acc as the accumulator.
// NOTE(review): no saturation is applied in this body; the result is whatever
// __builtin_IB_dp4a_* returns -- confirm that matches the saturating behavior
// the function name implies.
// NOTE(review): the reinterpretation presumes TYPE_ARG1 and TYPE_ARG2 are the
// same size as int -- confirm for every instantiation.
// The last union member is terminated with ';' as the language grammar
// requires (it was previously missing and only accepted as an extension).
#define DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(TYPE, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, ARG_SUFFIX, ACC_SUFFIX, TYPE_SUFFIX_IB)   \
TYPE __builtin_spirv_Op##TYPE_SUFFIX##DotAccSatKHR_##ARG_SUFFIX##_##ARG_SUFFIX##_##ACC_SUFFIX(TYPE_ARG1 a, TYPE_ARG2 b, TYPE acc)   \
{                                                                                                                                   \
    union { int _i; TYPE_ARG1 _arg1; TYPE_ARG2 _arg2; } a1, a2;                                                                     \
    a1._arg1 = a;                                                                                                                   \
    a2._arg2 = b;                                                                                                                   \
    return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(acc, a1._i, a2._i);                                                                   \
}

// Generates dot(u<ARG_TYPE>, ARG_TYPE) -> TYPE, i.e. the overload whose first
// operand has the 'u'-prefixed type. The operands are handed to
// __builtin_spirv_OpSUDotKHR_<ARG_SUFFIX>_<ARG_SUFFIX> in swapped order (b, a),
// so the ARG_TYPE value comes first and the u<ARG_TYPE> value second.
#define DEFN_INTEL_DOT_PRODUCT_US(TYPE, ARG_TYPE, ARG_SUFFIX) \
    INLINE TYPE OVERLOADABLE dot(u##ARG_TYPE a, ARG_TYPE b) \
    { \
        const TYPE result = \
            __builtin_spirv_OpSUDotKHR_##ARG_SUFFIX##_##ARG_SUFFIX(b, a); \
        return result; \
    }

// Generates dot_acc_sat(u<ARG_TYPE>, ARG_TYPE, TYPE) -> TYPE, i.e. the
// accumulating overload whose first operand has the 'u'-prefixed type.
// The operands are handed to
// __builtin_spirv_OpSUDotAccSatKHR_<ARG_SUFFIX>_<ARG_SUFFIX>_<ACC_SUFFIX>
// in swapped order (b, a), followed by acc.
#define DEFN_INTEL_DOT_PRODUCT_SAT_US(TYPE, ARG_TYPE, ARG_SUFFIX, ACC_SUFFIX) \
    INLINE TYPE OVERLOADABLE dot_acc_sat(u##ARG_TYPE a, ARG_TYPE b, TYPE acc) \
    { \
        const TYPE result = \
            __builtin_spirv_OpSUDotAccSatKHR_##ARG_SUFFIX##_##ARG_SUFFIX##_##ACC_SUFFIX(b, a, acc); \
        return result; \
    }

// Generates the definition of
//   TYPE __builtin_spirv_Op<TYPE_SUFFIX>DotKHR_i32_i32_i32(TYPE_ARG1 a, TYPE_ARG2 b, int packed)
// Each operand is written into a union through the member of its own type and
// read back through the int member; the two ints are then passed to
// __builtin_IB_dp4a_<TYPE_SUFFIX_IB> with an accumulator of 0.
// 'packed' is part of the signature but is not read in the body.
// The last union member is terminated with ';' as the language grammar
// requires (it was previously missing and only accepted as an extension).
#define DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(TYPE, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, TYPE_SUFFIX_IB)    \
TYPE __builtin_spirv_Op##TYPE_SUFFIX##DotKHR_i32_i32_i32(TYPE_ARG1 a, TYPE_ARG2 b, int packed)                  \
{                                                                                                               \
    union { int _i; TYPE_ARG1 _arg1; TYPE_ARG2 _arg2; } a1, a2;                                                 \
    a1._arg1 = a;                                                                                               \
    a2._arg2 = b;                                                                                               \
    return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, a1._i, a2._i);                                                 \
}

// Generates the definition of
//   TYPE __builtin_spirv_Op<TYPE_SUFFIX>DotAccSatKHR_i32_i32_<ACC_SUFFIX>_i32(TYPE_ARG1 a, TYPE_ARG2 b, TYPE acc, int packed)
// Each operand is written into a union through the member of its own type and
// read back through the int member; the two ints are then passed to
// __builtin_IB_dp4a_<TYPE_SUFFIX_IB> with acc as the accumulator.
// 'packed' is part of the signature but is not read in the body.
// NOTE(review): no saturation is applied in this body; the result is whatever
// __builtin_IB_dp4a_* returns -- confirm that matches the saturating behavior
// the function name implies.
// The last union member is terminated with ';' as the language grammar
// requires (it was previously missing and only accepted as an extension).
#define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(TYPE, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, ACC_SUFFIX, TYPE_SUFFIX_IB)    \
TYPE __builtin_spirv_Op##TYPE_SUFFIX##DotAccSatKHR_i32_i32_##ACC_SUFFIX##_i32(TYPE_ARG1 a, TYPE_ARG2 b, TYPE acc, int packed)   \
{                                                                                                                               \
    union { int _i; TYPE_ARG1 _arg1; TYPE_ARG2 _arg2; } a1, a2;                                                                 \
    a1._arg1 = a;                                                                                                               \
    a2._arg2 = b;                                                                                                               \
    return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(acc, a1._i, a2._i);                                                               \
}

// Generates the public overload dot(TYPE_ARG1, TYPE_ARG2, int packed) -> TYPE.
// All three arguments are forwarded unchanged to
// __builtin_spirv_Op<TYPE_SUFFIX>DotKHR_i32_i32_i32 and its result is returned.
#define DEFN_INTEL_DOT_PRODUCT_PACKED(TYPE, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX) \
    INLINE TYPE OVERLOADABLE dot(TYPE_ARG1 a, TYPE_ARG2 b, int packed) \
    { \
        const TYPE result = \
            __builtin_spirv_Op##TYPE_SUFFIX##DotKHR_i32_i32_i32(a, b, packed); \
        return result; \
    }

// Generates the public overload
//   dot_acc_sat(TYPE_ARG1, TYPE_ARG2, TYPE acc, int packed) -> TYPE.
// All four arguments are forwarded unchanged to
// __builtin_spirv_Op<TYPE_SUFFIX>DotAccSatKHR_i32_i32_<ACC_SUFFIX>_i32 and its
// result is returned.
#define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(TYPE, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, ACC_SUFFIX) \
    INLINE TYPE OVERLOADABLE dot_acc_sat(TYPE_ARG1 a, TYPE_ARG2 b, TYPE acc, int packed) \
    { \
        const TYPE result = \
            __builtin_spirv_Op##TYPE_SUFFIX##DotAccSatKHR_i32_i32_##ACC_SUFFIX##_i32(a, b, acc, packed); \
        return result; \
    }

// Generates dot(u<ARG_TYPE>, ARG_TYPE, int packed) -> TYPE, i.e. the packed
// overload whose first operand has the 'u'-prefixed type. The two operands
// are handed to __builtin_spirv_OpSUDotKHR_<ARG_SUFFIX>_<ARG_SUFFIX>_i32 in
// swapped order (b, a); packed is forwarded as the last argument.
#define DEFN_INTEL_DOT_PRODUCT_PACKED_US(TYPE, ARG_TYPE, ARG_SUFFIX) \
    INLINE TYPE OVERLOADABLE dot(u##ARG_TYPE a, ARG_TYPE b, int packed) \
    { \
        const TYPE result = \
            __builtin_spirv_OpSUDotKHR_##ARG_SUFFIX##_##ARG_SUFFIX##_i32(b, a, packed); \
        return result; \
    }

// Generates dot_acc_sat(u<ARG_TYPE>, ARG_TYPE, TYPE acc, int packed) -> TYPE,
// i.e. the packed accumulating overload whose first operand has the
// 'u'-prefixed type. The two operands are handed to
// __builtin_spirv_OpSUDotAccSatKHR_<ARG_SUFFIX>_<ARG_SUFFIX>_<ACC_SUFFIX>_i32
// in swapped order (b, a); acc and packed follow unchanged.
#define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_US(TYPE, ARG_TYPE, ARG_SUFFIX, ACC_SUFFIX) \
    INLINE TYPE OVERLOADABLE dot_acc_sat(u##ARG_TYPE a, ARG_TYPE b, TYPE acc, int packed) \
    { \
        const TYPE result = \
            __builtin_spirv_OpSUDotAccSatKHR_##ARG_SUFFIX##_##ARG_SUFFIX##_##ACC_SUFFIX##_i32(b, a, acc, packed); \
        return result; \
    }


// Unsigned x unsigned overloads. This group has no capability guard of its
// own; only the packed and saturating sub-groups are guarded.
// Every DEFN_*_BUILTIN_SPIRV line defines a builtin and is immediately
// followed by the DEFN_* line whose generated overload calls that builtin,
// so the order of the lines matters.
// NOTE(review): the ushort2 variants use the same __builtin_IB_dp4a_uu path
// as the uchar4 variants, while the comment near the top of this file states
// that only the 4x8-bit input capability is supported -- confirm the
// 2x16-bit results are correct.
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(uint, uchar4, uchar4, U, v4i8, uu)
DEFN_INTEL_DOT_PRODUCT(uint, uchar4, uchar4, U, v4i8)
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(uint, ushort2, ushort2, U, v2i16, uu)
DEFN_INTEL_DOT_PRODUCT(uint, ushort2, ushort2, U, v2i16)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
// Packed form: both operands are uint scalars, plus the 'packed' argument.
DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(uint, uint, uint, U, uu)
DEFN_INTEL_DOT_PRODUCT_PACKED(uint, uint, uint, U)
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
// dot_acc_sat() forms with a uint accumulator.
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(uint, uchar4, uchar4, U, v4i8, i32, uu)
DEFN_INTEL_DOT_PRODUCT_SAT(uint, uchar4, uchar4, U, v4i8, i32)
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(uint, ushort2, ushort2, U, v2i16, i32, uu)
DEFN_INTEL_DOT_PRODUCT_SAT(uint, ushort2, ushort2, U, v2i16, i32)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(uint, uint, uint, U, i32, uu)
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(uint, uint, uint, U, i32)
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#endif // __opencl_c_integer_dot_product_saturation_accumulation

// Signed x signed overloads, enabled by __opencl_c_dot_product_signed.
// Every DEFN_*_BUILTIN_SPIRV line defines a builtin and is immediately
// followed by the DEFN_* line whose generated overload calls that builtin,
// so the order of the lines matters.
// NOTE(review): the short2 variants use the same __builtin_IB_dp4a_ss path as
// the char4 variants, while the comment near the top of this file states that
// only the 4x8-bit input capability is supported -- confirm the 2x16-bit
// results are correct.
#ifdef __opencl_c_dot_product_signed
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(int, char4, char4, S, v4i8, ss)
DEFN_INTEL_DOT_PRODUCT(int, char4, char4, S, v4i8)
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(int, short2, short2, S, v2i16, ss)
DEFN_INTEL_DOT_PRODUCT(int, short2, short2, S, v2i16)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
// Packed form: both operands are int scalars, plus the 'packed' argument.
DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(int, int, int, S, ss)
DEFN_INTEL_DOT_PRODUCT_PACKED(int, int, int, S)
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
// dot_acc_sat() forms with an int accumulator.
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, char4, char4, S, v4i8, i32, ss)
DEFN_INTEL_DOT_PRODUCT_SAT(int, char4, char4, S, v4i8, i32)
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, short2, short2, S, v2i16, i32, ss)
DEFN_INTEL_DOT_PRODUCT_SAT(int, short2, short2, S, v2i16, i32)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(int, int, int, S, i32, ss)
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(int, int, int, S, i32)
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#endif // __opencl_c_integer_dot_product_saturation_accumulation
#endif // __opencl_c_dot_product_signed

// Mixed-signedness overloads, enabled by
// __opencl_c_dot_product_mixed_signedness.
// For each operand shape three lines are emitted, in this order:
//   1. the SU builtin definition (signed first operand, unsigned second),
//   2. the overload taking (signed, unsigned), which calls that builtin,
//   3. the *_US overload taking (unsigned, signed), which calls the same SU
//      builtin with its operands swapped.
// The builtin must therefore stay ahead of both overloads.
// NOTE(review): the short2/ushort2 variants use the same
// __builtin_IB_dp4a_su path as the char4/uchar4 variants, while the comment
// near the top of this file states that only the 4x8-bit input capability is
// supported -- confirm the 2x16-bit results are correct.
#ifdef __opencl_c_dot_product_mixed_signedness
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(int, char4, uchar4, SU, v4i8, su)
DEFN_INTEL_DOT_PRODUCT(int, char4, uchar4, SU, v4i8)
DEFN_INTEL_DOT_PRODUCT_US(int, char4, v4i8)
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(int, short2, ushort2, SU, v2i16, su)
DEFN_INTEL_DOT_PRODUCT(int, short2, ushort2, SU, v2i16)
DEFN_INTEL_DOT_PRODUCT_US(int, short2, v2i16)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
// Packed form: int and uint scalar operands, plus the 'packed' argument.
DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(int, int, uint, SU, su)
DEFN_INTEL_DOT_PRODUCT_PACKED(int, int, uint, SU)
DEFN_INTEL_DOT_PRODUCT_PACKED_US(int, int, i32)
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
// dot_acc_sat() forms with an int accumulator.
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, char4, uchar4, SU, v4i8, i32, su)
DEFN_INTEL_DOT_PRODUCT_SAT(int, char4, uchar4, SU, v4i8, i32)
DEFN_INTEL_DOT_PRODUCT_SAT_US(int, char4, v4i8, i32)
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, short2, ushort2, SU, v2i16, i32, su)
DEFN_INTEL_DOT_PRODUCT_SAT(int, short2, ushort2, SU, v2i16, i32)
DEFN_INTEL_DOT_PRODUCT_SAT_US(int, short2, v2i16, i32)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(int, int, uint, SU, i32, su)
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(int, int, uint, SU, i32)
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_US(int, int, i32, i32)
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#endif // __opencl_c_integer_dot_product_saturation_accumulation
#endif // __opencl_c_dot_product_mixed_signedness

// For possible future support of CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_ALL_KHR
// extension, one needs to:
// - Add arguments of 'short/ushort' type and 'long' result type
// - Add different vector sizes such as 2,3,8,16
#endif // cl_khr_integer_dot_product