File: IBiF_Dot_Product.cl

package info (click to toggle)
intel-graphics-compiler 1.0.17791.18-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 102,312 kB
  • sloc: cpp: 935,343; lisp: 286,143; ansic: 16,196; python: 3,279; yacc: 2,487; lex: 1,642; pascal: 300; sh: 174; makefile: 27
file content (170 lines) | stat: -rw-r--r-- 11,601 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/*========================== begin_copyright_notice ============================

Copyright (C) 2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifdef cl_khr_integer_dot_product
//===-  IBiF_Dot_Product.cl -===============================================//
//
// This file defines OpenCL dot product functions.
// These functions are part of the KHR cl_khr_integer_dot_product extension.
//
//===--------------------------------------------------------------------===//

// Supported Caps related to the cl_khr_integer_dot_product extension
#define __opencl_c_dot_product_signed
#define __opencl_c_dot_product_mixed_signedness
#define __opencl_c_integer_dot_product_saturation_accumulation
#define __opencl_c_integer_dot_product_input_4x8bit_packed

// Currently support CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR only

#define DEFN_INTEL_DOT_PRODUCT(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW)     \
INLINE TYPE_RET OVERLOADABLE dot(TYPE_ARG1 a, TYPE_ARG2 b)                                                  \
{                                                                                                           \
    return SPIRV_BUILTIN(TYPE_SUFFIX##DotKHR, MANGLING_OLD, MANGLING_NEW)(a, b);                            \
}

#define DEFN_INTEL_DOT_PRODUCT_SAT(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW)     \
INLINE TYPE_RET OVERLOADABLE dot_acc_sat(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc)                                \
{                                                                                                               \
    return SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(a, b, acc);                     \
}

#define DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW, TYPE_SUFFIX_IB)     \
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b)                      \
{                                                                                                                                         \
    return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b));                                                               \
}

#define DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW, TYPE_SUFFIX_IB, SAT_PREFIX)     \
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc)                  \
{                                                                                                                                                         \
    TYPE_RET product = __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b));                                                                   \
    return SPIRV_OCL_BUILTIN(SAT_PREFIX##_add_sat, _i32_i32,)(product, acc);                                                                              \
}

#define DEFN_INTEL_DOT_PRODUCT_US(TYPE_RET, TYPE_ARG, MANGLING_OLD, MANGLING_NEW)     \
INLINE TYPE_RET OVERLOADABLE dot(u##TYPE_ARG a, TYPE_ARG b)                           \
{                                                                                     \
    return SPIRV_BUILTIN(SUDotKHR, MANGLING_OLD, MANGLING_NEW)(b, a);                 \
}

#define DEFN_INTEL_DOT_PRODUCT_SAT_US(TYPE_RET, TYPE_ARG, MANGLING_OLD, MANGLING_NEW)        \
INLINE TYPE_RET OVERLOADABLE dot_acc_sat(u##TYPE_ARG a, TYPE_ARG b, TYPE_RET acc)            \
{                                                                                            \
    return SPIRV_BUILTIN(SUDotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(b, a, acc);             \
}

#define DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW, TYPE_SUFFIX_IB)     \
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_ARG1 packed)           \
{                                                                                                                                                \
    return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b));                                                                      \
}

#define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW, TYPE_SUFFIX_IB, SAT_PREFIX)     \
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc, TYPE_ARG1 packed)       \
{                                                                                                                                                                \
    TYPE_RET product = __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b));                                                                          \
    return SPIRV_OCL_BUILTIN(SAT_PREFIX##_add_sat, _i32_i32,)(product, acc);                                                                                     \
}

#define DEFN_INTEL_DOT_PRODUCT_PACKED(TYPE_RET, ARG_TYPES, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW)     \
INLINE TYPE_RET OVERLOADABLE dot_4x8packed_##ARG_TYPES##_##TYPE_RET(uint a, uint b)                     \
{                                                                                                       \
    return SPIRV_BUILTIN(TYPE_SUFFIX##DotKHR, MANGLING_OLD, MANGLING_NEW)(a, b, 0);                     \
}

#define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(TYPE_RET, TYPE_SUFFIX, ARG_TYPES, MANGLING_OLD, MANGLING_NEW)        \
INLINE TYPE_RET OVERLOADABLE dot_acc_sat_4x8_packed_##ARG_TYPES##_##TYPE_RET(uint a, uint b, TYPE_RET acc)     \
{                                                                                                              \
    return SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(a, b, acc, 0);                 \
}

#define DEFN_INTEL_DOT_PRODUCT_PACKED_US                              \
INLINE int OVERLOADABLE dot_4x8packed_us_int(uint a, uint b)          \
{                                                                     \
    return SPIRV_BUILTIN(SUDotKHR, _i32_i32_i32, _Rint)(b, a, 0);     \
}

#define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_US                                         \
INLINE int OVERLOADABLE dot_acc_sat_4x8packed_us_int(uint a, uint b, int acc)        \
{                                                                                    \
    return SPIRV_BUILTIN(SUDotAccSatKHR, _i32_i32_i32_i32, _Rint)(b, a, acc, 0);     \
}

DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(uint, uchar4, uchar4, U, _v4i8_v4i8, _Ruint, uu)
DEFN_INTEL_DOT_PRODUCT(uint, uchar4, uchar4, U, _v4i8_v4i8, _Ruint)
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(uint, ushort2, ushort2, U, _v2i16_v2i16, _Ruint, uu)
DEFN_INTEL_DOT_PRODUCT(uint, ushort2, ushort2, U, _v2i16_v2i16, _Ruint)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(uint, uint, uint, U, _i32_i32_i32, _Ruint, uu)
DEFN_INTEL_DOT_PRODUCT_PACKED(uint, uu, U, _i32_i32_i32, _Ruint)
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(uint, uchar4, uchar4, U, _v4i8_v4i8_i32, _Ruint, uu, u)
DEFN_INTEL_DOT_PRODUCT_SAT(uint, uchar4, uchar4, U, _v4i8_v4i8_i32, _Ruint)
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(uint, ushort2, ushort2, U, _v2i16_v2i16_i32, _Ruint, uu, u)
DEFN_INTEL_DOT_PRODUCT_SAT(uint, ushort2, ushort2, U, _v2i16_v2i16_i32, _Ruint)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(uint, uint, uint, U, _i32_i32_i32_i32, _Ruint, uu, u)
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(uint, U, uu, _i32_i32_i32_i32, _Ruint)
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#endif // __opencl_c_integer_dot_product_saturation_accumulation

#ifdef __opencl_c_dot_product_signed
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(int, char4, char4, S, _v4i8_v4i8, _Rint, ss)
DEFN_INTEL_DOT_PRODUCT(int, char4, char4, S, _v4i8_v4i8, _Rint)
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(int, short2, short2, S, _v2i16_v2i16, _Rint, ss)
DEFN_INTEL_DOT_PRODUCT(int, short2, short2, S, _v2i16_v2i16, _Rint)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(int, int, int, S, _i32_i32_i32, _Rint, ss)
DEFN_INTEL_DOT_PRODUCT_PACKED(int, ss, S, _i32_i32_i32, _Rint)
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, char4, char4, S, _v4i8_v4i8_i32, _Rint, ss, s)
DEFN_INTEL_DOT_PRODUCT_SAT(int, char4, char4, S, _v4i8_v4i8_i32, _Rint)
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, short2, short2, S, _v2i16_v2i16_i32, _Rint, ss, s)
DEFN_INTEL_DOT_PRODUCT_SAT(int, short2, short2, S, _v2i16_v2i16_i32, _Rint)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(int, int, int, S, _i32_i32_i32_i32, _Rint, ss, s)
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(int, S, ss, _i32_i32_i32_i32, _Rint)
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#endif // __opencl_c_integer_dot_product_saturation_accumulation
#endif // __opencl_c_dot_product_signed

#ifdef __opencl_c_dot_product_mixed_signedness
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(int, char4, uchar4, SU, _v4i8_v4i8, _Rint, su)
DEFN_INTEL_DOT_PRODUCT(int, char4, uchar4, SU, _v4i8_v4i8, _Rint)
DEFN_INTEL_DOT_PRODUCT_US(int, char4, _v4i8_v4i8, _Rint)
DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(int, short2, ushort2, SU, _v2i16_v2i16, _Rint, su)
DEFN_INTEL_DOT_PRODUCT(int, short2, ushort2, SU, _v2i16_v2i16, _Rint)
DEFN_INTEL_DOT_PRODUCT_US(int, short2, _v2i16_v2i16, _Rint)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(int, int, uint, SU, _i32_i32_i32, _Rint, su)
DEFN_INTEL_DOT_PRODUCT_PACKED(int, su, SU, _i32_i32_i32, _Rint)
DEFN_INTEL_DOT_PRODUCT_PACKED_US
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, char4, uchar4, SU, _v4i8_v4i8_i32, _Rint, su, s)
DEFN_INTEL_DOT_PRODUCT_SAT(int, char4, uchar4, SU, _v4i8_v4i8_i32, _Rint)
DEFN_INTEL_DOT_PRODUCT_SAT_US(int, char4, _v4i8_v4i8_i32, _Rint)
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, short2, ushort2, SU, _v2i16_v2i16_i32, _Rint, su, s)
DEFN_INTEL_DOT_PRODUCT_SAT(int, short2, ushort2, SU, _v2i16_v2i16_i32, _Rint)
DEFN_INTEL_DOT_PRODUCT_SAT_US(int, short2, _v2i16_v2i16_i32, _Rint)
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(int, int, uint, SU, _i32_i32_i32_i32, _Rint, su, s)
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(int, SU, su, _i32_i32_i32_i32, _Rint)
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_US
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
#endif // __opencl_c_integer_dot_product_saturation_accumulation
#endif // __opencl_c_dot_product_mixed_signedness

// For possible future support of CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_ALL_KHR
// extension, one needs to:
// - Add arguments of 'short/ushort' type and 'long' result type
// - Add different vector sizes such as 2,3,8,16
#endif // cl_khr_integer_dot_product