1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
|
/*
* Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef ARM_COMPUTE_CLHELPERS_H
#define ARM_COMPUTE_CLHELPERS_H
#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"
#include <set>
#include <string>
namespace arm_compute
{
/* Forward declarations of types that the declarations below only name in their signatures. */
class CLCompileContext;
class CLBuildOptions;
enum class DataType;
/** Maximum width of an OpenCL vector. */
static constexpr unsigned int max_cl_vector_width = 16;
/** Maximum number of iterations for manual loop unrolling.
*
* set_unroll_with_pragma() forces unrolling with a pragma when any iteration count is greater than this value.
*/
static constexpr int max_manual_loop_unrolling = 128;
/** Translate a tensor data type into the matching OpenCL type.
*
* @param[in] dt @ref DataType to be translated to an OpenCL type.
*
* @return A string specifying the OpenCL type to be used.
*/
std::string get_cl_type_from_data_type(const DataType &dt);
/** Translate a tensor data type into the matching OpenCL promoted type.
*
* @param[in] dt @ref DataType for which the promoted OpenCL type is requested.
*
* @return A string specifying the promoted OpenCL type to be used.
*/
std::string get_cl_promoted_type_from_data_type(const DataType &dt);
/** Translate an element size into an unsigned integer OpenCL data type.
*
* @param[in] element_size Size in bytes of an element.
*
* @return A string specifying the unsigned OpenCL type to be used.
*/
std::string get_cl_unsigned_type_from_element_size(size_t element_size);
/** Translate an element size into a signed integer OpenCL data type.
*
* @param[in] element_size Size in bytes of an element.
*
* @return A string specifying the signed OpenCL type to be used.
*/
std::string get_cl_signed_type_from_element_size(size_t element_size);
/** Translate a tensor data type into the matching OpenCL select type.
*
* @param[in] dt @ref DataType to be translated to an OpenCL select type.
*
* @return A string specifying the OpenCL select type to be used.
*/
std::string get_cl_select_type_from_data_type(const DataType &dt);
/** Translate a tensor data type into the matching OpenCL dot8 accumulator type.
*
* @param[in] dt @ref DataType to be translated to an OpenCL dot8 accumulator type.
*
* @return A string specifying the OpenCL dot8 accumulator type to be used.
*/
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt);
/** Get the size of a data type in number of bits.
*
* @note The size is returned as a string, not as a numeric value.
*
* @param[in] dt @ref DataType.
*
* @return A string holding the number of bits in the data type specified.
*/
std::string get_data_size_from_data_type(const DataType &dt);
/** Helper function to get the GPU target from a CL device
*
* @param[in] device A CL device
*
* @return The GPU target, as a @ref GPUTarget
*/
GPUTarget get_target_from_device(const cl::Device &device);
/** Helper function to get the highest OpenCL version supported by a CL device
*
* @param[in] device A CL device
*
* @return The highest OpenCL version supported, as a @ref CLVersion
*/
CLVersion get_cl_version(const cl::Device &device);
/** Helper function to get the cl_image pitch alignment in pixels
*
* @param[in] device A CL device
*
* @return The cl_image pitch alignment in pixels, or 0 if an error occurs
*/
size_t get_cl_image_pitch_alignment(const cl::Device &device);
/** Helper function to check whether non-uniform work groups are supported
*
* @param[in] device A CL device
*
* @return True if the feature is supported
*/
bool get_cl_non_uniform_work_group_supported(const cl::Device &device);
/** Helper function to check whether a given extension is supported by a CL device
*
* @param[in] device         A CL device
* @param[in] extension_name Name of the extension to be checked, passed as a C string
*
* @return True if the extension is supported
*/
bool device_supports_extension(const cl::Device &device, const char *extension_name);
/** Helper function to check whether the cl_khr_fp16 extension is supported by a CL device
*
* @param[in] device A CL device
*
* @return True if the extension is supported
*/
bool fp16_supported(const cl::Device &device);
/** Helper function to check whether the arm_non_uniform_work_group_size extension is supported by a CL device
*
* @param[in] device A CL device
*
* @return True if the extension is supported
*/
bool arm_non_uniform_workgroup_supported(const cl::Device &device);
/** Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported by a CL device
*
* @param[in] device A CL device
*
* @return True if the extension is supported
*/
bool dot8_supported(const cl::Device &device);
/** Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported by a CL device
*
* @param[in] device A CL device
*
* @return True if the extension is supported
*/
bool dot8_acc_supported(const cl::Device &device);
/** Check whether a Winograd configuration is supported on OpenCL
*
* The configuration is defined through the output tile, the kernel size and the data layout.
*
* @param[in] output_tile Output tile for the Winograd filtering algorithm
* @param[in] kernel_size Kernel size for the Winograd filtering algorithm
* @param[in] data_layout Data layout of the input tensor
*
* @return True if the configuration is supported
*/
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile,
const Size2D &kernel_size,
DataLayout data_layout);
/** Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors
*
* @param[in] device A CL device
* @param[in] dt     @ref DataType for which the preferred vector width is requested
*
* @return The preferred vector width
*/
size_t preferred_vector_width(const cl::Device &device, DataType dt);
/** Helper function to check if "dummy work-items" are preferred in order to have a power-of-two NDRange
*
* When dummy work-items are enabled, it is the OpenCL kernel's responsibility to check whether a work-item is out of range.
*
* @param[in] device A CL device
*
* @return True if dummy work-items should be preferred to dispatch the NDRange
*/
bool preferred_dummy_work_items_support(const cl::Device &device);
/** Helper function to check whether the cl_khr_image2d_from_buffer extension is supported by a CL device
*
* @param[in] device A CL device
*
* @return True if the extension is supported
*/
bool image2d_from_buffer_supported(const cl::Device &device);
/** Create an OpenCL kernel using a compile context
*
* @param[in] ctx         A compile context to be used to create the OpenCL kernel.
* @param[in] kernel_name The kernel name.
* @param[in] build_opts  The build options to be used for the OpenCL kernel compilation. Defaults to an empty set.
*
* @return An OpenCL kernel
*/
cl::Kernel create_kernel(const CLCompileContext &ctx,
const std::string &kernel_name,
const std::set<std::string> &build_opts = std::set<std::string>());
/** Create a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size.
* If input width is smaller than 128 we can use fewer threads than 8.
*
* @param[in] input_dimension Number of elements along the dimension to apply the parallelization
* @param[in] vector_size     Size of the vector in OpenCL
*
* @return An LWS hint object
*/
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size);
/** Helper function to check if the workgroup batch size modifier parameter is supported on the CL device
*
* @param[in] device CL device to check for support
*
* @return True if the workgroup batch size modifier parameter is supported, false otherwise
*/
bool get_wbsm_support_info(const cl::Device &device);
/** Helper function to set the workgroup batch size modifier parameter in the kernel
*
* @param[in] kernel    CL kernel on which to set the workgroup batch size modifier parameter
* @param[in] wbsm_hint Workgroup batch size modifier to use
*/
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint);
/** Helper function to check if we can export the tensor to cl_image
*
* @param[in] tensor Info of the input tensor
*
* @return True if we can export the tensor to cl_image
*/
bool export_to_cl_image(const ITensorInfo *tensor);
/** Helper function to force unroll with pragma when any of the input values (iterations) are greater than @ref max_manual_loop_unrolling
*
* This function passes UNROLL_WITH_PRAGMA at compile time when any of the input values are greater than @ref max_manual_loop_unrolling
*
* @param[in,out] built_opts OpenCL kernel build options (taken by non-const reference)
* @param[in]     values     Input values (iterations)
*
*/
void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values);
/** Helper function to check whether the cl_arm_matrix_multiply extension is supported by a CL device
*
* @param[in] device A CL device
*
* @return True if the extension is supported
*/
bool arm_matrix_multiply_supported(const cl::Device &device);
/** Check whether the cl_khr_command_buffer extension is supported by the specified CL device.
*
* @param[in] device The CL device
*
* @return True if the extension is supported by the CL device.
*/
bool command_buffer_supported(const cl::Device &device);
/** Check whether the cl_khr_command_buffer_mutable_dispatch extension is supported by the specified CL device.
*
* @param[in] device The CL device
*
* @return True if the extension is supported by the CL device.
*/
bool command_buffer_mutable_dispatch_supported(const cl::Device &device);
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLHELPERS_H */
|