1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337
|
/* ************************************************************************
* Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
* ies of the Software, and to permit persons to whom the Software is furnished
* to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
* PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
* CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ************************************************************************ */
/*! \file
* \brief rocblas-types.h defines data types used by rocblas
*/
#ifndef ROCBLAS_TYPES_H
#define ROCBLAS_TYPES_H
#include "rocblas-export.h"
#include "rocblas_bfloat16.h"
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#if !defined(ROCBLAS_INTERNAL_API) && !defined(ROCBLAS_NO_DEPRECATED_WARNINGS)
#define ROCBLAS_INTERNAL_DEPRECATION \
__attribute__(( \
deprecated("rocBLAS internal API may change each release. Advise against using.")))
#else
#define ROCBLAS_INTERNAL_DEPRECATION
#endif
#define ROCBLAS_INTERNAL_EXPORT ROCBLAS_EXPORT ROCBLAS_INTERNAL_DEPRECATION
#define ROCBLAS_INTERNAL_EXPORT_NOINLINE \
ROCBLAS_EXPORT __attribute__((noinline)) ROCBLAS_INTERNAL_DEPRECATION
/*! \brief rocblas_handle is a structure holding the rocblas library context.
* It must be initialized using rocblas_create_handle(),
* and the returned handle must be passed
* to all subsequent library function calls.
* It should be destroyed at the end using rocblas_destroy_handle().
*/
typedef struct _rocblas_handle* rocblas_handle;
/*! \brief Forward declaration of hipStream_t */
typedef struct ihipStream_t* hipStream_t;
/*! \brief Forward declaration of hipEvent_t */
typedef struct ihipEvent_t* hipEvent_t;
/*! \brief Opaque base class for device memory allocation */
struct rocblas_device_malloc_base;
// integer types
/*! \brief To specify whether int32 is used for LP64 or int64 is used for ILP64 */
#if defined(rocblas_ILP64)
typedef int64_t rocblas_int;
#else
typedef int32_t rocblas_int;
#endif
/*! \brief Stride between matrices or vectors in strided_batched functions */
#if defined(rocblas_ILP64)
typedef int64_t rocblas_stride;
#else
typedef int64_t rocblas_stride;
#endif
/*! \brief Single precision floating point type */
typedef float rocblas_float;
/*! \brief Double precision floating point type */
typedef double rocblas_double;
#ifdef ROCM_USE_FLOAT16
typedef _Float16 rocblas_half;
#else
/*! \brief Structure definition for rocblas_half */
typedef struct rocblas_half
{
uint16_t data;
} rocblas_half;
#endif
#if !(__cplusplus < 201402L || (!defined(__HCC__) && !defined(__HIPCC__)))
namespace std
{
__device__ __host__ constexpr rocblas_half real(const rocblas_half& a)
{
return a;
}
}
#endif
/* Structure definition of rocblas_int8x4
(For internal use only)
(4.2) When rocblas API takes input --a/b_type as i8_r,
we use --flags (0/1) to determine which TensileType (int8_t or int8x4) to be cast
in order to having a way to call gemm_ex_typecasting<?>(),
we can define a minimal definition of rocblas_int8x4, and int8 is for MI-kernel
********************************************************************************************/
typedef struct
{
int8_t a, b, c, d;
} rocblas_int8x4;
// complex types
#include "rocblas-complex-types.h"
/* ============================================================================================ */
/*! Parameter constants.
* numbering is consistent with CBLAS, ACML and most standard C BLAS libraries
*/
/*! \brief Used to specify whether the matrix is to be transposed or not. */
typedef enum rocblas_operation_
{
rocblas_operation_none = 111, /**< Operate with the matrix. */
rocblas_operation_transpose = 112, /**< Operate with the transpose of the matrix. */
rocblas_operation_conjugate_transpose
= 113 /**< Operate with the conjugate transpose of the matrix. */
} rocblas_operation;
/*! \brief Used by the Hermitian, symmetric and triangular matrix
* routines to specify whether the upper, or lower triangle is being referenced.
*/
typedef enum rocblas_fill_
{
rocblas_fill_upper = 121, /**< Upper triangle. */
rocblas_fill_lower = 122, /**< Lower triangle. */
rocblas_fill_full = 123
} rocblas_fill;
/*! \brief It is used by the triangular matrix routines to specify whether the
* matrix is unit triangular.
*/
typedef enum rocblas_diagonal_
{
rocblas_diagonal_non_unit = 131, /**< Non-unit triangular. */
rocblas_diagonal_unit = 132, /**< Unit triangular. */
} rocblas_diagonal;
/*! \brief Indicates the side matrix A is located relative to matrix B during multiplication. */
typedef enum rocblas_side_
{
rocblas_side_left = 141, /**< Multiply general matrix by symmetric,
Hermitian, or triangular matrix on the left. */
rocblas_side_right = 142, /**< Multiply general matrix by symmetric,
Hermitian, or triangular matrix on the right. */
rocblas_side_both = 143
} rocblas_side;
/*! Parameter constants.
* Numbering continues into next free decimal range but not shared with other BLAS libraries
*/
/*! \brief Indicates the precision width of data stored in a blas type. */
typedef enum rocblas_datatype_
{
rocblas_datatype_f16_r = 150, /**< 16-bit floating point, real */
rocblas_datatype_f32_r = 151, /**< 32-bit floating point, real */
rocblas_datatype_f64_r = 152, /**< 64-bit floating point, real */
rocblas_datatype_f16_c = 153, /**< 16-bit floating point, complex */
rocblas_datatype_f32_c = 154, /**< 32-bit floating point, complex */
rocblas_datatype_f64_c = 155, /**< 64-bit floating point, complex */
rocblas_datatype_i8_r = 160, /**< 8-bit signed integer, real */
rocblas_datatype_u8_r = 161, /**< 8-bit unsigned integer, real */
rocblas_datatype_i32_r = 162, /**< 32-bit signed integer, real */
rocblas_datatype_u32_r = 163, /**< 32-bit unsigned integer, real */
rocblas_datatype_i8_c = 164, /**< 8-bit signed integer, complex */
rocblas_datatype_u8_c = 165, /**< 8-bit unsigned integer, complex */
rocblas_datatype_i32_c = 166, /**< 32-bit signed integer, complex */
rocblas_datatype_u32_c = 167, /**< 32-bit unsigned integer, complex */
rocblas_datatype_bf16_r = 168, /**< 16-bit bfloat, real */
rocblas_datatype_bf16_c = 169, /**< 16-bit bfloat, complex */
rocblas_datatype_invalid = 255, /**< Invalid datatype value, do not use */
} rocblas_datatype;
/* ============================================================================================ */
/**
* @brief rocblas status codes definition
*/
typedef enum rocblas_status_
{
rocblas_status_success = 0, /**< Success */
rocblas_status_invalid_handle = 1, /**< Handle not initialized, invalid or null */
rocblas_status_not_implemented = 2, /**< Function is not implemented */
rocblas_status_invalid_pointer = 3, /**< Invalid pointer argument */
rocblas_status_invalid_size = 4, /**< Invalid size argument */
rocblas_status_memory_error = 5, /**< Failed internal memory allocation, copy or dealloc */
rocblas_status_internal_error = 6, /**< Other internal library failure */
rocblas_status_perf_degraded = 7, /**< Performance degraded due to low device memory */
rocblas_status_size_query_mismatch = 8, /**< Unmatched start/stop size query */
rocblas_status_size_increased = 9, /**< Queried device memory size increased */
rocblas_status_size_unchanged = 10, /**< Queried device memory size unchanged */
rocblas_status_invalid_value = 11, /**< Passed argument not valid */
rocblas_status_continue = 12, /**< Nothing preventing function to proceed */
rocblas_status_check_numerics_fail
= 13, /**< Will be set if the vector/matrix has a NaN/Infinity/denormal value */
} rocblas_status;
/*! \brief Indicates if scalar pointers are on host or device. This is used for
* scalars alpha and beta and for scalar function return values. */
typedef enum rocblas_pointer_mode_
{
/*! \brief Scalar values affected by this variable are located on the host. */
rocblas_pointer_mode_host = 0,
/*! \brief Scalar values affected by this variable are located on the device. */
rocblas_pointer_mode_device = 1
} rocblas_pointer_mode;
/*! \brief Indicates if atomics operations are allowed. Not allowing atomic operations
* may generally improve determinism and repeatability of results at a cost of performance */
typedef enum rocblas_atomics_mode_
{
/*! \brief Algorithms will refrain from atomics where applicable */
rocblas_atomics_not_allowed = 0,
/*! \brief Algorithms will take advantage of atomics where applicable */
rocblas_atomics_allowed = 1,
} rocblas_atomics_mode;
/*! \brief Indicates which performance metric Tensile uses when selecting the optimal
* solution for gemm problems. */
typedef enum rocblas_performance_metric_
{
/*! \brief Use Tensile's default performance metric for solution selection */
rocblas_default_performance_metric = 0,
/*! \brief Select the solution with the highest GFlops across all compute units */
rocblas_device_efficiency_performance_metric = 1,
/*! \brief Select the solution with the highest GFlops per compute unit it uses. This
* may be useful when running multiple small gemm problems simultaneously */
rocblas_cu_efficiency_performance_metric = 2
} rocblas_performance_metric;
/*! \brief Indicates if layer is active with bitmask*/
typedef enum rocblas_layer_mode_
{
/*! \brief No logging will take place. */
rocblas_layer_mode_none = 0x0,
/*! \brief A line containing the function name and value of arguments passed will be printed with each rocBLAS function call. */
rocblas_layer_mode_log_trace = 0x1,
/*! \brief Outputs a line each time a rocBLAS function is called, this line can be used with rocblas-bench to make the same call again. */
rocblas_layer_mode_log_bench = 0x2,
/*! \brief Outputs a YAML description of each rocBLAS function called, along with its arguments and number of times it was called. */
rocblas_layer_mode_log_profile = 0x4,
} rocblas_layer_mode;
/*! \brief Indicates if layer is active with bitmask*/
typedef enum rocblas_gemm_algo_
{
rocblas_gemm_algo_standard = 0x0,
rocblas_gemm_algo_solution_index = 0x1,
} rocblas_gemm_algo;
/*! \brief Which mathematical geam-like operation to perform for geam_ex */
typedef enum rocblas_geam_ex_operation_
{
rocblas_geam_ex_operation_min_plus = 0x0, // Cij = min(Aik + Bkj, Cij)
rocblas_geam_ex_operation_plus_min = 0x1, // Cij = min(Aik, Bkj) + Cij
} rocblas_geam_ex_operation;
/*! \brief Control flags passed into gemm algorithms invoked by Tensile Host */
typedef enum rocblas_gemm_flags_
{
/*! \brief Default empty flags */
rocblas_gemm_flags_none = 0x0,
/*! \brief Before ROCm 4.2, this flags is not implemented and rocblas uses packed-Int8x4 by default.
* After ROCm 4.2, set flag is neccesary if we want packed-Int8x4. Default (0x0) uses unpacked. */
rocblas_gemm_flags_pack_int8x4 = 0x1,
/*! \brief Select the gemm problem with the highest efficiency per compute unit used. Useful for running multiple smaller problems
* simultaneously. This takes precedence over the performance metric set in rocblas_handle and currently only works for
* gemm_*_ex problems. */
rocblas_gemm_flags_use_cu_efficiency = 0x2,
/*! \brief Select an alternate implementation for the MI200 FP16 HPA
* (High Precision Accumulate) GEMM kernel utilizing the BF16 matrix
* instructions with reduced accuracy in cases where computation cannot
* tolerate the FP16 matrix instructions flushing subnormal FP16
* input/output data to zero. See the "MI200 (gfx90a) Considerations"
* section for more details. */
rocblas_gemm_flags_fp16_alt_impl = 0x4,
rocblas_gemm_flags_check_solution_index = 0x8
} rocblas_gemm_flags;
// rocblas_int8_type_for_hipblas enum will be removed in a future release.
// This enum is used by hipBLAS and support for pack_int8x4 datatype will be removed from hipBLAS.
typedef enum rocblas_int8_type_for_hipblas_
{
rocblas_int8_type_for_hipblas_default = 0x0,
rocblas_int8_type_for_hipblas_int8 = 0x1,
rocblas_int8_type_for_hipblas_pack_int8x4 = 0x2
} rocblas_int8_type_for_hipblas;
/*! \brief Union for representing scalar values */
typedef union rocblas_union_u
{
rocblas_half h;
float s;
double d;
int32_t i;
rocblas_float_complex c;
rocblas_double_complex z;
} rocblas_union_t;
/*! \brief Numerical checking for verifying the Input and Output vector/matrix of the rocBLAS functions for a NaN, zero, infinity and denormal value*/
typedef enum rocblas_check_numerics_mode_
{
//No numeric checks
rocblas_check_numerics_mode_no_check = 0x0,
//Fully informative, prints results from all checks to console
rocblas_check_numerics_mode_info = 0x1,
//Prints result only if has_NaN==true||has_Inf==true||has_denorm==true
rocblas_check_numerics_mode_warn = 0x2,
//Return 'rocblas_status_check_numeric_fail' status if there is NaN/Inf/denormal value
rocblas_check_numerics_mode_fail = 0x4,
} rocblas_check_numerics_mode;
#endif /* ROCBLAS_TYPES_H */
|