1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
|
/*
-- MAGMA (version 2.9.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
@date January 2025
@generated from magmablas/zherk_vbatched.cpp, normal z -> s, Wed Jan 22 14:42:08 2025
@author Ahmad Abdelfattah
*/
#include "magma_internal.h"
#include "commonblas_s.h"
#define REAL
/******************************************************************************/
/*
    Argument-checked entry point of the variable-size batched SSYRK for
    callers that supply max_n and max_k themselves.

    The arguments are first handed to the vbatched checker. A nonzero
    checker result is reported through magma_xerbla (with its sign flipped)
    and the routine returns without doing any further work. Otherwise every
    argument is forwarded unchanged to magmablas_ssyrk_vbatched_max_nocheck.

    NOTE(review): max_n / max_k are presumably the largest n and k over the
    batch, as the sibling wrappers compute them -- confirm against callers.
*/
extern "C" void
magmablas_ssyrk_vbatched_max(
    magma_uplo_t uplo, magma_trans_t trans,
    magma_int_t* n, magma_int_t* k,
    float alpha,
    float const * const * dA_array, magma_int_t* ldda,
    float beta,
    float **dC_array, magma_int_t* lddc,
    magma_int_t batchCount,
    magma_int_t max_n, magma_int_t max_k, magma_queue_t queue )
{
    // Which checker runs is selected by the precision macro of this file.
#if defined(COMPLEX)
    const magma_int_t arg_status =
        magma_herk_vbatched_checker( uplo, trans, n, k, ldda, lddc, batchCount, queue );
#else
    const magma_int_t arg_status =
        magma_syrk_vbatched_checker( 0, uplo, trans, n, k, ldda, lddc, batchCount, queue );
#endif

    // Bad arguments: report and bail out before touching any matrix.
    if (arg_status != 0) {
        magma_xerbla( __func__, -arg_status );
        return;
    }

    magmablas_ssyrk_vbatched_max_nocheck(
        uplo, trans, n, k,
        alpha, dA_array, ldda,
        beta,  dC_array, lddc,
        batchCount, max_n, max_k, queue );
}
/******************************************************************************/
/*
    Variable-size batched SSYRK without argument checking.

    Obtains max_n and max_k from the n and k arrays and then forwards all
    arguments to magmablas_ssyrk_vbatched_max_nocheck. Both n and k are
    accessed at index batchCount, so each must hold batchCount + 1 entries.
*/
extern "C" void
magmablas_ssyrk_vbatched_nocheck(
    magma_uplo_t uplo, magma_trans_t trans,
    magma_int_t* n, magma_int_t* k,
    float alpha,
    float const * const * dA_array, magma_int_t* ldda,
    float beta,
    float **dC_array, magma_int_t* lddc,
    magma_int_t batchCount, magma_queue_t queue )
{
    magma_int_t max_n, max_k;

    // Compute the max. dimensions; the results are fetched from entry
    // batchCount of n and k right below (presumably where
    // magma_imax_size_2 stores them -- confirm against its definition).
    magma_imax_size_2( n, k, batchCount, queue );
    magma_igetvector_async( 1, n + batchCount, 1, &max_n, 1, queue );
    magma_igetvector_async( 1, k + batchCount, 1, &max_k, 1, queue );

    // The copies above are asynchronous: wait before reading max_n / max_k.
    magma_queue_sync( queue );

    magmablas_ssyrk_vbatched_max_nocheck(
        uplo, trans, n, k,
        alpha, dA_array, ldda,
        beta,  dC_array, lddc,
        batchCount, max_n, max_k, queue );
}
/***************************************************************************//**
Purpose
-------
SSYRK performs one of the symmetric rank k operations
C := alpha*A*A**H + beta*C,
or
C := alpha*A**H*A + beta*C,
where alpha and beta are real scalars, C is an n by n symmetric
matrix and A is an n by k matrix in the first case and a k by n
matrix in the second case.
Parameters
----------
@param[in]
uplo magma_uplo_t.
On entry, uplo specifies whether the upper or lower
triangular part of the array C is to be referenced as
follows:
uplo = MagmaUpper Only the upper triangular part of C
is to be referenced.
uplo = MagmaLower Only the lower triangular part of C
is to be referenced.
@param[in]
trans magma_trans_t.
On entry, trans specifies the operation to be performed as
follows:
trans = MagmaNoTrans C := alpha*A*A**H + beta*C.
trans = MagmaConjTrans C := alpha*A**H*A + beta*C.
@param[in]
n Array of integers, size (batchCount + 1).
On entry, each INTEGER N specifies the order of the corresponding matrix C.
N must be at least zero.
The last element of the array is used internally by the routine.
@param[in]
k Array of integers, size (batchCount + 1).
On entry with trans = MagmaNoTrans, each INTEGER K specifies the number
of columns of the corresponding matrix A, and on entry with
trans = MagmaConjTrans, K specifies the number of rows of the
corresponding matrix A. K must be at least zero.
The last element of the array is used internally by the routine.
@param[in]
alpha REAL
On entry, ALPHA specifies the scalar alpha.
@param[in]
dA_array Array of pointers, size (batchCount).
Each is a REAL array of DIMENSION ( LDDA, Ka ), where Ka is
K when trans = MagmaNoTrans, and is N otherwise.
Before entry with trans = MagmaNoTrans, the leading N by K
part of the corresponding array A must contain the matrix A, otherwise
the leading K by N part of the corresponding array must contain the
matrix A.
@param[in]
ldda Array of integers, size (batchCount + 1).
On entry, each INTEGER LDDA specifies the first dimension of the corresponding
matrix A as declared in the calling (sub) program. When trans = MagmaNoTrans then
LDDA must be at least max( 1, N ), otherwise ldda must be at
least max( 1, K ).
The last element of the array is used internally by the routine.
@param[in]
beta REAL.
On entry, BETA specifies the scalar beta. When BETA is
supplied as zero then dC need not be set on input.
@param[in,out]
dC_array Array of pointers, size (batchCount).
Each is a REAL array of DIMENSION ( LDDC, N ).
Before entry with uplo = MagmaUpper, the leading N by N
upper triangular part of the corresponding array C must
contain the upper triangular part of the corresponding
symmetric matrix and the strictly lower triangular part of C
is not referenced. On exit, the upper triangular part of the
array C is overwritten by the upper triangular part of
the updated matrix.
Before entry with uplo = MagmaLower, the leading N by N
lower triangular part of the corresponding array C must
contain the lower triangular part of the corresponding
symmetric matrix and the strictly upper triangular part
of C is not referenced. On exit, the lower triangular
part of the array C is overwritten by the lower triangular
part of the updated matrix.
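Note that the remark on the imaginary parts of the diagonal elements
(they need not be set, they are assumed to be zero, and on exit they
are set to zero) applies only to the complex Hermitian routine this
file is generated from; in this real-precision routine the elements
of C have no imaginary parts.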
@param[in]
lddc Array of integers, size (batchCount + 1).
On entry, each INTEGER LDDC specifies the first dimension of the
corresponding matrix C as declared in the calling (sub) program.
LDDC must be at least max( 1, N ).
@param[in]
batchCount INTEGER
The number of matrices to operate on.
@param[in]
queue magma_queue_t
Queue to execute in.
@ingroup magma_herk_batched
*******************************************************************************/
extern "C" void
magmablas_ssyrk_vbatched(
    magma_uplo_t uplo, magma_trans_t trans,
    magma_int_t* n, magma_int_t* k,
    float alpha,
    float const * const * dA_array, magma_int_t* ldda,
    float beta,
    float **dC_array, magma_int_t* lddc,
    magma_int_t batchCount, magma_queue_t queue )
{
    // Step 1: argument validation. Which checker runs is selected by the
    // precision macro of this file.
#if defined(COMPLEX)
    const magma_int_t arg_status =
        magma_herk_vbatched_checker( uplo, trans, n, k, ldda, lddc, batchCount, queue );
#else
    const magma_int_t arg_status =
        magma_syrk_vbatched_checker( 0, uplo, trans, n, k, ldda, lddc, batchCount, queue );
#endif

    // A nonzero status is reported with its sign flipped; nothing else runs.
    if (arg_status != 0) {
        magma_xerbla( __func__, -arg_status );
        return;
    }

    // Step 2: compute the max. dimensions. The values are fetched from entry
    // batchCount of n and k, which is why both arrays need batchCount + 1
    // entries.
    magma_int_t max_n, max_k;
    magma_imax_size_2( n, k, batchCount, queue );
    magma_igetvector_async( 1, n + batchCount, 1, &max_n, 1, queue );
    magma_igetvector_async( 1, k + batchCount, 1, &max_k, 1, queue );

    // The copies above are asynchronous: wait before reading max_n / max_k.
    magma_queue_sync( queue );

    // Step 3: hand over to the unchecked routine that takes explicit maxima.
    magmablas_ssyrk_vbatched_max_nocheck(
        uplo, trans, n, k,
        alpha, dA_array, ldda,
        beta,  dC_array, lddc,
        batchCount, max_n, max_k, queue );
}
|