/*
-- MAGMA (version 2.9.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
@date January 2025
@generated from magmablas/zher2k_vbatched.cpp, normal z -> c, Wed Jan 22 14:42:08 2025
@author Jakub Kurzak
@author Stan Tomov
@author Mark Gates
@author Azzam Haidar
@author Ahmad Abdelfattah
*/
#include "magma_internal.h"
#include "commonblas_c.h"
#define COMPLEX
/******************************************************************************/
extern "C" void
magmablas_cher2k_vbatched_max_nocheck(
    magma_uplo_t uplo, magma_trans_t trans, magma_int_t* n, magma_int_t* k,
    magmaFloatComplex alpha,
    magmaFloatComplex const * const * dA_array, magma_int_t* ldda,
    magmaFloatComplex const * const * dB_array, magma_int_t* lddb,
    float beta, magmaFloatComplex **dC_array, magma_int_t* lddc,
    magma_int_t batchCount,
    magma_int_t max_n, magma_int_t max_k, magma_queue_t queue )
{
    // her2k takes a real beta; promote it to a complex scalar for the internal herk-style kernel
    magmaFloatComplex cbeta = MAGMA_C_MAKE(beta, 0.);
    magmaFloatComplex c_one = MAGMA_C_MAKE(1., 0.);

    if (trans == MagmaNoTrans) {
        magmablas_cherk_internal_vbatched(uplo, MagmaNoTrans, n, k, alpha,               dA_array, ldda, dB_array, lddb, cbeta, dC_array, lddc, max_n, max_k, batchCount, queue );
        magmablas_cherk_internal_vbatched(uplo, MagmaNoTrans, n, k, MAGMA_C_CONJ(alpha), dB_array, lddb, dA_array, ldda, c_one, dC_array, lddc, max_n, max_k, batchCount, queue );
    }
    else {
        magmablas_cherk_internal_vbatched(uplo, Magma_ConjTrans, n, k, alpha,               dA_array, ldda, dB_array, lddb, cbeta, dC_array, lddc, max_n, max_k, batchCount, queue );
        magmablas_cherk_internal_vbatched(uplo, Magma_ConjTrans, n, k, MAGMA_C_CONJ(alpha), dB_array, lddb, dA_array, ldda, c_one, dC_array, lddc, max_n, max_k, batchCount, queue );
    }
}
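
/*
    The decomposition used above, written out for reference (this mirrors the
    two calls in magmablas_cher2k_vbatched_max_nocheck; it is not a separate
    code path):

        trans = MagmaNoTrans:
            C := alpha        * A * B**H + beta * C    (first pass, applies beta once)
            C := conjg(alpha) * B * A**H +    1 * C    (second pass, accumulates)

        trans = Magma_ConjTrans:
            C := alpha        * A**H * B + beta * C
            C := conjg(alpha) * B**H * A +    1 * C

    Summed, the two passes give the her2k update
        C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C
    (or the corresponding **H*B form).
*/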
/******************************************************************************/
extern "C" void
magmablas_cher2k_vbatched_max(
    magma_uplo_t uplo, magma_trans_t trans, magma_int_t* n, magma_int_t* k,
    magmaFloatComplex alpha,
    magmaFloatComplex const * const * dA_array, magma_int_t* ldda,
    magmaFloatComplex const * const * dB_array, magma_int_t* lddb,
    float beta, magmaFloatComplex **dC_array, magma_int_t* lddc,
    magma_int_t batchCount,
    magma_int_t max_n, magma_int_t max_k, magma_queue_t queue )
{
    // validate the batched arguments on the device before launching any kernel
    magma_int_t info = 0;
#ifdef COMPLEX
    info = magma_her2k_vbatched_checker( uplo, trans, n, k, ldda, lddb, lddc, batchCount, queue );
#else
    info = magma_syr2k_vbatched_checker( 0, uplo, trans, n, k, ldda, lddb, lddc, batchCount, queue );
#endif

    if (info != 0) {
        magma_xerbla( __func__, -(info) );
        return;
    }

    magmablas_cher2k_vbatched_max_nocheck(
        uplo, trans,
        n, k,
        alpha, dA_array, ldda,
               dB_array, lddb,
        beta,  dC_array, lddc,
        batchCount, max_n, max_k, queue );
}
/******************************************************************************/
extern "C" void
magmablas_cher2k_vbatched_nocheck(
    magma_uplo_t uplo, magma_trans_t trans, magma_int_t* n, magma_int_t* k,
    magmaFloatComplex alpha,
    magmaFloatComplex const * const * dA_array, magma_int_t* ldda,
    magmaFloatComplex const * const * dB_array, magma_int_t* lddb,
    float beta, magmaFloatComplex **dC_array, magma_int_t* lddc,
    magma_int_t batchCount, magma_queue_t queue )
{
    // compute the max. dimensions on the device;
    // magma_imax_size_2 stores max(n) in n[batchCount] and max(k) in k[batchCount]
    magma_imax_size_2(n, k, batchCount, queue);

    // copy the two maxima back to the host and wait for them before launching
    magma_int_t max_n, max_k;
    magma_igetvector_async(1, &n[batchCount], 1, &max_n, 1, queue);
    magma_igetvector_async(1, &k[batchCount], 1, &max_k, 1, queue);
    magma_queue_sync( queue );

    magmablas_cher2k_vbatched_max_nocheck(
        uplo, trans,
        n, k,
        alpha, dA_array, ldda,
               dB_array, lddb,
        beta,  dC_array, lddc,
        batchCount, max_n, max_k, queue );
}
/***************************************************************************//**
    Purpose
    -------
    CHER2K performs one of the Hermitian rank 2k operations

        C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,

    or

        C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,

    where alpha and beta are scalars with beta real, C is an n by n
    Hermitian matrix and A and B are n by k matrices in the first case
    and k by n matrices in the second case.
Parameters
----------
@param[in]
uplo magma_uplo_t.
On entry, UPLO specifies whether the upper or lower
triangular part of the array C is to be referenced as
follows:
- = MagmaUpper: Only the upper triangular part of C is to be referenced.
- = MagmaLower: Only the lower triangular part of C is to be referenced.
@param[in]
trans magma_trans_t.
On entry, TRANS specifies the operation to be performed as
follows:
- = MagmaNoTrans:    C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C.
- = Magma_ConjTrans: C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C.
@param[in]
n Array of integers, size(batchCount + 1).
On entry, each INTEGER N specifies the order of the corresponding matrix C.
N must be at least zero.
The last element of the array is used internally by the routine.
@param[in]
k Array of integers, size(batchCount + 1).
On entry with TRANS = MagmaNoTrans, each INTEGER K specifies the number
of columns of the corresponding matrices A and B, and on entry with
TRANS = Magma_ConjTrans, K specifies the number of rows of the
corresponding matrices A and B. K must be at least zero.
The last element of the array is used internally by the routine.
@param[in]
alpha COMPLEX.
On entry, ALPHA specifies the scalar alpha.
@param[in]
dA_array Array of pointers, dimension (batchCount).
Each is a COMPLEX array of DIMENSION ( LDDA, Ka ), where Ka is
K when TRANS = MagmaNoTrans, and is N otherwise.
Before entry with TRANS = MagmaNoTrans, the leading N by K
part of the array A must contain the matrix A, otherwise
the leading K by N part of the array A must contain the
matrix A.
@param[in]
ldda Array of integers, size(batchCount + 1).
On entry, each INTEGER LDDA specifies the first dimension of the
corresponding matrix A as declared in the calling (sub) program.
When TRANS = MagmaNoTrans then LDDA must be at least max( 1, N ),
otherwise LDDA must be at least max( 1, K ).
The last element of the array is used internally by the routine.
@param[in]
dB_array Array of pointers, dimension (batchCount).
Each is a COMPLEX array of DIMENSION ( LDDB, Kb ), where Kb is
K when TRANS = MagmaNoTrans, and is N otherwise.
Before entry with TRANS = MagmaNoTrans, the leading N by K
part of the array B must contain the matrix B, otherwise
the leading K by N part of the array B must contain the
matrix B.
@param[in]
lddb Array of integers, size(batchCount + 1).
On entry, each INTEGER LDDB specifies the first dimension of the
corresponding matrix B as declared in the calling (sub) program.
When TRANS = MagmaNoTrans then LDDB must be at least max( 1, N ),
otherwise LDDB must be at least max( 1, K ). Unchanged on exit.
The last element of the array is used internally by the routine.
@param[in]
beta REAL.
On entry, BETA specifies the scalar beta.
@param[in,out]
dC_array Array of pointers, dimension (batchCount).
Each is a COMPLEX array of DIMENSION ( LDDC, N ).
Before entry with UPLO = MagmaUpper, the leading n by n
upper triangular part of the array C must contain the upper
triangular part of the Hermitian matrix and the strictly
lower triangular part of C is not referenced. On exit, the
upper triangular part of the array C is overwritten by the
upper triangular part of the updated matrix.
Before entry with UPLO = MagmaLower, the leading n by n
lower triangular part of the array C must contain the lower
triangular part of the Hermitian matrix and the strictly
upper triangular part of C is not referenced. On exit, the
lower triangular part of the array C is overwritten by the
lower triangular part of the updated matrix.
Note that the imaginary parts of the diagonal elements need
not be set, they are assumed to be zero, and on exit they
are set to zero.
@param[in]
lddc Array of integers, size(batchCount + 1).
On entry, each INTEGER LDDC specifies the first dimension of the corresponding
matrix C as declared in the calling (sub) program. LDDC must be at least
max( 1, N ).
The last element of the array is used internally by the routine.
@param[in]
batchCount INTEGER
The number of matrices to operate on.
@param[in]
queue magma_queue_t
Queue to execute in.
@ingroup magma_her2k_batched
*******************************************************************************/
extern "C" void
magmablas_cher2k_vbatched(
    magma_uplo_t uplo, magma_trans_t trans, magma_int_t* n, magma_int_t* k,
    magmaFloatComplex alpha,
    magmaFloatComplex const * const * dA_array, magma_int_t* ldda,
    magmaFloatComplex const * const * dB_array, magma_int_t* lddb,
    float beta, magmaFloatComplex **dC_array, magma_int_t* lddc,
    magma_int_t batchCount, magma_queue_t queue )
{
    // validate the batched arguments on the device before doing any work
    magma_int_t info = 0;
#ifdef COMPLEX
    info = magma_her2k_vbatched_checker( uplo, trans, n, k, ldda, lddb, lddc, batchCount, queue );
#else
    info = magma_syr2k_vbatched_checker( 0, uplo, trans, n, k, ldda, lddb, lddc, batchCount, queue );
#endif

    if (info != 0) {
        magma_xerbla( __func__, -(info) );
        return;
    }

    // compute the max. dimensions on the device;
    // magma_imax_size_2 stores max(n) in n[batchCount] and max(k) in k[batchCount]
    magma_imax_size_2(n, k, batchCount, queue);

    // copy the two maxima back to the host and wait for them before launching
    magma_int_t max_n, max_k;
    magma_igetvector_async(1, &n[batchCount], 1, &max_n, 1, queue);
    magma_igetvector_async(1, &k[batchCount], 1, &max_k, 1, queue);
    magma_queue_sync( queue );

    magmablas_cher2k_vbatched_max_nocheck(
        uplo, trans,
        n, k,
        alpha, dA_array, ldda,
               dB_array, lddb,
        beta,  dC_array, lddc,
        batchCount, max_n, max_k, queue );
}
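
/*
    A minimal calling sketch (illustrative only, not part of the library; the
    device-side setup described in the comments is assumed to be done by the
    caller with whatever allocation/copy helpers the application already uses):

        // device: caller's GPU id (magma_int_t)
        // d_n, d_k, d_ldda, d_lddb, d_lddc: device arrays of magma_int_t with
        //     batchCount+1 entries each (the extra entry is workspace used
        //     internally by the routine, as documented above).
        // d_A_array, d_B_array, d_C_array: device arrays of batchCount device
        //     pointers to the per-problem matrices.
        // alpha: magmaFloatComplex scalar; beta: float scalar.

        magma_queue_t queue;
        magma_queue_create( device, &queue );

        magmablas_cher2k_vbatched(
            MagmaLower, MagmaNoTrans,
            d_n, d_k,
            alpha, d_A_array, d_ldda,
                   d_B_array, d_lddb,
            beta,  d_C_array, d_lddc,
            batchCount, queue );

        magma_queue_sync( queue );
        magma_queue_destroy( queue );
*/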