1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
//------------------------------------------------------------------------------
// GB_transpose_sparse_template: C=op(cast(A')), transpose, typecast, & apply op
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
{
//--------------------------------------------------------------------------
// overview
//--------------------------------------------------------------------------
// This is a template body, not a stand-alone function.  It relies on names
// that are used below but not defined in this file, and so must be provided
// by the code that includes it: the matrices A and C, Workspaces, A_slice,
// nthreads, nworkspaces, and the macros GB_W_TYPE, GB_IGET, GB_ISET,
// GB_APPLY_OP, GB_ATOMIC_CAPTURE_INC, GBh_A / GBh_B, and the
// GB_*_DECLARE / GB_*_PTR families.
//
// For each entry A(i,j) that is traversed, the position pC of the entry in
// C is read from workspace [i], which is then incremented.  The index j is
// stored in Ci [pC], and, unless GB_ISO_TRANSPOSE is defined,
// GB_APPLY_OP (pC, pA) is invoked to compute the value Cx [pC].
//
// NOTE(review): the initial contents of each workspace are not set here;
// presumably the caller fills them with the starting position of each
// vector of C -- confirm in the including file.
//
// NOTE(review): the macros are expanded in this scope and may refer to the
// local names used below (Ap, Ah, Ai, Ci, i, j, pA, pC); check the macro
// definitions before renaming any of them.
//--------------------------------------------------------------------------
// get inputs
//--------------------------------------------------------------------------
// GBh_AB (Ah,k) gives the index j of the kth vector of A.  It is mapped to
// GBh_B or GBh_A below, to match the macro family used to access A.
#undef GBh_AB
#ifdef GB_BIND_1ST
// see discussion in GB_transpose_template.c
// With GB_BIND_1ST, the B variants of the macros are used to access A.
GB_Bp_DECLARE (Ap, const) ; GB_Bp_PTR (Ap, A) ;
GB_Bh_DECLARE (Ah, const) ; GB_Bh_PTR (Ah, A) ;
GB_Bi_DECLARE (Ai, const) ; GB_Bi_PTR (Ai, A) ;
#define GBh_AB(Ah,k) GBh_B(Ah,k)
#else
// Otherwise, the A variants of the macros are used to access A.
GB_Ap_DECLARE (Ap, const) ; GB_Ap_PTR (Ap, A) ;
GB_Ah_DECLARE (Ah, const) ; GB_Ah_PTR (Ah, A) ;
GB_Ai_DECLARE (Ai, const) ; GB_Ai_PTR (Ai, A) ;
#define GBh_AB(Ah,k) GBh_A(Ah,k)
#endif
// Ci is declared without const since it is written via GB_ISET below.
GB_Ci_DECLARE (Ci, ) ; GB_Ci_PTR (Ci, C) ;
//--------------------------------------------------------------------------
// C = A'
//--------------------------------------------------------------------------
// One of three methods is selected:
//  (1) nthreads == 1:      sequential, using Workspaces [0]
//  (2) nworkspaces == 1:   parallel, all threads share Workspaces [0] and
//                          update it atomically
//  (3) otherwise:          parallel, thread tid uses its own Workspaces [tid]
if (nthreads == 1)
{
//----------------------------------------------------------------------
// sequential method
//----------------------------------------------------------------------
// Cp and workspace are of type GB_W_TYPE
GB_W_TYPE *restrict workspace = (GB_W_TYPE *) (Workspaces [0]) ;
const int64_t anvec = A->nvec ;
// traverse all anvec vectors of A, in order
for (int64_t k = 0 ; k < anvec ; k++)
{
// iterate over the entries in A(:,j)
int64_t j = GBh_AB (Ah, k) ;
// the entries of the kth vector are in positions pA_start:pA_end-1
int64_t pA_start = GB_IGET (Ap, k) ;
int64_t pA_end = GB_IGET (Ap, k+1) ;
for (int64_t pA = pA_start ; pA < pA_end ; pA++)
{
// C(j,i) = A(i,j)
int64_t i = GB_IGET (Ai, pA) ;
// pC is the position of this entry in C; advance workspace [i] so the
// next entry found with the same index i goes in the next position
GB_W_TYPE pC = workspace [i]++ ;
// Ci [pC] = j ;
GB_ISET (Ci, pC, j) ;
#ifndef GB_ISO_TRANSPOSE
// Cx [pC] = op (Ax [pA])
// (skipped when GB_ISO_TRANSPOSE is defined: only the pattern is built)
GB_APPLY_OP (pC, pA) ;
#endif
}
}
}
else if (nworkspaces == 1)
{
//----------------------------------------------------------------------
// atomic method
//----------------------------------------------------------------------
// All threads share the single workspace, Workspaces [0], so the
// read-and-increment of workspace [i] must be atomic.  The position pC
// given to an entry thus depends on how the threads interleave.
// NOTE(review): this means the indices within a vector of C need not end
// up in ascending order; presumably the caller accounts for that -- confirm.
GB_W_TYPE *restrict workspace = (GB_W_TYPE *) (Workspaces [0]) ;
int tid ;
// one loop iteration per slice of A
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (tid = 0 ; tid < nthreads ; tid++)
{
// this iteration handles vectors A_slice [tid] to A_slice [tid+1]-1
for (int64_t k = A_slice [tid] ; k < A_slice [tid+1] ; k++)
{
// iterate over the entries in A(:,j)
int64_t j = GBh_AB (Ah, k) ;
int64_t pA_start = GB_IGET (Ap, k) ;
int64_t pA_end = GB_IGET (Ap, k+1) ;
for (int64_t pA = pA_start ; pA < pA_end ; pA++)
{
// C(j,i) = A(i,j)
int64_t i = GB_IGET (Ai, pA) ;
// do this atomically: pC = workspace [i]++
GB_W_TYPE pC ;
GB_ATOMIC_CAPTURE_INC (pC, workspace [i]) ;
// Ci [pC] = j ;
GB_ISET (Ci, pC, j) ;
#ifndef GB_ISO_TRANSPOSE
// Cx [pC] = op (Ax [pA])
GB_APPLY_OP (pC, pA) ;
#endif
}
}
}
}
else
{
//----------------------------------------------------------------------
// non-atomic method
//----------------------------------------------------------------------
// Each slice tid has its own workspace, Workspaces [tid], so a plain
// (non-atomic) increment is used.
// NOTE(review): this is safe only if the workspaces are distinct arrays
// whose contents direct each slice to disjoint positions of C; that setup
// is not visible here -- confirm in the including file.
int tid ;
// one loop iteration per slice of A
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (tid = 0 ; tid < nthreads ; tid++)
{
GB_W_TYPE *restrict workspace = (GB_W_TYPE *) (Workspaces [tid]) ;
// this iteration handles vectors A_slice [tid] to A_slice [tid+1]-1
for (int64_t k = A_slice [tid] ; k < A_slice [tid+1] ; k++)
{
// iterate over the entries in A(:,j)
int64_t j = GBh_AB (Ah, k) ;
int64_t pA_start = GB_IGET (Ap, k) ;
int64_t pA_end = GB_IGET (Ap, k+1) ;
for (int64_t pA = pA_start ; pA < pA_end ; pA++)
{
// C(j,i) = A(i,j)
int64_t i = GB_IGET (Ai, pA) ;
// pC = position of this entry in C, from this slice's own workspace
GB_W_TYPE pC = workspace [i]++ ;
// Ci [pC] = j ;
GB_ISET (Ci, pC, j) ;
#ifndef GB_ISO_TRANSPOSE
// Cx [pC] = op (Ax [pA])
GB_APPLY_OP (pC, pA) ;
#endif
}
}
}
}
}
// Remove the macros consumed by this template.  GBh_AB is defined earlier in
// this file.  GB_W_TYPE and GB_ATOMIC_CAPTURE_INC are used in this file but
// not defined in it; presumably the including file defines them anew before
// each inclusion of this template -- confirm.
#undef GB_W_TYPE
#undef GB_ATOMIC_CAPTURE_INC
#undef GBh_AB
|