1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
|
//------------------------------------------------------------------------------
// GB_ek_slice_kernels.h: slice the entries and vectors of a matrix
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
#ifndef GB_EK_SLICE_KERNELS_H
#define GB_EK_SLICE_KERNELS_H
static inline void GB_e_slice
(
int64_t *Slice, // array of size ntasks+1
int64_t e, // number items to partition amongst the tasks
const int ntasks // # of tasks
)
{
Slice [0] = 0 ;
for (int tid = 1 ; tid < ntasks ; tid++)
{
Slice [tid] = (int64_t) GB_PART (tid, e, ntasks) ;
}
Slice [ntasks] = e ;
}
//------------------------------------------------------------------------------
// GB_ek_slice_ntasks: determine # of threads and tasks to use for GB_ek_slice
//------------------------------------------------------------------------------
static inline void GB_ek_slice_ntasks
(
// output
int *nthreads, // # of threads to use for GB_ek_slice
int *ntasks, // # of tasks to create for GB_ek_slice
// input
int64_t anz_held, // GB_nnz_held(A) of the matrix to slice
int ntasks_per_thread, // # of tasks per thread
double work, // total work to do
double chunk, // give each thread at least this much work
int nthreads_max // max # of threads to use
)
{
if (anz_held == 0)
{
(*nthreads) = 1 ;
(*ntasks) = 1 ;
}
else
{
(*nthreads) = GB_nthreads (work, chunk, nthreads_max) ;
(*ntasks) = (*nthreads == 1) ? 1 : ((ntasks_per_thread) * (*nthreads)) ;
(*ntasks) = (int) GB_IMIN (*ntasks, anz_held) ;
(*ntasks) = (int) GB_IMAX (*ntasks, 1) ;
}
}
//------------------------------------------------------------------------------
// GB_SLICE_MATRIX: slice a single matrix using GB_ek_slice
//------------------------------------------------------------------------------
// chunk and nthreads_max must already be defined.
#define GB_SLICE_MATRIX_WORK2(X,ntasks_per_thread,work,xnz_held) \
GB_ek_slice_ntasks (&(X ## _nthreads), &(X ## _ntasks), xnz_held, \
ntasks_per_thread, work, chunk, nthreads_max) ; \
GB_WERK_PUSH (X ## _ek_slicing, 3*(X ## _ntasks)+1, int64_t) ; \
if (X ## _ek_slicing == NULL) \
{ \
/* out of memory */ \
GB_FREE_ALL ; \
return (GrB_OUT_OF_MEMORY) ; \
} \
GB_ek_slice (X ## _ek_slicing, X, X ## _ntasks) ;
#define GB_SLICE_MATRIX_WORK(X,ntasks_per_thread,work,xnz_held) \
GB_SLICE_MATRIX_WORK2 (X,ntasks_per_thread,work,xnz_held) \
const int64_t *kfirst_ ## X ## slice = X ## _ek_slicing ; \
const int64_t *klast_ ## X ## slice = X ## _ek_slicing + X ## _ntasks ; \
const int64_t *pstart_ ## X ## slice = X ## _ek_slicing + X ## _ntasks*2 ;
#define GB_SLICE_MATRIX(X,ntasks_per_thread) \
const int64_t X ## _held = GB_nnz_held (X) ; \
const double X ## _wrk = X ## _held + X->nvec ; \
GB_SLICE_MATRIX_WORK (X, ntasks_per_thread, X ## _wrk, X ## _held)
#define GB_SLICE_MATRIX2(X,ntasks_per_thread) \
const int64_t X ## _held = GB_nnz_held (X) ; \
const double X ## _wrk = X ## _held + X->nvec ; \
GB_SLICE_MATRIX_WORK2 (X, ntasks_per_thread, X ## _wrk, X ## _held)
//------------------------------------------------------------------------------
// GB_GET_PA_AND_PC: find the part of A(:,k) and C(:,k) for this task
//------------------------------------------------------------------------------
// The tasks were generated by GB_ek_slice.
// as a macro, where p0, p1, and p2 are first obtained as above:
// p0 = GBp (Ap, k, avlen) ;
// p1 = GBp (Ap, k+1, avlen) ;
// p2 = GBp (Cp, k, cvlen) ;
#define GB_GET_PA_AND_PC(pA_start,pA_end,pC,tid,k,kfirst,klast,pstart_slice,Cp_kfirst,p0,p1,p2) \
int64_t pA_start, pA_end, pC ; \
if (k == kfirst) \
{ \
/* First vector for task tid; may only be partially owned. */ \
pA_start = pstart_slice [tid] ; \
pA_end = GB_IMIN (p1, pstart_slice [tid+1]) ; \
pC = Cp_kfirst [tid] ; \
} \
else if (k == klast) \
{ \
/* Last vector for task tid; may only be partially owned. */ \
pA_start = p0 ; \
pA_end = pstart_slice [tid+1] ; \
pC = p2 ; \
} \
else \
{ \
/* task tid entirely owns this vector A(:,k). */ \
pA_start = p0 ; \
pA_end = p1 ; \
pC = p2 ; \
}
//------------------------------------------------------------------------------
// GB_GET_PA: find the part of A(:,k) to be operated on by this task
//------------------------------------------------------------------------------
// The tasks were generated by GB_ek_slice.
// as a macro, where p0 and p1 are first obtained as above:
// p0 = GBp (Ap, k, avlen) ;
// p1 = GBp (Ap, k+1, avlen) ;
#define GB_GET_PA(pA_start,pA_end,tid,k,kfirst,klast,pstart_slice,p0,p1) \
int64_t pA_start, pA_end ; \
if (k == kfirst) \
{ \
/* First vector for task tid; may only be partially owned. */ \
pA_start = pstart_slice [tid] ; \
pA_end = GB_IMIN (p1, pstart_slice [tid+1]) ; \
} \
else if (k == klast) \
{ \
/* Last vector for task tid; may only be partially owned. */ \
pA_start = p0 ; \
pA_end = pstart_slice [tid+1] ; \
} \
else \
{ \
/* task tid entirely owns this vector A(:,k). */ \
pA_start = p0 ; \
pA_end = p1 ; \
}
#endif
|