//------------------------------------------------------------------------------
// GB_nthreads.h: determine # of threads to use for parallel region
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
#ifndef GB_NTHREADS_H
#define GB_NTHREADS_H
//------------------------------------------------------------------------------
// Determine # of threads to use via global setting and descriptor
//------------------------------------------------------------------------------
// The GB_Context Context struct contains the number of threads to use in the
// operation. It is normally determined from the user's descriptor, with a
// default of nthreads_max = GxB_DEFAULT (that is, zero). The default rule is
// to let GraphBLAS determine the number of threads automatically by selecting
// a number of threads between 1 and nthreads_max. GrB_init initializes
// nthreads_max to omp_get_max_threads. Both the global value and the value in
// a descriptor can be set/queried by GxB_set / GxB_get.
// Some GrB_Matrix and GrB_Vector methods do not take a descriptor, however
// (GrB_*_dup, _build, _exportTuples, _clear, _nvals, _wait, and GxB_*_resize).
// For those methods the default rule is always used (nthreads_max =
// GxB_DEFAULT), which then relies on the global nthreads_max.
//------------------------------------------------------------------------------
// GB_GET_NTHREADS_MAX: determine max # of threads for OpenMP parallelism.
//------------------------------------------------------------------------------
// GB_GET_NTHREADS_MAX obtains the max # of threads to use and the chunk
// size from the Context. If Context is NULL then a single thread *must*
// be used. If Context->nthreads_max is <= GxB_DEFAULT, then select
// automatically: between 1 and nthreads_max, depending on the problem
// size. Below is the default rule. Any function can use its own rule
// instead, based on Context, chunk, nthreads_max, and the problem size.
// No rule can exceed nthreads_max.
// Usage: GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
// Declares two variables in the caller's scope: an int (first argument) and a
// double (second argument).  The third argument is a pointer, possibly NULL,
// to a struct with nthreads_max and chunk members.  The macro parameters are
// deliberately not named nthreads_max or chunk: if they were, they would also
// be substituted into the ->nthreads_max and ->chunk member accesses, and the
// macro would only compile when the caller used exactly those variable names.
// The context argument is evaluated more than once, so it must not have side
// effects.
#define GB_GET_NTHREADS_MAX(GB_nthreads_max_var,GB_chunk_var,GB_Context_arg)   \
    /* NULL context: one thread; otherwise take the context's setting */       \
    int GB_nthreads_max_var =                                                  \
        ((GB_Context_arg) == NULL) ? 1 : (GB_Context_arg)->nthreads_max ;      \
    if (GB_nthreads_max_var <= GxB_DEFAULT)                                    \
    {                                                                          \
        /* default requested: fall back to the global setting */               \
        GB_nthreads_max_var = GB_Global_nthreads_max_get ( ) ;                 \
    }                                                                          \
    double GB_chunk_var =                                                      \
        ((GB_Context_arg) == NULL) ? GxB_DEFAULT : (GB_Context_arg)->chunk ;   \
    if (GB_chunk_var <= GxB_DEFAULT)                                           \
    {                                                                          \
        /* NULL context or default requested: use the global chunk size */     \
        GB_chunk_var = GB_Global_chunk_get ( ) ;                               \
    }
//------------------------------------------------------------------------------
// GB_nthreads: determine # of threads to use for a parallel loop or region
//------------------------------------------------------------------------------
// If work < 2*chunk, then only one thread is used.
// Else if work < 3*chunk, then two threads are used, and so on, up to at most
// nthreads_max threads.
// Returns floor (work / chunk), limited to the range 1 to nthreads_max, after
// first raising work and chunk to at least 1.  The ratio is limited while it
// is still a double, so a very large or infinite ratio yields nthreads_max
// rather than an out-of-range (undefined) floating-point to integer
// conversion.
static inline int GB_nthreads   // return # of threads to use (always >= 1)
(
    double work,                // total work to do
    double chunk,               // give each thread at least this much work
    int nthreads_max            // max # of threads to use
)
{
    // with at most one thread permitted, there is nothing to decide
    if (nthreads_max <= 1)
    {
        return (1) ;
    }

    // work and chunk are each at least 1; the tests are written in negated
    // form so that a NaN is also replaced by 1
    if (!(work > 1))
    {
        work = 1 ;
    }
    if (!(chunk > 1))
    {
        chunk = 1 ;
    }

    double ratio = work / chunk ;

    // enough work for every permitted thread (includes an infinite ratio)
    if (ratio >= (double) nthreads_max)
    {
        return (nthreads_max) ;
    }

    // less than one chunk of work per thread, or NaN from inf/inf
    if (!(ratio >= 1))
    {
        return (1) ;
    }

    // here 1 <= ratio < nthreads_max, so the conversion is in range and
    // truncation toward zero is the same as floor
    return ((int) ratio) ;
}
#endif
|