1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
|
//------------------------------------------------------------------------------
// GraphBLAS/CUDA/GB_cuda_AxB_dot3_branch: decide to use GPU for dot3
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
// Decide branch direction for GPU use for the dot-product C<M>=A'*B
#include "GB_cuda.hpp"
#include <cuda_runtime.h>
bool GB_cuda_AxB_dot3_branch
(
const GrB_Matrix M, // mask matrix
const bool Mask_struct, // if true, use the only structure of M
const GrB_Matrix A, // input matrix
const GrB_Matrix B, // input matrix
const GrB_Semiring semiring, // semiring that defines C=A*B
const bool flipxy // if true, do z=fmult(b,a) vs fmult(a,b)
)
{
if (!GB_cuda_type_branch (A->type) ||
!GB_cuda_type_branch (B->type) ||
!GB_cuda_type_branch (semiring->multiply->xtype) ||
!GB_cuda_type_branch (semiring->multiply->ytype) ||
!GB_cuda_type_branch (semiring->multiply->ztype))
{
// one or more types are not yet supported on the GPU
return (false) ;
}
if (A->vlen == 0)
{
// C has no entries: no need to compute it on the GPU
return (false) ;
}
// very rough estimate of the work to do
double adeg = ((double) GB_nnz (A)) / ((double) GB_IMAX (1, A->nvec)) ;
double bdeg = ((double) GB_nnz (B)) / ((double) GB_IMAX (1, B->nvec)) ;
double work = GB_nnz (M) * GB_IMIN (adeg, bdeg) ;
int ngpus_to_use = GB_ngpus_to_use (work) ;
GBURBLE (" work:%g GPUs:%d ", work, ngpus_to_use) ;
if (ngpus_to_use > 0)
{
// FIXME: or do this in GB_AxB_dot3_cuda
// int gpu_id = GB_Context_gpu_id_get ( ) ;
// cudaSetDevice (gpu_id) ;
return true ;
}
else
{
// std::cout << "Not using cuda path for dot3." << std::endl;
return false ;
}
}
|