File: GB_cuda_reduce_to_scalar_branch.cpp

package info (click to toggle)
suitesparse 1%3A7.10.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 254,920 kB
  • sloc: ansic: 1,134,743; cpp: 46,133; makefile: 4,875; fortran: 2,087; java: 1,826; sh: 996; ruby: 725; python: 495; asm: 371; sed: 166; awk: 44
file content (55 lines) | stat: -rw-r--r-- 1,591 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
//------------------------------------------------------------------------------
// GraphBLAS/CUDA/GB_cuda_reduce_to_scalar_branch: decide to use GPU for reduce
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// Decide whether the reduction of a matrix to a scalar is done on the GPU (the
// function returns true) or on the CPU (it returns false).

#include "GB_cuda_reduce.hpp"

bool GB_cuda_reduce_to_scalar_branch    // return true to use the GPU
(
    const GrB_Monoid monoid,        // monoid to do the reduction
    const GrB_Matrix A              // input matrix
)
{

    if (!GB_cuda_type_branch (A->type) ||
        !GB_cuda_type_branch (monoid->op->ztype))
    {
        // one or more types are not yet supported on the GPU
        return (false) ;
    }

    if (monoid->op->opcode == GB_ANY_binop_code)
    {
        // the ANY monoid takes O(1) time; do it on the CPU:
        return (false) ;
    }

    if (A->iso)
    {
        // A iso takes O(log(nvals(A))) time; do it on the CPU:
        return (false) ;
    }

    // see if there is enough work to do on the GPU
    double work = GB_nnz_held (A) ;
    int ngpus_to_use = GB_ngpus_to_use (work) ;
    GBURBLE (" work:%g gpus:%d ", work, ngpus_to_use) ;
    if (ngpus_to_use > 0)
    {
        // FIXME: gpu_id = GB_Context_gpu_id_get ( ) ;
        // cudaSetDevice (gpu_id) ;
        return (true) ;
    }
    else
    {
        return (false) ;
    }
}