File: GB_AxB_dot3_cuda_branch.cpp

package info (click to toggle)
suitesparse 1%3A5.12.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 176,720 kB
  • sloc: ansic: 1,193,914; cpp: 31,704; makefile: 6,638; fortran: 1,927; java: 1,826; csh: 765; ruby: 725; sh: 529; python: 333; perl: 225; sed: 164; awk: 35
file content (70 lines) | stat: -rw-r--r-- 2,851 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70

// Decide branch direction for GPU use for the dot-product MxM
extern "C" 
{
  #include "GB_mxm.h"
}
#include "GB_cuda.h"
#include <cuda_runtime.h>

bool GB_AxB_dot3_cuda_branch        // true if C<M>=A'*B should be done on the GPU
(
    const GrB_Matrix M,             // mask matrix
    const bool Mask_struct,         // if true, use only the structure of M
    const GrB_Matrix A,             // input matrix
    const GrB_Matrix B,             // input matrix
    const GrB_Semiring semiring,    // semiring that defines C=A*B
    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
    GB_Context Context
)
{

    // Mask_struct, flipxy, and Context do not affect the branch decision;
    // they are part of the call signature shared with the CPU path.

    // very rough estimate of the work to do: one dot product per entry in M,
    // each costing roughly min (avg degree of A, avg degree of B)
    double adeg = ((double) GB_nnz (A)) / ((double) GB_IMAX (1, A->nvec)) ;
    double bdeg = ((double) GB_nnz (B)) / ((double) GB_IMAX (1, B->nvec)) ;
    double work = GB_nnz (M) * GB_IMIN (adeg, bdeg) ;

    // TODO if A or B are not accessed (first, 2nd, or pair ops)
    // then the type of A can be user-defined here, for CUDA.

    // TODO: the test for a built-in semiring needs to be
    // removed, to allow for the generation of CUDA kernels for non-
    // built-in semirings.  The code generation process currently does not
    // support user-defined types and operators, but this needs to be
    // handled.  In addition, CUDA kernels could be built for semirings
    // that are not built-in, but consist solely of built-in types and
    // operators (such as BOR_BSHIFT on INT32 inputs).

    int ngpus_to_use = GB_ngpus_to_use (work) ;
    GBURBLE (" work:%g GPUs:%d ", work, ngpus_to_use) ;

    // take the GPU path only when there is enough work, both input types are
    // built-in, and no matrix uses a format the CUDA kernel cannot yet handle
    if (ngpus_to_use > 0
        // FIXME: FUTURE: user-defined types and operators
//      && (semiring->header_size == 0)     // semiring is built-in
        && (A->type->code != GB_UDT_code)
        && (B->type->code != GB_UDT_code)
        // FIXME: M could be hypersparse.  we should handle this
        && !GB_IS_HYPERSPARSE (M)
        // FIXME: this is easy
        && !A->iso && !B->iso
        // FIXME:
        && !GB_IS_BITMAP (A) && !GB_IS_BITMAP (B)
        && !GB_IS_FULL (A) && !GB_IS_FULL (B))
    {
        GBURBLE ("(GPU dot3) ") ;
        return (true) ;
    }
    else
    {
        // report which condition(s) ruled out the GPU path
        GBURBLE ("(CPU dot3: M hyper:%d A iso:%d B iso:%d A bitmap:%d"
            " B bitmap:%d A full:%d B full:%d) ",
            (int) GB_IS_HYPERSPARSE (M), (int) A->iso, (int) B->iso,
            (int) GB_IS_BITMAP (A), (int) GB_IS_BITMAP (B),
            (int) GB_IS_FULL (A), (int) GB_IS_FULL (B)) ;
        return (false) ;
    }
}