File: GB_cuda_reduce_to_scalar_jit.cpp

package info (click to toggle)
suitesparse 1%3A7.10.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 254,920 kB
  • sloc: ansic: 1,134,743; cpp: 46,133; makefile: 4,875; fortran: 2,087; java: 1,826; sh: 996; ruby: 725; python: 495; asm: 371; sed: 166; awk: 44
file content (62 lines) | stat: -rw-r--r-- 2,336 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
//------------------------------------------------------------------------------
// GB_cuda_reduce_to_scalar_jit: reduce a matrix to a scalar, via the CUDA JIT
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

#include "GB_cuda_reduce.hpp"

extern "C"
{
    typedef GB_JIT_CUDA_KERNEL_REDUCE_PROTO ((*GB_jit_dl_function)) ;
}

GrB_Info GB_cuda_reduce_to_scalar_jit   // z = reduce_to_scalar (A) via CUDA JIT
(
    // output:
    GB_void *z,                 // result if has_cheeseburger is true
    GrB_Matrix V,               // result if has_cheeseburger is false
    // input:
    const GrB_Monoid monoid,    // monoid to do the reduction
    const GrB_Matrix A,         // matrix to reduce
    // CUDA stream and launch parameters:
    cudaStream_t stream,
    int32_t gridsz,
    int32_t blocksz
)
{ 

    //--------------------------------------------------------------------------
    // encodify the problem
    //--------------------------------------------------------------------------

    GB_jit_encoding encoding ;
    char *suffix ;
    uint64_t hash = GB_encodify_reduce (&encoding, &suffix,
        GB_JIT_CUDA_KERNEL_REDUCE, monoid, A) ;

    // FIXME: could get has_cheesburger here, and allocate zscalar
    // and V accordingly.

    //--------------------------------------------------------------------------
    // get the kernel function pointer, loading or compiling it if needed
    //--------------------------------------------------------------------------

    void *dl_function ;
    GrB_Info info = GB_jitifyer_load (&dl_function,
        GB_jit_reduce_family, "cuda_reduce",
        hash, &encoding, suffix, NULL, monoid,
        NULL, A->type, NULL, NULL) ;
    if (info != GrB_SUCCESS) return (info) ;

    //--------------------------------------------------------------------------
    // call the jit kernel and return result
    //--------------------------------------------------------------------------

    GB_jit_dl_function GB_jit_kernel = (GB_jit_dl_function) dl_function ;
    return (GB_jit_kernel (z, V, A, stream, gridsz, blocksz, &GB_callback)) ;
}