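//------------------------------------------------------------------------------
// GB_cuda_apply_bind2nd_kernel: Cx [p] = op (Ax [p], x), where the scalar x is
// bound to the second input of the binary operator (the "bind2nd" apply)
//------------------------------------------------------------------------------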
using namespace cooperative_groups ;

__global__ void GB_cuda_apply_bind2nd_kernel
(
    GB_void *Cx_out,
    GrB_Matrix A,
    const GB_void *scalarx
)
{
    const GB_Y_TYPE x = * ((GB_Y_TYPE *) scalarx) ;     // gets scalarx [0]
    const GB_A_TYPE *__restrict__ Ax = (GB_A_TYPE *) A->x ;
    GB_C_TYPE *__restrict__ Cx = (GB_C_TYPE *) Cx_out ;

    #if ( GB_A_IS_BITMAP )
    const int8_t *__restrict__ Ab = A->b ;              // bitmap of A
    #endif

    GB_A_NHELD (nvals) ;        // nvals = # of entries held in A

    // grid-stride loop over the entries of A
    int tid = blockDim.x * blockIdx.x + threadIdx.x ;
    int nthreads = blockDim.x * gridDim.x ;

    for (int64_t p = tid ; p < nvals ; p += nthreads)
    {
        if (!GBb_A (Ab, p)) { continue ; }      // skip entry if not in bitmap
        GB_DECLAREA (aij) ;                     // declare the scalar aij
        GB_GETA (aij, Ax, p, false) ;           // aij = Ax [p]
        GB_EWISEOP (Cx, p, aij, x, /* i */, /* j */) ;  // Cx [p] = op (aij, x)
    }
}
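//------------------------------------------------------------------------------
// GB_jit_kernel: host function that launches the CUDA kernel above
//------------------------------------------------------------------------------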
extern "C" {
GB_JIT_CUDA_KERNEL_APPLY_BIND2ND_PROTO (GB_jit_kernel) ;
}
GB_JIT_CUDA_KERNEL_APPLY_BIND2ND_PROTO (GB_jit_kernel)
{
GB_GET_CALLBACKS ;
ASSERT (Cx != NULL) ;
dim3 grid (gridsz) ;
dim3 block (blocksz) ;
GB_cuda_apply_bind2nd_kernel <<<grid, block, 0, stream>>> (Cx, A, scalarx) ;
return (GrB_SUCCESS) ;
}