File: GB_cuda_colscale.cpp

package info (click to toggle)
suitesparse 1%3A7.11.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 258,172 kB
  • sloc: ansic: 1,153,566; cpp: 48,145; makefile: 4,997; fortran: 2,087; java: 1,826; sh: 1,113; ruby: 725; python: 676; asm: 371; sed: 166; awk: 44
file content (36 lines) | stat: -rw-r--r-- 892 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include "GB_cuda_ewise.hpp"

// Cleanup hook for this file: hands the local variable `stream` back via
// GB_cuda_release_stream.  Any function that expands this macro must have a
// variable named `stream` in scope.
// NOTE(review): presumably GB_OK expands GB_FREE_ALL on failure before
// returning; GB_OK is defined outside this file -- confirm.
#undef  GB_FREE_ALL
#define GB_FREE_ALL                                         \
{                                                           \
    GB_cuda_release_stream (&stream) ;                      \
}

// BLOCK_SIZE is passed as the last argument of GB_cuda_colscale_jit, and
// LOG2_BLOCK_SIZE is the shift used to derive the grid size from the entry
// count.  The two must be kept in sync: 128 == (1 << 7).
#define BLOCK_SIZE 128
#define LOG2_BLOCK_SIZE 7

//------------------------------------------------------------------------------
// GB_cuda_colscale: host-side driver for the CUDA column-scale JIT kernel
//------------------------------------------------------------------------------

// Acquires a CUDA stream, derives the launch geometry from the number of
// entries held in A, forwards the work to GB_cuda_colscale_jit, and releases
// the stream again.
//
// Parameters:
//      C, A, D     passed through unchanged to GB_cuda_colscale_jit.
//                  NOTE(review): by its name this presumably computes C = A*D
//                  for a diagonal D -- confirm against the JIT kernel.
//      semiring    only its multiply operator is used here.
//      flipxy      passed through unchanged to GB_cuda_colscale_jit.
//
// Returns GrB_SUCCESS when every step succeeds.  Each helper call is wrapped
// in GB_OK; presumably a failing call triggers GB_FREE_ALL and returns the
// error code (GB_OK is defined outside this file -- confirm).

GrB_Info GB_cuda_colscale
(
    GrB_Matrix C,
    const GrB_Matrix A,
    const GrB_Matrix D,
    const GrB_Semiring semiring,
    const bool flipxy
)
{

    //--------------------------------------------------------------------------
    // acquire a stream
    //--------------------------------------------------------------------------

    // `stream` must keep this exact name: GB_FREE_ALL refers to it.  `info`
    // is presumably the variable GB_OK assigns to -- do not rename either.
    cudaStream_t stream = nullptr ;
    GrB_Info info ;
    GB_OK (GB_cuda_acquire_stream (&stream)) ;

    //--------------------------------------------------------------------------
    // compute the launch geometry
    //--------------------------------------------------------------------------

    // One block per BLOCK_SIZE entries held in A, plus one, so the grid is
    // never empty (an entry count of zero still yields a single block).
    // NOTE(review): the count is narrowed to int32_t here; an extremely large
    // entry count would not fit -- confirm this cannot occur in practice.
    GrB_Index entries_held = GB_nnz_held (A) ;
    int32_t nblocks = 1 + (entries_held >> LOG2_BLOCK_SIZE) ;

    //--------------------------------------------------------------------------
    // call GB_cuda_colscale_jit with the semiring's multiply operator
    //--------------------------------------------------------------------------

    GB_OK (GB_cuda_colscale_jit (C, A, D, semiring->multiply, flipxy,
        stream, nblocks, BLOCK_SIZE)) ;

    //--------------------------------------------------------------------------
    // release the stream and return result
    //--------------------------------------------------------------------------

    GB_OK (GB_cuda_release_stream (&stream)) ;
    return GrB_SUCCESS ;
}