1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
|
#include "GB_cuda_ewise.hpp"
// GB_FREE_ALL: cleanup block for this file.  Any earlier definition is
// discarded by the #undef, and the replacement expands to a braced block that
// calls GB_cuda_release_stream (&stream).  It can therefore only be expanded
// where a variable named `stream` is in scope.
// NOTE(review): presumably expanded by GB_OK on its failure path; GB_OK is
// defined outside this file -- confirm.
#undef GB_FREE_ALL
#define GB_FREE_ALL \
{ \
GB_cuda_release_stream (&stream) ; \
}
// Launch geometry used by GB_cuda_colscale in this file.  BLOCK_SIZE is passed
// as the last argument of GB_cuda_colscale_jit (the block size, going by the
// comment at the call site), and LOG2_BLOCK_SIZE is the right-shift applied to
// the entry count when computing the grid size.  The two values must be kept
// in sync: BLOCK_SIZE == 2^LOG2_BLOCK_SIZE (128 == 2^7).
#define BLOCK_SIZE 128
#define LOG2_BLOCK_SIZE 7
//------------------------------------------------------------------------------
// GB_cuda_colscale: host-side launcher for the column-scale JIT kernel
//------------------------------------------------------------------------------

// Acquires a CUDA stream, derives a grid size from the number of entries held
// in A, and forwards C, A, D and the semiring's multiply operator to
// GB_cuda_colscale_jit.  The stream is released again before returning.
//
// Parameters:
//   C, A, D:   forwarded unchanged to GB_cuda_colscale_jit
//              (presumably C = A*D with D diagonal -- confirm against the
//              kernel, which is not part of this file)
//   semiring:  only semiring->multiply is read here
//   flipxy:    forwarded unchanged to GB_cuda_colscale_jit
//
// Returns GrB_SUCCESS once every GB_OK-wrapped step has passed.  Failure
// handling is whatever GB_OK does (defined elsewhere; presumably it runs
// GB_FREE_ALL and returns the failing GrB_Info -- confirm).

GrB_Info GB_cuda_colscale
(
    GrB_Matrix C,
    const GrB_Matrix A,
    const GrB_Matrix D,
    const GrB_Semiring semiring,
    const bool flipxy
)
{
    // not referenced directly below; presumably assigned inside GB_OK
    GrB_Info info ;

    cudaStream_t stream = nullptr ;
    GB_OK (GB_cuda_acquire_stream (&stream)) ;

    //--------------------------------------------------------------------------
    // compute gridsz (blocksz is BLOCK_SIZE), call GB_cuda_colscale_jit
    //--------------------------------------------------------------------------

    GrB_Index anz = GB_nnz_held (A) ;

    // Same formula as before (anz / BLOCK_SIZE, rounded down, plus one), but
    // evaluated entirely in GrB_Index so that nothing is lost before the
    // range check below.
    const GrB_Index nblocks = 1 + (anz >> LOG2_BLOCK_SIZE) ;

    // 0x7FFFFFFF is the largest value gridsz (an int32_t) can represent, and
    // also the CUDA limit on the x-dimension of a grid.  Clamping here replaces
    // the silent truncation that the implicit GrB_Index -> int32_t conversion
    // performed for very large anz.  The result is unchanged whenever nblocks
    // already fits.
    // NOTE(review): when the clamp is hit, the grid has fewer threads than
    // entries; this assumes the kernel loops over entries with a stride of the
    // whole grid -- confirm against the kernel source.
    const GrB_Index max_gridsz = 0x7FFFFFFF ;
    int32_t gridsz = (int32_t) ((nblocks < max_gridsz) ? nblocks : max_gridsz) ;

    GB_OK (GB_cuda_colscale_jit (C, A, D,
        semiring->multiply, flipxy, stream, gridsz, BLOCK_SIZE)) ;

    //--------------------------------------------------------------------------
    // release the stream and return result
    //--------------------------------------------------------------------------

    GB_OK (GB_cuda_release_stream (&stream)) ;
    return GrB_SUCCESS ;
}
|