File: GB_cuda_cumsum.cu

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 503; asm: 369; python: 125; awk: 10
file content (66 lines) | stat: -rw-r--r-- 2,285 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
//------------------------------------------------------------------------------
// GB_cuda_cumsum: cumlative sum of an array using GPU acceleration
//------------------------------------------------------------------------------

// SPDX-License-Identifier: Apache-2.0
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.

//------------------------------------------------------------------------------

// Compute the cumulative sum of an array count[0:n], of size n+1
// in pseudo-MATLAB notation:

//      k = sum (count [0:n-1] != 0) ;

//      count = cumsum ([0 count[0:n-1]]) ;

// That is, count [j] on input is overwritten with the value of
// sum (count [0..j-1]).  count [n] is implicitly zero on input.
// On output, count [n] is the total sum.

#include "GB_cuda.h"
#include <local_cub/device/device_scan.cuh>

GrB_Info GB_cuda_cumsum             // compute the cumulative sum of an array
(
    int64_t *restrict count,    // size n+1, input/output
    const int64_t n
)
{
    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (count != NULL) ;
    ASSERT (n >= 0) ;

    //--------------------------------------------------------------------------
    // count = cumsum ([0 count[0:n-1]]) ;
    //--------------------------------------------------------------------------
    void *d_temp_storage = NULL;
    size_t temp_storage_bytes;
    cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, count, count, (int)n);
    size_t size ;
    d_temp_storage  = GB_malloc_memory( temp_storage_bytes, 1, &size);
    if ( d_temp_storage == NULL){
       return GrB_OUT_OF_MEMORY;
    } 

    // Run
    CubDebugExit(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, count, count, n));

    // Check for correctness (and display results, if specified)
    #if 0
    #ifdef GB_DEBUG
    int compare = CompareDeviceResults(h_reference, count, num_items, true, g_verbose);
    ASSERT( compare == 0);
    #endif
    #endif

    // Cleanup
    GB_dealloc_memory (&d_temp_storage, size) ;

    return GrB_SUCCESS;
}