File: reduce_demo.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (104 lines) | stat: -rw-r--r-- 2,859 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
//------------------------------------------------------------------------------
// GraphBLAS/Demo/Program/reduce_demo: reduce a matrix to a scalar
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

#include "GraphBLAS.h"
#if defined ( _OPENMP )
#include <omp.h>
#endif

// #define N 65536
   #define N 16384

int main (void)
{

    #if defined ( _OPENMP )
    double t0 = omp_get_wtime ( ) ;
    #endif

    // start GraphBLAS
    GrB_init (GrB_NONBLOCKING) ;
    int nthreads ;
    GxB_Global_Option_get (GxB_GLOBAL_NTHREADS, &nthreads) ;
    printf ("demo: reduce a matrix to a scalar, nthreads: %d\n", nthreads) ;

    int nthreads_max ;
    GxB_Global_Option_get (GxB_GLOBAL_NTHREADS, &nthreads_max) ;
    printf ("# of threads: %d\n", nthreads_max) ;

    #if defined ( _OPENMP )
    t0 = omp_get_wtime ( ) - t0 ;
    printf ("GPU warmup time: %g\n", t0) ;
    t0 = omp_get_wtime ( ) ;
    #endif

    GrB_Index nrows = N ;
    GrB_Index ncols = N ;
    GrB_Matrix A ;
    GrB_Matrix_new (&A, GrB_INT64, nrows, ncols) ;

    GrB_Index *I = (GrB_Index *) malloc (nrows * ncols * sizeof (GrB_Index)) ;
    GrB_Index *J = (GrB_Index *) malloc (nrows * ncols * sizeof (GrB_Index)) ;
    int64_t   *X = (int64_t   *) malloc (nrows * ncols * sizeof (int64_t)) ;

    int64_t k ;
    #pragma omp parallel for num_threads(nthreads_max) schedule(static)
    for (k = 0 ; k < N*N ; k++)
    {
        // k = i * N + j ;
        int64_t i = k / N ;
        int64_t j = k % N ;
        // int x = (int) (rand ( ) & 0xFF) ;
        int x = (int) (k & 0xFF) ;
        I [k] = i ;
        J [k] = j ;
        X [k] = x ;
    }

    GrB_Index nvals = N*N ;
    GrB_Matrix_build_INT64 (A, I, J, X, nvals, GrB_PLUS_INT64) ;

    free (I) ;
    free (J) ;
    free (X) ;

    #if defined ( _OPENMP )
    t0 = omp_get_wtime ( ) - t0 ;
    printf ("time to create matrix: %g\n", t0) ;
    #endif

    GrB_Index result ;

    double t1 ;

    printf ("\nreduce to a scalar:\n") ;

    for (int nthreads = 1 ; nthreads <= nthreads_max ; nthreads++)
    {
        GxB_Global_Option_set (GxB_GLOBAL_NTHREADS, nthreads) ;
        #if defined ( _OPENMP )
        double t = omp_get_wtime ( ) ;
        #endif
        GrB_Matrix_reduce_UINT64 (&result, NULL, GrB_PLUS_MONOID_INT64,
            A, NULL) ;
        #if defined ( _OPENMP )
        t = omp_get_wtime ( ) - t ;
        if (nthreads == 1) t1 = t ;
        printf ("nthreads %3d time: %12.6f speedup %8.2f\n", 
            nthreads, t, t1/t) ;
        #endif
    }

    printf ("result %" PRId64 "\n", result) ;

    // free everyting
    GrB_Matrix_free (&A) ;
    GrB_finalize ( ) ;
}