File: GB_reduce_to_scalar_template.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (113 lines) | stat: -rw-r--r-- 4,041 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//------------------------------------------------------------------------------
// GB_reduce_to_scalar_template: s=reduce(A), reduce a matrix to a scalar
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// Reduce a matrix to a scalar, with typecasting and generic operators.
// No panel is used.

{

    // This is a template body, not a standalone function.  It uses, but does
    // not declare, the following names: A, s, nthreads, ntasks, W, F, the type
    // GB_ATYPE, and all GB_* macros.  They must be provided by whatever code
    // includes this file.
    //
    // Effect: every entry of A that is neither a zombie nor absent from the
    // bitmap is folded into the scalar s with GB_ADD_CAST_ARRAY_TO_SCALAR,
    // except that the scan may stop early once a terminal value is seen (only
    // when GB_HAS_TERMINAL is enabled).  In the multi-task case, W and F are
    // used as per-task workspace, indexed by task id.

    //--------------------------------------------------------------------------
    // get A
    //--------------------------------------------------------------------------

    // Ab: bitmap of A, tested via GBB (Ab, p) below
    // Ai: indices of A, tested for zombies via GB_IS_ZOMBIE (Ai [p]) below
    // Ax: values of A, viewed as an array of GB_ATYPE
    const int8_t   *restrict Ab = A->b ;
    const int64_t  *restrict Ai = A->i ;
    const GB_ATYPE *restrict Ax = (GB_ATYPE *) A->x ;
    // anz is the upper bound of every scan over Ai/Ab/Ax in this block
    int64_t anz = GB_nnz_held (A) ;
    ASSERT (anz > 0) ;
    // the zombie test on Ai [p] is only evaluated when A has zombies
    const bool A_has_zombies = (A->nzombies > 0) ;
    // this template is not meant for iso-valued matrices
    ASSERT (!A->iso) ;

    //--------------------------------------------------------------------------
    // reduce A to a scalar
    //--------------------------------------------------------------------------

    if (nthreads == 1)
    {

        //----------------------------------------------------------------------
        // single thread
        //----------------------------------------------------------------------

        // accumulate directly into s; no workspace (W, F) is touched
        for (int64_t p = 0 ; p < anz ; p++)
        { 
            // skip if the entry is a zombie or if not in the bitmap
            if (A_has_zombies && GB_IS_ZOMBIE (Ai [p])) continue ;
            if (!GBB (Ab, p)) continue ;
            // s = op (s, (ztype) Ax [p])
            GB_ADD_CAST_ARRAY_TO_SCALAR (s, Ax, p) ;
            // check for early exit
            #if GB_HAS_TERMINAL
            // once s is terminal, the remaining entries are not visited
            if (GB_IS_TERMINAL (s)) break ;
            #endif
        }

    }
    else
    {

        //----------------------------------------------------------------------
        // each thread reduces its own slice in parallel
        //----------------------------------------------------------------------

        // early_exit is shared by all tasks.  It is only accessed through
        // GB_ATOMIC_READ / GB_ATOMIC_WRITE, which are defined elsewhere
        // (NOTE(review): presumably OpenMP atomic pragmas that apply to the
        // statement that immediately follows -- confirm; keep each macro
        // directly in front of its statement).
        bool early_exit = false ;
        int tid ;

        // ntasks tasks are handed out one at a time to nthreads threads
        #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
        for (tid = 0 ; tid < ntasks ; tid++)
        {
            // this task scans positions pstart to pend-1, as set by
            // GB_PARTITION from anz, tid, and ntasks
            int64_t pstart, pend ;
            GB_PARTITION (pstart, pend, anz, tid, ntasks) ;
            // ztype t = identity
            GB_SCALAR_IDENTITY (t) ;
            // found: true once this task has folded at least one entry into t.
            // my_exit: snapshot of early_exit, taken once when the task starts.
            bool my_exit, found = false ;
            GB_ATOMIC_READ
            my_exit = early_exit ;
            // If another task already reached a terminal value, skip the scan;
            // found stays false, so this task's W [tid] is ignored below.
            // The flag is not re-read inside the loop, so a task that has
            // already started is not interrupted by a later early_exit.
            if (!my_exit)
            {
                for (int64_t p = pstart ; p < pend ; p++)
                { 
                    // skip if the entry is a zombie or if not in the bitmap
                    if (A_has_zombies && GB_IS_ZOMBIE (Ai [p])) continue ;
                    if (!GBB (Ab, p)) continue ;
                    found = true ;
                    // t = op (t, (ztype) Ax [p]), with typecast
                    GB_ADD_CAST_ARRAY_TO_SCALAR (t, Ax, p) ;
                    // check for early exit
                    #if GB_HAS_TERMINAL
                    if (GB_IS_TERMINAL (t))
                    { 
                        // tell the other tasks to exit early
                        GB_ATOMIC_WRITE
                        early_exit = true ;
                        break ;
                    }
                    #endif
                }
            }
            // record the per-task result; both stores happen even when the
            // scan was skipped, so every F [tid] and W [tid] is written
            F [tid] = found ;
            // W [tid] = t, no typecast
            GB_COPY_SCALAR_TO_ARRAY (W, tid, t) ;
        }

        //----------------------------------------------------------------------
        // sum up the results of each slice using a single thread
        //----------------------------------------------------------------------

        // Only tasks that folded at least one entry (F [tid] true) contribute.
        // This loop declares its own tid, shadowing the one declared above.
        for (int tid = 0 ; tid < ntasks ; tid++)
        {
            if (F [tid])
            { 
                // s = op (s, W [tid]), no typecast
                GB_ADD_ARRAY_TO_SCALAR (s, W, tid) ;
            }
        }
    }
}