File: GB_reduce_build_template.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 503; asm: 369; python: 125; awk: 10
file content (133 lines) | stat: -rw-r--r-- 5,018 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
//------------------------------------------------------------------------------
// GB_reduce_build_template.c: Tx=build(Sx), and assemble any duplicate tuples
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// This template is used in GB_builder and the Generated2/GB_red__* workers.
// This is the same for both vectors and matrices, since this step is agnostic
// about which vectors the entries appear in.

// Sx and Tx are either both iso or both non-iso.  For the iso case,
// GB_ISO_BUILD is defined, and K_work is NULL.  The iso case is not handled by
// the Generated2/GB_red__* workers, since it doesn't access the values at all.

{

    // k unused for some uses of this template
    #include "GB_unused.h"

    if (ndupl == 0)
    {

        //----------------------------------------------------------------------
        // no duplicates, just permute Sx into Tx
        //----------------------------------------------------------------------

        // If no duplicates are present, then GB_builder has already
        // transplanted I_work into T->i, so this step does not need to
        // construct T->i.  The tuple values, in Sx, are copied or permuted
        // into T->x.  This step is skipped if T and Sx are iso.

        #ifndef GB_ISO_BUILD

            if (K_work == NULL)
            {

                // K_work is NULL: the tuples are already in their final
                // order, so Tx is a straight typecasted copy of Sx.  Each
                // thread handles one contiguous slice of the tuples,
                // [tstart_slice [tid], tstart_slice [tid+1]).
                int tid ;
                #pragma omp parallel for num_threads(nthreads) schedule(static)
                for (tid = 0 ; tid < nthreads ; tid++)
                {
                    int64_t tstart = tstart_slice [tid] ;
                    int64_t tend   = tstart_slice [tid+1] ;
                    for (int64_t t = tstart ; t < tend ; t++)
                    { 
                        // Tx [t] = (ttype) Sx [t] ; with typecast
                        GB_CAST_ARRAY_TO_ARRAY (Tx, t, Sx, t) ;
                    }
                }

            }
            else
            {

                // K_work [t] gives the position in Sx of the t-th tuple in
                // sorted order, so this copy is a permutation of Sx into Tx,
                // with the same slice-per-thread partition as above.
                int tid ;
                #pragma omp parallel for num_threads(nthreads) schedule(static)
                for (tid = 0 ; tid < nthreads ; tid++)
                {
                    int64_t tstart = tstart_slice [tid] ;
                    int64_t tend   = tstart_slice [tid+1] ;
                    for (int64_t t = tstart ; t < tend ; t++)
                    { 
                        // Tx [t] = (ttype) Sx [K_work [t]] ; with typecast
                        GB_CAST_ARRAY_TO_ARRAY (Tx, t, Sx, K_work [t]) ;
                    }
                }
            }

        #endif

    }
    else
    {

        //----------------------------------------------------------------------
        // assemble duplicates
        //----------------------------------------------------------------------

        // If T and Sx are non-iso, entries in Sx must be copied into T->x, with
        // any duplicates summed via the operator.  T->i must also be
        // constructed.  T->x and Sx are not modified if they are iso.

        // Duplicate tuples are flagged by a negative I_work [t] (see the test
        // I_work [t] >= 0 below): each negative entry is a duplicate of the
        // nearest preceding unique tuple.  tnz_slice [tid] is the position in
        // T of the first unique tuple owned by slice tid, so each thread
        // writes to its own disjoint region of Ti and Tx.

        int tid ;
        #pragma omp parallel for num_threads(nthreads) schedule(static)
        for (tid = 0 ; tid < nthreads ; tid++)
        {
            int64_t my_tnz = tnz_slice [tid] ;
            int64_t tstart = tstart_slice [tid] ;
            int64_t tend   = tstart_slice [tid+1] ;

            // find the first unique tuple owned by this slice.  Any leading
            // duplicates belong to a unique tuple in a prior slice, and are
            // assembled by the thread that owns that prior slice.
            int64_t t ;
            for (t = tstart ; t < tend ; t++)
            { 
                // get the tuple and break if it is not a duplicate
                if (I_work [t] >= 0) break ;
            }

            // scan all tuples and assemble any duplicates
            for ( ; t < tend ; t++)
            {
                // get the t-th tuple, a unique tuple
                int64_t i = I_work [t] ;
                ASSERT (i >= 0) ;
                #ifndef GB_ISO_BUILD
                // k is the position of the tuple value in Sx (permuted via
                // K_work if present)
                int64_t k = (K_work == NULL) ? t : K_work [t] ;
                // Tx [my_tnz] = Sx [k] ; with typecast
                GB_CAST_ARRAY_TO_ARRAY (Tx, my_tnz, Sx, k) ;
                #endif
                Ti [my_tnz] = i ;

                // assemble all duplicates that follow it.  This may assemble
                // the first duplicates in the next slice(s) (up to but not
                // including the first unique tuple in the subsequent slice(s)).
                // Note that t is advanced here, and the loop bound is nvals
                // (all tuples), not tend, so trailing duplicates that spill
                // past this slice are consumed by this thread.  In the iso
                // case the loop body is empty but t must still advance.
                for ( ; t+1 < nvals && I_work [t+1] < 0 ; t++)
                { 
                    // assemble the duplicate tuple
                    #ifndef GB_ISO_BUILD
                    int64_t k = (K_work == NULL) ? (t+1) : K_work [t+1] ;
                    // Tx [my_tnz] += Sx [k] with typecast
                    GB_ADD_CAST_ARRAY_TO_ARRAY (Tx, my_tnz, Sx, k) ;
                    #endif
                }
                my_tnz++ ;
            }
        }
    }
}

#undef GB_ISO_BUILD