File: GB_ek_slice.h

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (228 lines) | stat: -rw-r--r-- 9,028 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
//------------------------------------------------------------------------------
// GB_ek_slice.h: slice the entries and vectors of a matrix
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

#ifndef GB_EK_SLICE_H
#define GB_EK_SLICE_H
#include "GB.h"

//------------------------------------------------------------------------------
// GB_ek_slice_ntasks: determine # of threads and tasks to use for GB_ek_slice
//------------------------------------------------------------------------------

static inline void GB_ek_slice_ntasks
(
    // output
    int *nthreads,              // # of threads to use for GB_ek_slice
    int *ntasks,                // # of tasks to create for GB_ek_slice
    // input
    GrB_Matrix A,               // matrix to slice
    int ntasks_per_thread,      // # of tasks per thread
    double work,                // total work to do
    double chunk,               // give each thread at least this much work
    int nthreads_max            // max # of threads to use
)
{
    int64_t anz = GB_nnz_held (A) ;
    if (anz == 0)
    {
        (*nthreads) = 1 ;
        (*ntasks) = 1 ;
    }
    else
    {
        (*nthreads) = GB_nthreads (work, chunk, nthreads_max) ;
        (*ntasks) = (*nthreads == 1) ? 1 : ((ntasks_per_thread) * (*nthreads)) ;
        (*ntasks) = GB_IMIN (*ntasks, anz) ;
        (*ntasks) = GB_IMAX (*ntasks, 1) ;
    }
}

//------------------------------------------------------------------------------
// GB_SLICE_MATRIX: slice a single matrix using GB_ek_slice
//------------------------------------------------------------------------------

#define GB_SLICE_MATRIX_WORK(X,NTASKS_PER_THREAD,chunk,work)                  \
    GB_ek_slice_ntasks (&(X ## _nthreads), &(X ## _ntasks), X,                \
        NTASKS_PER_THREAD, work, chunk, nthreads_max) ;                       \
    GB_WERK_PUSH (X ## _ek_slicing, 3*(X ## _ntasks)+1, int64_t) ;            \
    if (X ## _ek_slicing == NULL)                                             \
    {                                                                         \
        /* out of memory */                                                   \
        GB_FREE_ALL ;                                                         \
        return (GrB_OUT_OF_MEMORY) ;                                          \
    }                                                                         \
    GB_ek_slice (X ## _ek_slicing, X, X ## _ntasks) ;                         \
    const int64_t *kfirst_ ## X ## slice = X ## _ek_slicing ;                 \
    const int64_t *klast_  ## X ## slice = X ## _ek_slicing + X ## _ntasks ;  \
    const int64_t *pstart_ ## X ## slice = X ## _ek_slicing + X ## _ntasks*2 ;

#define GB_SLICE_MATRIX(X,NTASKS_PER_THREAD,chunk)                            \
    GB_ek_slice_ntasks (&(X ## _nthreads), &(X ## _ntasks), X,                \
        NTASKS_PER_THREAD, GB_nnz_held (X) + X->nvec, chunk, nthreads_max) ;  \
    GB_WERK_PUSH (X ## _ek_slicing, 3*(X ## _ntasks)+1, int64_t) ;            \
    if (X ## _ek_slicing == NULL)                                             \
    {                                                                         \
        /* out of memory */                                                   \
        GB_FREE_ALL ;                                                         \
        return (GrB_OUT_OF_MEMORY) ;                                          \
    }                                                                         \
    GB_ek_slice (X ## _ek_slicing, X, X ## _ntasks) ;                         \
    const int64_t *kfirst_ ## X ## slice = X ## _ek_slicing ;                 \
    const int64_t *klast_  ## X ## slice = X ## _ek_slicing + X ## _ntasks ;  \
    const int64_t *pstart_ ## X ## slice = X ## _ek_slicing + X ## _ntasks*2 ;

//------------------------------------------------------------------------------
// GB_ek_slice prototypes
//------------------------------------------------------------------------------

// Slice the entries of a matrix or vector into ntasks slices.

// Task t does entries pstart_slice [t] to pstart_slice [t+1]-1 and
// vectors kfirst_slice [t] to klast_slice [t].  The first and last vectors
// may be shared with prior slices and subsequent slices.

// On input, ntasks must be <= nnz (A), unless nnz (A) is zero.  In that
// case, ntasks must be 1.

void GB_ek_slice            // slice a matrix
(
    // output:
    int64_t *restrict A_ek_slicing,  // size 3*ntasks+1
    // input:
    GrB_Matrix A,                       // matrix to slice
    int ntasks                          // # of tasks
) ;

void GB_ek_slice_merge1     // merge column counts for the matrix C
(
    // input/output:
    int64_t *restrict Cp,                    // column counts
    // input:
    const int64_t *restrict Wfirst,          // size ntasks
    const int64_t *restrict Wlast,           // size ntasks
    const int64_t *ek_slicing,                  // size 3*ntasks+1
    const int ntasks                            // # of tasks
) ;

void GB_ek_slice_merge2     // merge final results for matrix C
(
    // output
    int64_t *C_nvec_nonempty,           // # of non-empty vectors in C
    int64_t *restrict Cp_kfirst,     // size ntasks
    // input/output
    int64_t *restrict Cp,            // size cnvec+1
    // input
    const int64_t cnvec,
    const int64_t *restrict Wfirst,          // size ntasks
    const int64_t *restrict Wlast,           // size ntasks
    const int64_t *ek_slicing,                  // size 3*ntasks+1
    const int ntasks,                   // # of tasks used to construct C
    const int nthreads,                 // # of threads to use
    GB_Context Context
) ;

//------------------------------------------------------------------------------
// GB_get_pA_and_pC: find the part of A(:,k) and C(:,k) for this task
//------------------------------------------------------------------------------

// The tasks were generated by GB_ek_slice.

static inline void GB_get_pA_and_pC
(
    // output
    int64_t *pA_start,
    int64_t *pA_end,
    int64_t *pC,
    // input
    int tid,            // task id
    int64_t k,          // current vector
    int64_t kfirst,     // first vector for this slice
    int64_t klast,      // last vector for this slice
    const int64_t *restrict pstart_slice,   // start of each slice in A
    const int64_t *restrict Cp_kfirst,      // start of each slice in C
    const int64_t *restrict Cp,             // vector pointers for C
    int64_t cvlen,                             // C->vlen
    const int64_t *restrict Ap,             // vector pointers for A
    int64_t avlen                              // A->vlen
)
{

    int64_t p0 = GBP (Ap, k, avlen) ;
    int64_t p1 = GBP (Ap, k+1, avlen) ;

    if (k == kfirst)
    { 
        // First vector for task tid; may only be partially owned.
        (*pA_start) = pstart_slice [tid] ;
        (*pA_end  ) = GB_IMIN (p1, pstart_slice [tid+1]) ;
        (*pC) = Cp_kfirst [tid] ;
    }
    else if (k == klast)
    { 
        // Last vector for task tid; may only be partially owned.
        (*pA_start) = p0 ;
        (*pA_end  ) = pstart_slice [tid+1] ;
        (*pC) = GBP (Cp, k, cvlen) ;
    }
    else
    { 
        // task tid entirely owns this vector A(:,k).
        (*pA_start) = p0 ;
        (*pA_end  ) = p1 ;
        (*pC) = GBP (Cp, k, cvlen) ;
    }
}

//------------------------------------------------------------------------------
// GB_get_pA: find the part of A(:,k) to be operated on by this task
//------------------------------------------------------------------------------

// The tasks were generated by GB_ek_slice.

static inline void GB_get_pA
(
    // output
    int64_t *pA_start,
    int64_t *pA_end,
    // input
    int tid,            // task id
    int64_t k,          // current vector
    int64_t kfirst,     // first vector for this slice
    int64_t klast,      // last vector for this slice
    const int64_t *restrict pstart_slice,   // start of each slice in A
    const int64_t *restrict Ap,             // vector pointers for A
    int64_t avlen                              // A->vlen
)
{

    int64_t p0 = GBP (Ap, k, avlen) ;
    int64_t p1 = GBP (Ap, k+1, avlen) ;

    if (k == kfirst)
    { 
        // First vector for task tid; may only be partially owned.
        (*pA_start) = pstart_slice [tid] ;
        (*pA_end  ) = GB_IMIN (p1, pstart_slice [tid+1]) ;
    }
    else if (k == klast)
    { 
        // Last vector for task tid; may only be partially owned.
        (*pA_start) = p0 ;
        (*pA_end  ) = pstart_slice [tid+1] ;
    }
    else
    { 
        // task tid entirely owns this vector A(:,k).
        (*pA_start) = p0 ;
        (*pA_end  ) = p1 ;
    }
}

#endif