File: GB_ewise_slice.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (553 lines) | stat: -rw-r--r-- 20,936 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
//------------------------------------------------------------------------------
// GB_ewise_slice: slice the entries and vectors for an ewise operation
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// Constructs a set of tasks to compute C, for an element-wise operation that
// operates on two input matrices, C=op(A,B).  These include:
// GB_add, GB_emult, and GB_masker, and many GB_subassign_* methods
// (02, 04, 06s_and_14, 08n, 08s_and_16, 09, 10_and_18, 11, 12_and_20).

// The mask is ignored for computing where to slice the work, but it is sliced
// once the location has been found.

// M, A, B: any sparsity structure (hypersparse, sparse, bitmap, or full).
// C: constructed as sparse or hypersparse in the caller.

#define GB_FREE_WORKSPACE                       \
{                                               \
    GB_WERK_POP (Coarse, int64_t) ;             \
    GB_FREE_WORK (&Cwork, Cwork_size) ;         \
}

#define GB_FREE_ALL                             \
{                                               \
    GB_FREE_WORKSPACE ;                         \
    GB_FREE_WORK (&TaskList, TaskList_size) ;   \
}

#include "GB.h"

//------------------------------------------------------------------------------
// GB_ewise_slice
//------------------------------------------------------------------------------

GrB_Info GB_ewise_slice
(
    // output:
    GB_task_struct **p_TaskList,    // array of structs
    size_t *p_TaskList_size,        // size of TaskList
    int *p_ntasks,                  // # of tasks constructed
    int *p_nthreads,                // # of threads for eWise operation
    // input:
    const int64_t Cnvec,            // # of vectors of C
    const int64_t *restrict Ch,     // vectors of C, if hypersparse
    const int64_t *restrict C_to_M, // mapping of C to M
    const int64_t *restrict C_to_A, // mapping of C to A
    const int64_t *restrict C_to_B, // mapping of C to B
    bool Ch_is_Mh,                  // if true, then Ch == Mh; GB_add only
    const GrB_Matrix M,             // mask matrix to slice (optional)
    const GrB_Matrix A,             // matrix to slice
    const GrB_Matrix B,             // matrix to slice
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (p_TaskList != NULL) ;
    ASSERT (p_TaskList_size != NULL) ;
    ASSERT (p_ntasks != NULL) ;
    ASSERT (p_nthreads != NULL) ;

    ASSERT_MATRIX_OK (A, "A for ewise_slice", GB0) ;
    ASSERT (!GB_ZOMBIES (A)) ; 
    ASSERT (!GB_JUMBLED (A)) ;
    ASSERT (!GB_PENDING (A)) ; 

    ASSERT_MATRIX_OK (B, "B for ewise_slice", GB0) ;
    ASSERT (!GB_ZOMBIES (B)) ; 
    ASSERT (!GB_JUMBLED (B)) ;
    ASSERT (!GB_PENDING (B)) ; 

    ASSERT_MATRIX_OK_OR_NULL (M, "M for ewise_slice", GB0) ;
    ASSERT (!GB_ZOMBIES (M)) ; 
    ASSERT (!GB_JUMBLED (M)) ;
    ASSERT (!GB_PENDING (M)) ; 

    (*p_TaskList  ) = NULL ;
    (*p_TaskList_size) = 0 ;
    (*p_ntasks    ) = 0 ;
    (*p_nthreads  ) = 1 ;

    int64_t *restrict Cwork = NULL ; size_t Cwork_size = 0 ;
    GB_WERK_DECLARE (Coarse, int64_t) ;     // size ntasks1+1
    int ntasks1 = 0 ;

    //--------------------------------------------------------------------------
    // determine # of threads to use
    //--------------------------------------------------------------------------

    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;

    //--------------------------------------------------------------------------
    // allocate the initial TaskList
    //--------------------------------------------------------------------------

    // Allocate the TaskList to hold at least 2*ntask0 tasks.  It will grow
    // later, if needed.  Usually, 64*nthreads_max is enough, but in a few cases
    // fine tasks can cause this number to be exceeded.  If that occurs,
    // TaskList is reallocated.

    // When the mask is present, it is often fastest to break the work up
    // into tasks, even when nthreads_max is 1.

    GB_task_struct *restrict TaskList = NULL ; size_t TaskList_size = 0 ;
    int max_ntasks = 0 ;
    int ntasks0 = (M == NULL && nthreads_max == 1) ? 1 : (32 * nthreads_max) ;
    GB_REALLOC_TASK_WORK (TaskList, ntasks0, max_ntasks) ;

    //--------------------------------------------------------------------------
    // check for quick return for a single task
    //--------------------------------------------------------------------------

    if (Cnvec == 0 || ntasks0 == 1)
    { 
        // construct a single coarse task that computes all of C
        TaskList [0].kfirst = 0 ;
        TaskList [0].klast  = Cnvec-1 ;
        (*p_TaskList  ) = TaskList ;
        (*p_TaskList_size) = TaskList_size ;
        (*p_ntasks    ) = (Cnvec == 0) ? 0 : 1 ;
        (*p_nthreads  ) = 1 ;
        return (GrB_SUCCESS) ;
    }

    //--------------------------------------------------------------------------
    // get A, B, and M
    //--------------------------------------------------------------------------

    const int64_t vlen = A->vlen ;
    const int64_t *restrict Ap = A->p ;
    const int64_t *restrict Ai = A->i ;
    const int64_t *restrict Bp = B->p ;
    const int64_t *restrict Bi = B->i ;
    bool Ch_is_Ah = (Ch != NULL && A->h != NULL && Ch == A->h) ;
    bool Ch_is_Bh = (Ch != NULL && B->h != NULL && Ch == B->h) ;

    const int64_t *restrict Mp = NULL ;
    const int64_t *restrict Mi = NULL ;
    bool M_is_hyper = GB_IS_HYPERSPARSE (M) ;
    if (M != NULL)
    { 
        Mp = M->p ;
        Mi = M->i ;
        // Ch_is_Mh is true if either true on input (for GB_add, which denotes
        // that Ch is a deep copy of M->h), or if Ch is a shallow copy of M->h.
        Ch_is_Mh = Ch_is_Mh || (Ch != NULL && M_is_hyper && Ch == M->h) ;
    }

    //--------------------------------------------------------------------------
    // allocate workspace
    //--------------------------------------------------------------------------

    Cwork = GB_MALLOC_WORK (Cnvec+1, int64_t, &Cwork_size) ;
    if (Cwork == NULL)
    { 
        // out of memory
        GB_FREE_ALL ;
        return (GrB_OUT_OF_MEMORY) ;
    }

    //--------------------------------------------------------------------------
    // compute an estimate of the work for each vector of C
    //--------------------------------------------------------------------------

    int nthreads_for_Cwork = GB_nthreads (Cnvec, chunk, nthreads_max) ;

    int64_t k ;
    #pragma omp parallel for num_threads(nthreads_for_Cwork) schedule(static)
    for (k = 0 ; k < Cnvec ; k++)
    {

        //----------------------------------------------------------------------
        // get the C(:,j) vector
        //----------------------------------------------------------------------

        int64_t j = GBH (Ch, k) ;

        //----------------------------------------------------------------------
        // get the corresponding vector of A
        //----------------------------------------------------------------------

        int64_t kA ;
        if (C_to_A != NULL)
        { 
            // A is hypersparse and the C_to_A mapping has been created
            ASSERT (GB_IS_HYPERSPARSE (A)) ;
            kA = C_to_A [k] ;
            ASSERT (kA >= -1 && kA < A->nvec) ;
            if (kA >= 0)
            {
                ASSERT (j == GBH (A->h, kA)) ;
            }
        }
        else if (Ch_is_Ah)
        { 
            // A is hypersparse, but Ch is a shallow copy of A->h
            ASSERT (GB_IS_HYPERSPARSE (A)) ;
            kA = k ;
            ASSERT (j == A->h [kA]) ;
        }
        else
        { 
            // A is sparse, bitmap, or full
            ASSERT (!GB_IS_HYPERSPARSE (A)) ;
            kA = j ;
        }

        //----------------------------------------------------------------------
        // get the corresponding vector of B
        //----------------------------------------------------------------------

        int64_t kB ;
        if (C_to_B != NULL)
        { 
            // B is hypersparse and the C_to_B mapping has been created
            ASSERT (GB_IS_HYPERSPARSE (B)) ;
            kB = C_to_B [k] ;
            ASSERT (kB >= -1 && kB < B->nvec) ;
            if (kB >= 0)
            {
                ASSERT (j == GBH (B->h, kB)) ;
            }
        }
        else if (Ch_is_Bh)
        { 
            // B is hypersparse, but Ch is a shallow copy of B->h
            ASSERT (GB_IS_HYPERSPARSE (B)) ;
            kB = k ;
            ASSERT (j == B->h [kB]) ;
        }
        else
        { 
            // B is sparse, bitmap, or full
            ASSERT (!GB_IS_HYPERSPARSE (B)) ;
            kB = j ;
        }

        //----------------------------------------------------------------------
        // estimate the work for C(:,j)
        //----------------------------------------------------------------------

        ASSERT (kA >= -1 && kA < A->nvec) ;
        ASSERT (kB >= -1 && kB < B->nvec) ;
        const int64_t aknz = (kA < 0) ? 0 :
            ((Ap == NULL) ? vlen : (Ap [kA+1] - Ap [kA])) ;
        const int64_t bknz = (kB < 0) ? 0 :
            ((Bp == NULL) ? vlen : (Bp [kB+1] - Bp [kB])) ;

        Cwork [k] = aknz + bknz + 1 ;
    }

    //--------------------------------------------------------------------------
    // replace Cwork with its cumulative sum
    //--------------------------------------------------------------------------

    GB_cumsum (Cwork, Cnvec, NULL, nthreads_for_Cwork, Context) ;
    double cwork = (double) Cwork [Cnvec] ;

    //--------------------------------------------------------------------------
    // determine # of threads and tasks for the eWise operation
    //--------------------------------------------------------------------------

    int nthreads = GB_nthreads (cwork, chunk, nthreads_max) ;

    ntasks0 = (M == NULL && nthreads == 1) ? 1 : (32 * nthreads) ;
    double target_task_size = cwork / (double) (ntasks0) ;
    target_task_size = GB_IMAX (target_task_size, chunk) ;
    ntasks1 = cwork / target_task_size ;
    ntasks1 = GB_IMAX (ntasks1, 1) ;

    //--------------------------------------------------------------------------
    // slice the work into coarse tasks
    //--------------------------------------------------------------------------

    GB_WERK_PUSH (Coarse, ntasks1 + 1, int64_t) ;
    if (Coarse == NULL)
    { 
        // out of memory
        GB_FREE_ALL ;
        return (GrB_OUT_OF_MEMORY) ;
    }
    GB_pslice (Coarse, Cwork, Cnvec, ntasks1, false) ;

    //--------------------------------------------------------------------------
    // construct all tasks, both coarse and fine
    //--------------------------------------------------------------------------

    int ntasks = 0 ;

    for (int t = 0 ; t < ntasks1 ; t++)
    {

        //----------------------------------------------------------------------
        // coarse task computes C (:,k:klast)
        //----------------------------------------------------------------------

        int64_t k = Coarse [t] ;
        int64_t klast  = Coarse [t+1] - 1 ;

        if (k >= Cnvec)
        { 

            //------------------------------------------------------------------
            // all tasks have been constructed
            //------------------------------------------------------------------

            break ;

        }
        else if (k < klast)
        { 

            //------------------------------------------------------------------
            // coarse task has 2 or more vectors
            //------------------------------------------------------------------

            // This is a non-empty coarse-grain task that does two or more
            // entire vectors of C, vectors k:klast, inclusive.
            GB_REALLOC_TASK_WORK (TaskList, ntasks + 1, max_ntasks) ;
            TaskList [ntasks].kfirst = k ;
            TaskList [ntasks].klast  = klast ;
            ntasks++ ;

        }
        else
        {

            //------------------------------------------------------------------
            // coarse task has 0 or 1 vectors
            //------------------------------------------------------------------

            // As a coarse-grain task, this task is empty or does a single
            // vector, k.  Vector k must be removed from the work done by this
            // and any other coarse-grain task, and split into one or more
            // fine-grain tasks.

            for (int tt = t ; tt < ntasks1 ; tt++)
            {
                // remove k from the initial slice tt
                if (Coarse [tt] == k)
                { 
                    // remove k from task tt
                    Coarse [tt] = k+1 ;
                }
                else
                { 
                    // break, k not in task tt
                    break ;
                }
            }

            //------------------------------------------------------------------
            // get the vector of C
            //------------------------------------------------------------------

            int64_t j = GBH (Ch, k) ;

            //------------------------------------------------------------------
            // get the corresponding vector of A
            //------------------------------------------------------------------

            int64_t kA ;
            if (C_to_A != NULL)
            { 
                // A is hypersparse and the C_to_A mapping has been created
                ASSERT (GB_IS_HYPERSPARSE (A)) ;
                kA = C_to_A [k] ;
            }
            else if (Ch_is_Ah)
            { 
                // A is hypersparse, but Ch is a shallow copy of A->h
                ASSERT (GB_IS_HYPERSPARSE (A)) ;
                kA = k ;
            }
            else
            { 
                // A is sparse, bitmap, or full
                ASSERT (!GB_IS_HYPERSPARSE (A)) ;
                kA = j ;
            }
            int64_t pA_start = (kA < 0) ? (-1) : GBP (Ap, kA, vlen) ;
            int64_t pA_end   = (kA < 0) ? (-1) : GBP (Ap, kA+1, vlen) ;
            bool a_empty = (pA_end == pA_start) ;

            //------------------------------------------------------------------
            // get the corresponding vector of B
            //------------------------------------------------------------------

            int64_t kB ;
            if (C_to_B != NULL)
            { 
                // B is hypersparse and the C_to_B mapping has been created
                ASSERT (GB_IS_HYPERSPARSE (B)) ;
                kB = C_to_B [k] ;
            }
            else if (Ch_is_Bh)
            { 
                // B is hypersparse, but Ch is a shallow copy of B->h
                ASSERT (GB_IS_HYPERSPARSE (B)) ;
                kB = k ;
            }
            else
            { 
                // B is sparse, bitmap, or full
                ASSERT (!GB_IS_HYPERSPARSE (B)) ;
                kB = j ;
            }
            int64_t pB_start = (kB < 0) ? (-1) : GBP (Bp, kB, vlen) ;
            int64_t pB_end   = (kB < 0) ? (-1) : GBP (Bp, kB+1, vlen) ;
            bool b_empty = (pB_end == pB_start) ;

            //------------------------------------------------------------------
            // get the corresponding vector of M, if present
            //------------------------------------------------------------------

            // M can have any sparsity structure (hyper, sparse, bitmap, full)

            int64_t pM_start = -1 ;
            int64_t pM_end   = -1 ;
            if (M != NULL)
            {
                int64_t kM ;
                if (C_to_M != NULL)
                { 
                    // M is hypersparse and the C_to_M mapping has been created
                    ASSERT (GB_IS_HYPERSPARSE (M)) ;
                    kM = C_to_M [k] ;
                }
                else if (Ch_is_Mh)
                { 
                    // M is hypersparse, but Ch is a copy of Mh
                    ASSERT (GB_IS_HYPERSPARSE (M)) ;
                    // Ch is a deep or shallow copy of Mh
                    kM = k ;
                }
                else
                { 
                    // M is sparse, bitmap, or full
                    ASSERT (!GB_IS_HYPERSPARSE (M)) ;
                    kM = j ;
                }
                pM_start = (kM < 0) ? -1 : GBP (Mp, kM, vlen) ;
                pM_end   = (kM < 0) ? -1 : GBP (Mp, kM+1, vlen) ;
            }
            bool m_empty = (pM_end == pM_start) ;

            //------------------------------------------------------------------
            // determine the # of fine-grain tasks to create for vector k
            //------------------------------------------------------------------

            double ckwork = Cwork [k+1] - Cwork [k] ;
            int nfine = ckwork / target_task_size ;
            nfine = GB_IMAX (nfine, 1) ;

            // make the TaskList bigger, if needed
            GB_REALLOC_TASK_WORK (TaskList, ntasks + nfine, max_ntasks) ;

            //------------------------------------------------------------------
            // create the fine-grain tasks
            //------------------------------------------------------------------

            if (nfine == 1)
            { 

                //--------------------------------------------------------------
                // this is a single coarse task for all of vector k
                //--------------------------------------------------------------

                TaskList [ntasks].kfirst = k ;
                TaskList [ntasks].klast  = k ;
                ntasks++ ;

            }
            else
            {

                //--------------------------------------------------------------
                // slice vector k into nfine fine tasks
                //--------------------------------------------------------------

                // first fine task starts at the top of vector k
                ASSERT (ntasks < max_ntasks) ;
                TaskList [ntasks].kfirst = k ;
                TaskList [ntasks].klast  = -1 ; // this is a fine task
                TaskList [ntasks].pM = (m_empty) ? -1 : pM_start ;
                TaskList [ntasks].pA = (a_empty) ? -1 : pA_start ;
                TaskList [ntasks].pB = (b_empty) ? -1 : pB_start ;
                TaskList [ntasks].len = 0 ;     // to be determined below
                ntasks++ ;
                int64_t ilast = 0, i = 0 ;

                for (int tfine = 1 ; tfine < nfine ; tfine++)
                { 
                    double target_work = ((nfine-tfine) * ckwork) / nfine ;
                    int64_t pM, pA, pB ;
                    GB_slice_vector (&i, &pM, &pA, &pB,
                        pM_start, pM_end, Mi,
                        pA_start, pA_end, Ai,
                        pB_start, pB_end, Bi,
                        vlen, target_work) ;

                    // prior task ends at pM-1, pA-1, and pB-1
                    TaskList [ntasks-1].pM_end = pM ;
                    TaskList [ntasks-1].pA_end = pA ;
                    TaskList [ntasks-1].pB_end = pB ;

                    // prior task handles indices ilast:i-1
                    TaskList [ntasks-1].len = i - ilast ;

                    // this task starts at pM, pA, and pB 
                    ASSERT (ntasks < max_ntasks) ;
                    TaskList [ntasks].kfirst = k ;
                    TaskList [ntasks].klast  = -1 ; // this is a fine task
                    TaskList [ntasks].pM = pM ;
                    TaskList [ntasks].pA = pA ;
                    TaskList [ntasks].pB = pB ;

                    // advance to the next task
                    ntasks++ ;
                    ilast = i ;
                }

                // Terminate the last fine task.
                ASSERT (ntasks <= max_ntasks) ;
                TaskList [ntasks-1].pM_end = (m_empty) ? -1 : pM_end ;
                TaskList [ntasks-1].pA_end = (a_empty) ? -1 : pA_end ;
                TaskList [ntasks-1].pB_end = (b_empty) ? -1 : pB_end ;
                TaskList [ntasks-1].len = vlen - i ;
            }
        }
    }

    ASSERT (ntasks <= max_ntasks) ;

    //--------------------------------------------------------------------------
    // free workspace and return result
    //--------------------------------------------------------------------------

    GB_FREE_WORKSPACE ;
    (*p_TaskList     ) = TaskList ;
    (*p_TaskList_size) = TaskList_size ;
    (*p_ntasks       ) = ntasks ;
    (*p_nthreads     ) = nthreads ;
    return (GrB_SUCCESS) ;
}