File: GB_AxB_dot2.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (503 lines) | stat: -rw-r--r-- 18,912 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
//------------------------------------------------------------------------------
// GB_AxB_dot2: compute C<#M>=A'*B, C is bitmap, or C<#M>=A*B (C bitmap/full)
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// This method always constructs C as bitmap; it then converts C to sparse or
// hyper if A or B are hypersparse.  The C<M>=A'*B dot product when C is sparse
// is computed by GB_AxB_dot3.  This method handles the case when C is bitmap.

// If A_not_transposed is true, then C=A*B is computed for GB_AxB_saxpy.  A is
// bitmap or full, and the dot product method accesses A with a different
// stride than when computing C=A'*B.

// TODO:  this is slower than it could be if A and B are both bitmap/full, when
// A->vlen is large.  This is because the inner loop is a simple full/bitmap
// dot product, across the entire input vectors.  No tiling is used, so cache
// performance is not as good as it could be.  For large problems, C=(A')*B is
// faster with the saxpy3 method, as compared to this method with C=A'*B.

#define GB_FREE_ALL                         \
{                                           \
    GB_Matrix_free (&M2) ;                  \
    GB_Matrix_free (&A2) ;                  \
    GB_Matrix_free (&B2) ;                  \
    GB_WERK_POP (M_ek_slicing, int64_t) ;   \
    GB_WERK_POP (B_slice, int64_t) ;        \
    GB_WERK_POP (A_slice, int64_t) ;        \
}

#include "GB_mxm.h"
#include "GB_subref.h"
#include "GB_ek_slice.h"
#include "GB_bitmap_assign_methods.h"
#include "GB_stringify.h"
#include "GB_AxB__include1.h"
#ifndef GBCUDA_DEV
#include "GB_AxB__include2.h"
#endif

GB_PUBLIC
GrB_Info GB_AxB_dot2                // C=A'*B or C<#M>=A'*B, dot product method
(
    GrB_Matrix C,                   // output matrix, static header
    const bool C_iso,               // true if C is iso
    const GB_void *cscalar,         // iso value of C
    const GrB_Matrix M_in,          // mask matrix for C<#M>=A'*B, may be NULL
    const bool Mask_comp,           // if true, use !M
    const bool Mask_struct,         // if true, use the only structure of M
    const bool A_not_transposed,    // if true, C=A*B, else C=A'*B
    const GrB_Matrix A_in,          // input matrix
    const GrB_Matrix B_in,          // input matrix
    const GrB_Semiring semiring,    // semiring that defines C=A*B
    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    GrB_Info info ;

    ASSERT (C != NULL && (C->static_header || GBNSTATIC)) ;
    ASSERT_MATRIX_OK_OR_NULL (M_in, "M for dot A'*B", GB0) ;
    ASSERT_MATRIX_OK (A_in, "A for dot A'*B", GB0) ;
    ASSERT_MATRIX_OK (B_in, "B for dot A'*B", GB0) ;

    ASSERT (!GB_ZOMBIES (M_in)) ;
    ASSERT (GB_JUMBLED_OK (M_in)) ;
    ASSERT (!GB_PENDING (M_in)) ;
    ASSERT (!GB_ZOMBIES (A_in)) ;
    ASSERT (!GB_JUMBLED (A_in)) ;
    ASSERT (!GB_PENDING (A_in)) ;
    ASSERT (!GB_ZOMBIES (B_in)) ;
    ASSERT (!GB_JUMBLED (B_in)) ;
    ASSERT (!GB_PENDING (B_in)) ;

    ASSERT_SEMIRING_OK (semiring, "semiring for numeric A'*B", GB0) ;

    struct GB_Matrix_opaque A2_header, B2_header, M2_header ;
    GrB_Matrix M = NULL, M2 = NULL, A2 = NULL, B2 = NULL, A = NULL, B = NULL ;
    GB_WERK_DECLARE (A_slice, int64_t) ;
    GB_WERK_DECLARE (B_slice, int64_t) ;
    GB_WERK_DECLARE (M_ek_slicing, int64_t) ;

    // GB_AxB_saxpy punts to this dot2 method for for C=A*B, and in this case,
    // A is bitmap or full, and B is hypersparse or sparse
    bool A_is_full = GB_as_if_full (A_in) ;
    bool B_is_full = GB_as_if_full (B_in) ;
    bool A_bitmap_or_full = (GB_IS_BITMAP (A_in) || A_is_full) ;
    bool B_bitmap_or_full = (GB_IS_BITMAP (B_in) || B_is_full) ;
    ASSERT (GB_IMPLIES (A_not_transposed,
        (GB_IS_BITMAP (A_in) || GB_IS_FULL (A_in)) &&
        (GB_IS_SPARSE (B_in) || GB_IS_HYPERSPARSE (B_in)))) ;

    //--------------------------------------------------------------------------
    // construct hyper_shallow versions of A and B, if hypersparse
    //--------------------------------------------------------------------------

    // If A_in is hypersparse, a new sparse matrix A is constructed with
    // A->vdim = A_in->nvec and the same vlen as A_in, and then the
    // hyper_shallow C->vlen will equal A->vdim < cvlen_final.

    // If B_in is hypersparse, a new sparse matrix B is constructed with
    // B->vdim = B_in->nvec and the same vlen as B_in, and then the
    // hyper_shallow C->vdim will equal B->vdim < cvdim_final.

    int64_t cvlen_final = (A_not_transposed) ? A_in->vlen : A_in->vdim ;
    int64_t cvdim_final = B_in->vdim ;
    ASSERT (A_in->vlen > 0) ;
    bool A_is_hyper = GB_IS_HYPERSPARSE (A_in) ;
    bool B_is_hyper = GB_IS_HYPERSPARSE (B_in) ;
    bool A_or_B_hyper = A_is_hyper || B_is_hyper ;
    GrB_Index *restrict Ah = (GrB_Index *) A_in->h ;
    GrB_Index *restrict Bh = (GrB_Index *) B_in->h ;

    if (A_is_hyper)
    { 
        // A = hypershallow version of A_in
        GB_CLEAR_STATIC_HEADER (A2, &A2_header) ;
        A = GB_hyper_shallow (A2, A_in) ;
    }
    else
    { 
        // use A_in as-is
        A = A_in ;
    }

    if (B_is_hyper)
    { 
        // B = hypershallow version of B_in
        GB_CLEAR_STATIC_HEADER (B2, &B2_header) ;
        B = GB_hyper_shallow (B2, B_in) ;
    }
    else
    { 
        // use B_in as-is
        B = B_in ;
    }

    ASSERT (!GB_IS_HYPERSPARSE (A)) ;
    ASSERT (!GB_IS_HYPERSPARSE (B)) ;
    ASSERT (GB_IMPLIES (A_not_transposed, !A_is_hyper && (A == A_in))) ;
    bool A_is_sparse = GB_IS_SPARSE (A) ;
    bool B_is_sparse = GB_IS_SPARSE (B) ;

    //--------------------------------------------------------------------------
    // determine the size of C
    //--------------------------------------------------------------------------

    int64_t cnvec = B->nvec ;
    int64_t cvlen = (A_not_transposed) ? A->vlen : A->vdim ;
    int64_t cvdim = B->vdim ;

    int64_t cnz ;
    bool ok = GB_int64_multiply ((GrB_Index *) (&cnz), cvlen, cvdim) ;

    //--------------------------------------------------------------------------
    // extract the submask if A or B are hypersparse 
    //--------------------------------------------------------------------------

    if (A_or_B_hyper && M_in != NULL)
    { 
        // M2 = M_in (Ah, Bh), where M2 has a static header
        // if Mask_struct then M2 is extracted as iso
        GB_CLEAR_STATIC_HEADER (M2, &M2_header) ;
        GB_OK (GB_subref (M2, Mask_struct, M_in->is_csc, M_in,
            (A_is_hyper) ? Ah : GrB_ALL, cvlen,
            (B_is_hyper) ? Bh : GrB_ALL, cvdim, false, Context)) ;
        M = M2 ;
        ASSERT_MATRIX_OK_OR_NULL (M, "M submask dot A'*B", GB0) ;
    }
    else
    { 
        // use the mask as-is
        M = M_in ;
    }

    //--------------------------------------------------------------------------
    // determine the number of threads to use
    //--------------------------------------------------------------------------

    int64_t naslice = 0 ;
    int64_t nbslice = 0 ;

    int64_t anvec = (A_not_transposed) ? A->vlen : A->nvec ;
    double anz = (double) GB_nnz_held (A) ;

    int64_t bnvec = B->nvec ;
    double bnz = (double) GB_nnz_held (B) ;

    double work ;
    if (A_bitmap_or_full && !B_bitmap_or_full)
    { 
        // A is bitmap/full, B is sparse/hyper; only B is scanned
        work = bnz ;
    }
    else if (!A_bitmap_or_full && B_bitmap_or_full)
    {
        // A is sparse/hyper, B is bitmap/full; only A is scanned
        work = anz ;
    }
    else if (A_bitmap_or_full && B_bitmap_or_full)
    { 
        // All of A and B are scanned (unless the mask is present)
        work = ((double) cnz) * ((double) B->vlen) ;
    }
    else
    { 
        // In this case, most of both A and B are scanned.  This is a very
        // rough estimate of the work required.
        work = 10 * (anz + bnz) ;
    }

    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
    int nthreads = GB_nthreads (work, chunk, nthreads_max) ;

    #define GB_NTASKS_PER_THREAD 32

    if (nthreads == 1)
    { 
        // do the entire computation with a single thread
        naslice = 1 ;
        nbslice = 1 ;
    }
    else
    {
        // determine number of slices for A' and B
        if (bnvec == 1)
        { 
            // C and B are single vectors
            naslice = GB_NTASKS_PER_THREAD * nthreads ;
            nbslice = 1 ;
        }
        else if (anvec == 1 || bnvec == 0
            || bnvec > GB_NTASKS_PER_THREAD * nthreads)
        { 
            // A is a single vector, or B is empty, or B is large: just slice B
            naslice = 1 ;
            nbslice = GB_NTASKS_PER_THREAD * nthreads ;
        }
        else
        { 
            // slice B into individual vectors
            nbslice = bnvec ;

            // slice A' to get a total of about 16*nthreads tasks
            naslice = (GB_NTASKS_PER_THREAD * nthreads) / nbslice ;

            // but do not slice A too finely
            naslice = GB_IMIN (naslice, anvec/4) ;
            naslice = GB_IMAX (naslice, nthreads) ;
        }
    }

    GBURBLE ("(nthreads: %d naslice %g nbslice %g) ", nthreads,
        (double) naslice, (double) nbslice) ;

    //--------------------------------------------------------------------------
    // get the semiring operators
    //--------------------------------------------------------------------------

    GrB_BinaryOp mult = semiring->multiply ;
    GrB_Monoid add = semiring->add ;
    ASSERT (mult->ztype == add->op->ztype) ;
    bool A_is_pattern, B_is_pattern ;
    GB_binop_pattern (&A_is_pattern, &B_is_pattern, flipxy, mult->opcode) ;

    //--------------------------------------------------------------------------
    // allocate workspace and slice A and B
    //--------------------------------------------------------------------------

    // A and B can have any sparsity: full, bitmap, sparse, or hypersparse.
    // C is always created as bitmap

    GB_WERK_PUSH (A_slice, naslice + 1, int64_t) ;
    GB_WERK_PUSH (B_slice, nbslice + 1, int64_t) ;
    if (A_slice == NULL || B_slice == NULL || !ok)
    { 
        // out of memory
        GB_FREE_ALL ;
        return (GrB_OUT_OF_MEMORY) ;
    }
    GB_pslice (A_slice, A->p, anvec, naslice, false) ;
    GB_pslice (B_slice, B->p, bnvec, nbslice, false) ;

    //--------------------------------------------------------------------------
    // allocate C
    //--------------------------------------------------------------------------

    // if M is sparse/hyper, then calloc C->b; otherwise use malloc
    bool M_is_sparse_or_hyper = (M != NULL) &&
        (GB_IS_SPARSE (M) || GB_IS_HYPERSPARSE (M)) ;
    GrB_Type ctype = add->op->ztype ;

    // determine the sparsity of C
    int C_sparsity = GxB_BITMAP ;
    if (M == NULL)
    {
        // no mask is present so C can be bitmap or full
        if (A_is_full && B_is_full)
        {
            // C = A*B or A'*B, both A and B full: C is full
            C_sparsity = GxB_FULL ;
        }
        else if (A_is_full && B_is_sparse)
        {
            // C = A*B or A'*B, where A is full and B sparse
            if (B->nvec_nonempty < 0)
            { 
                B->nvec_nonempty = GB_nvec_nonempty (B, Context) ;
            }
            // C is full if all vectors of B are present
            C_sparsity = (B->nvec_nonempty == B->vdim) ?
                GxB_FULL : GxB_BITMAP ;
        }
        else if (A_is_sparse && B_is_full)
        {
            // C = A'*B, where A is sparse and B is full
            if (A->nvec_nonempty < 0)
            { 
                // A->nvec_nonempty is used to select the method 
                A->nvec_nonempty = GB_nvec_nonempty (A, Context) ;
            }
            // C is full if all vectors of A are present
            C_sparsity = (A->nvec_nonempty == A->vdim) ?
                GxB_FULL : GxB_BITMAP ;
        }
    }

    if (M_in == NULL)
    { 
        GBURBLE ("(dot %s = %s%s*%s) ",
            GB_sparsity_char (C_sparsity),
            GB_sparsity_char_matrix (A_in),
            A_not_transposed ? "" : "'",
            GB_sparsity_char_matrix (B_in)) ;
    }
    else
    { 
        GBURBLE ("(dot %s%s%s%s%s = %s%s*%s) ",
            GB_sparsity_char (C_sparsity),
            Mask_struct ? "{" : "<",
            Mask_comp ? "!" : "",
            GB_sparsity_char_matrix (M_in),
            Mask_struct ? "}" : ">",
            GB_sparsity_char_matrix (A_in),
            A_not_transposed ? "" : "'",
            GB_sparsity_char_matrix (B_in)) ;
    }

    // set C->iso = C_iso
    GB_OK (GB_new_bix (&C, // bitmap/full, existing header
        ctype, cvlen, cvdim, GB_Ap_malloc, true, C_sparsity,
        M_is_sparse_or_hyper, B->hyper_switch, cnvec, cnz, true, C_iso,
        Context)) ;

    #ifdef GB_DEBUGIFY_DEFN
    GB_debugify_mxm (C_iso, C_sparsity, ctype, M,
        Mask_struct, Mask_comp, semiring, flipxy, A, B) ;
    #endif

    //--------------------------------------------------------------------------
    // if M is sparse/hyper, scatter it into the C bitmap
    //--------------------------------------------------------------------------

    if (M_is_sparse_or_hyper)
    { 
        // FUTURE:: could just set Cb [pC] = 2 since Cb has just been calloc'd.
        // However, in the future, this method might be able to modify C on
        // input, in which case C->b will not be all zero.

        ASSERT (C_sparsity == GxB_BITMAP) ;
        int M_ntasks, M_nthreads ;
        GB_SLICE_MATRIX (M, 8, chunk) ;

        // Cb [pC] += 2 for each entry M(i,j) in the mask
        GB_bitmap_M_scatter (C,
            NULL, 0, GB_ALL, NULL, NULL, 0, GB_ALL, NULL,
            M, Mask_struct, GB_ASSIGN, GB_BITMAP_M_SCATTER_PLUS_2,
            M_ek_slicing, M_ntasks, M_nthreads, Context) ;
        // the bitmap of C now contains:
        //  Cb (i,j) = 0:   cij not present, mij zero
        //  Cb (i,j) = 1:   cij present, mij zero           (not used yet)
        //  Cb (i,j) = 2:   cij not present, mij 1
        //  Cb (i,j) = 3:   cij present, mij 1              (not used yet)
        GB_WERK_POP (M_ek_slicing, int64_t) ;
    }

    //--------------------------------------------------------------------------
    // C<#>=A'*B, computing each entry with a dot product, via builtin semiring
    //--------------------------------------------------------------------------

    if (C_iso)
    { 

        //----------------------------------------------------------------------
        // C is iso; compute the pattern of C<#>=A'*B with the any_pair semiring
        //----------------------------------------------------------------------

        GBURBLE ("(iso dot2) ") ;
        memcpy (C->x, cscalar, ctype->size) ;
        info = GB (_Adot2B__any_pair_iso) (C, M, Mask_comp, Mask_struct,
            A_not_transposed, A, A_slice, B, B_slice,
            nthreads, naslice, nbslice) ;
        ASSERT (info != GrB_NO_VALUE) ;

    }
    else
    {

        //----------------------------------------------------------------------
        // C is non-iso
        //----------------------------------------------------------------------

        bool done = false ;

        #ifndef GBCUDA_DEV

            //------------------------------------------------------------------
            // define the worker for the switch factory
            //------------------------------------------------------------------

            #define GB_Adot2B(add,mult,xname) \
                GB (_Adot2B_ ## add ## mult ## xname)

            #define GB_AxB_WORKER(add,mult,xname)                           \
            {                                                               \
                info = GB_Adot2B (add,mult,xname) (C, M, Mask_comp,         \
                    Mask_struct, A_not_transposed, A, A_slice,              \
                    B, B_slice, nthreads, naslice, nbslice) ;               \
                done = (info != GrB_NO_VALUE) ;                             \
            }                                                               \
            break ;

            //------------------------------------------------------------------
            // launch the switch factory
            //------------------------------------------------------------------

            GB_Opcode mult_binop_code, add_binop_code ;
            GB_Type_code xcode, ycode, zcode ;

            if (GB_AxB_semiring_builtin (A, A_is_pattern, B, B_is_pattern,
                semiring, flipxy, &mult_binop_code, &add_binop_code, &xcode,
                &ycode, &zcode))
            { 
                #include "GB_AxB_factory.c"
            }
            ASSERT (info == GrB_SUCCESS || info == GrB_NO_VALUE) ;

        #endif

        //----------------------------------------------------------------------
        // C = A'*B or A*B, using the dot product method, with typecasting
        //----------------------------------------------------------------------

        if (!done)
        { 
            #define GB_DOT2_GENERIC
            GB_BURBLE_MATRIX (C, "(generic C%s=A%s*B, C %s) ",
                (M == NULL) ? "" : (Mask_comp ? "<!M>" : "<M>"),
                A_not_transposed ? "" : "'",
                (C_sparsity == GxB_BITMAP) ? "bitmap" : "full") ;
            #include "GB_AxB_dot_generic.c"
        }
    }

    //--------------------------------------------------------------------------
    // free workspace
    //--------------------------------------------------------------------------

    GB_FREE_ALL ;
    C->magic = GB_MAGIC ;
    ASSERT_MATRIX_OK (C, "dot2: result C, before expand", GB0) ;
    ASSERT (!GB_ZOMBIES (C)) ;

    //--------------------------------------------------------------------------
    // convert C to sparse/hyper if A or B are hypersparse on input
    //--------------------------------------------------------------------------

    if (A_or_B_hyper)
    { 
        GB_OK (GB_bitmap_expand_to_hyper (C, cvlen_final, cvdim_final,
            A_in, B_in, Context)) ;
    }

    //--------------------------------------------------------------------------
    // return result
    //--------------------------------------------------------------------------

    ASSERT_MATRIX_OK (C, "dot2: result C, after expand", GB0) ;
    ASSERT (GB_ZOMBIES_OK (C)) ;
    ASSERT (!GB_JUMBLED (C)) ;
    ASSERT (!GB_PENDING (C)) ;
    ASSERT (C->nvec_nonempty >= 0) ;
    return (GrB_SUCCESS) ;
}