File: GB_mxm.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (246 lines) | stat: -rw-r--r-- 10,014 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
//------------------------------------------------------------------------------
// GB_mxm: matrix-matrix multiply for GrB_mxm, GrB_mxv, and GrB_vxm
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// C<M> = accum (C,A*B) and variations.

// This function is not user-callable.  It does the work for user-callable
// functions GrB_mxm, GrB_mxv, and GrB_vxm.

// GB_FREE_ALL: release the temporary matrices used by GB_mxm, namely MT and T
// (both are local variables of that function, so this macro is only usable
// where those two names are in scope).  GB_mxm expands it explicitly on its
// in-place return path.
// NOTE(review): presumably the GB_OK error-handling macro also expands this
// when a call fails -- confirm against the definition of GB_OK.
#define GB_FREE_ALL         \
{                           \
    GB_Matrix_free (&MT) ;  \
    GB_Matrix_free (&T) ;   \
}

#include "GB_mxm.h"
#include "GB_accum_mask.h"

// GB_mxm computes C<M> = accum (C,T), where T = A*B over the given semiring
// and A and/or B may be used transposed.  The steps are: validate the types
// and dimensions, finish pending work on M, A, and B, form T with GB_AxB_meta
// (which may instead update C in-place), and finally either transplant T into
// C or fold it into C with GB_accum_mask.  The return value is a GrB_Info
// status; GrB_DIMENSION_MISMATCH is reported when the (possibly transposed)
// shapes of C, A, and B do not agree.

GrB_Info GB_mxm                     // C<M> = A*B
(
    GrB_Matrix C,                   // input/output matrix for results
    const bool C_replace,           // if true, clear C before writing to it
    const GrB_Matrix M_input,       // optional mask for C, unused if NULL
    const bool Mask_comp,           // if true, use !M
    const bool Mask_struct,         // if true, use only the structure of M
    const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
    const GrB_Semiring semiring,    // defines '+' and '*' for C=A*B
    const GrB_Matrix A,             // input matrix
    const bool A_transpose,         // if true, use A' instead of A
    const GrB_Matrix B,             // input matrix
    const bool B_transpose,         // if true, use B' instead of B
    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
    const GrB_Desc_Value AxB_method,// for auto vs user selection of methods
    const int do_sort,              // if nonzero, try to return C unjumbled
    GB_Context Context
)
{

    #ifdef GB_TIMING
    // wall-clock time at entry; the elapsed time is logged on return
    const double t_start = omp_get_wtime ( ) ;
    #endif

    //--------------------------------------------------------------------------
    // validate the inputs
    //--------------------------------------------------------------------------

    // Note that C is permitted to alias M, A, and/or B.

    GrB_Info info ;

    // MT and T are temporary matrices; they are released by GB_FREE_ALL
    struct GB_Matrix_opaque MT_header ;
    struct GB_Matrix_opaque T_header ;
    GrB_Matrix MT = NULL ;
    GrB_Matrix T = NULL ;

    GB_RETURN_IF_FAULTY_OR_POSITIONAL (accum) ;
    GB_RETURN_IF_NULL_OR_FAULTY (semiring) ;

    ASSERT_MATRIX_OK (C, "C input for GB_mxm", GB0) ;
    ASSERT_MATRIX_OK_OR_NULL (M_input, "M for GB_mxm", GB0) ;
    ASSERT_BINARYOP_OK_OR_NULL (accum, "accum for GB_mxm", GB0) ;
    ASSERT_SEMIRING_OK (semiring, "semiring for GB_mxm", GB0) ;
    ASSERT_MATRIX_OK (A, "A for GB_mxm", GB0) ;
    ASSERT_MATRIX_OK (B, "B for GB_mxm", GB0) ;

    // C<M> = accum (C,T): the type of T is the ztype of the additive monoid,
    // and it must be compatible with C, M, and accum
    GrB_Type add_ztype = semiring->add->op->ztype ;
    GB_OK (GB_compatible (C->type, C, M_input, Mask_struct, accum, add_ztype,
        Context)) ;

    // T=A*B: the multiply operator sees (a,b) in z=fmult(a,b), or (b,a) in
    // z=fmult(b,a) when flipxy is true, for entries a from A and b from B.
    // Select the operand types in the order the operator receives them.
    GrB_Type mult_xtype = (flipxy) ? B->type : A->type ;
    GrB_Type mult_ytype = (flipxy) ? A->type : B->type ;
    GB_OK (GB_BinaryOp_compatible (semiring->multiply,
            NULL, mult_xtype, mult_ytype, GB_ignore_code, Context)) ;

    // effective dimensions of the operands, after any requested transpose
    const int64_t c_nrows = GB_NROWS (C) ;
    const int64_t c_ncols = GB_NCOLS (C) ;
    const int64_t a_nrows = (A_transpose) ? GB_NCOLS (A) : GB_NROWS (A) ;
    const int64_t a_ncols = (A_transpose) ? GB_NROWS (A) : GB_NCOLS (A) ;
    const int64_t b_nrows = (B_transpose) ? GB_NCOLS (B) : GB_NROWS (B) ;
    const int64_t b_ncols = (B_transpose) ? GB_NROWS (B) : GB_NCOLS (B) ;

    // the inner dimensions must match, and C must be a_nrows-by-b_ncols
    const bool dims_ok = (a_ncols == b_nrows)
        && (c_nrows == a_nrows) && (c_ncols == b_ncols) ;
    if (!dims_ok)
    { 
        GB_ERROR (GrB_DIMENSION_MISMATCH,
            "Dimensions not compatible:\n"
            "output is " GBd "-by-" GBd "\n"
            "first input is " GBd "-by-" GBd "%s\n"
            "second input is " GBd "-by-" GBd "%s",
            c_nrows, c_ncols,
            a_nrows, a_ncols, A_transpose ? " (transposed)" : "",
            b_nrows, b_ncols, B_transpose ? " (transposed)" : "") ;
    }

    //--------------------------------------------------------------------------
    // complete pending work; handle a dense structural mask and C<!NULL>
    //--------------------------------------------------------------------------

    GrB_Matrix M = M_input ;
    GB_MATRIX_WAIT_IF_PENDING_OR_ZOMBIES (M) ;
    if (Mask_struct && GB_is_dense (M))
    { 
        // a structural mask with all entries present is the same as having
        // no mask at all, so drop it
        M = NULL ;
    }

    // a complemented NULL mask results in a quick return; A and B are only
    // finished if the computation actually proceeds
    GB_RETURN_IF_QUICK_MASK (C, C_replace, M, Mask_comp, Mask_struct) ;
    GB_MATRIX_WAIT_IF_PENDING_OR_ZOMBIES (A) ;
    GB_MATRIX_WAIT_IF_PENDING_OR_ZOMBIES (B) ;

    //--------------------------------------------------------------------------
    // T = A*B, A'*B, A*B', or A'*B', applying the mask if it is present
    //--------------------------------------------------------------------------

    // In-place computation of C:
    //
    //  * C dense (no pending work) with accum present: C+=A*B can be done
    //    in-place, and C_replace is effectively false.
    //  * C dense, M present, and C_replace false: C<M>+=A*B or C<!M>+=A*B
    //    can also be done in-place.
    //    In both of these cases C stays dense with all entries present; C may
    //    have any sparsity structure since its pattern is ignored.
    //  * C bitmap: can always be done in-place (assuming the type of C is
    //    OK), with or without accum, since GB_AxB_meta can insert and detect
    //    non-entries of C through the bitmap.
    //
    // In all cases the type of C must match accum->ztype, or the
    // semiring->add->ztype if accum is not present.  C must also not be
    // transposed, and it cannot be aliased with M, A, or B.

    GB_CLEAR_STATIC_HEADER (MT, &MT_header) ;
    GB_CLEAR_STATIC_HEADER (T, &T_header) ;

    bool mask_applied = false ;     // set by GB_AxB_meta
    bool done_in_place = false ;    // set by GB_AxB_meta
    bool M_transposed = false ;     // set by GB_AxB_meta
    GB_OK (GB_AxB_meta (T, C, C_replace, C->is_csc,
        MT, &M_transposed, M, Mask_comp, Mask_struct,
        accum, A, B, semiring, A_transpose, B_transpose, flipxy,
        &mask_applied, &done_in_place, AxB_method, do_sort, Context)) ;

    if (done_in_place)
    { 
        // GB_AxB_meta wrote the result directly into C; just free the
        // temporaries and conform C
        GB_FREE_ALL ;
        GB_OK (GB_conform (C, Context)) ;
        ASSERT_MATRIX_OK (C, "C from GB_mxm (in-place)", GB0) ;

        #ifdef GB_TIMING
        GB_Global_timing_add (0, omp_get_wtime ( ) - t_start) ;
        #endif

        return (info) ;
    }

    ASSERT_MATRIX_OK (T, "T=A*B from GB_AxB_meta", GB0) ;
    ASSERT_MATRIX_OK_OR_NULL (M_transposed ? MT : NULL, "MT from meta", GB0) ;
    ASSERT (GB_ZOMBIES_OK (T)) ;
    ASSERT (GB_JUMBLED_OK (T)) ;
    ASSERT (!GB_PENDING (T)) ;

    //--------------------------------------------------------------------------
    // C<M> = accum (C,T): move or accumulate the result into C
    //--------------------------------------------------------------------------

    // T can be moved directly into C when there is no accum, C and T share
    // the same CSR/CSC format, the mask (if any) has already been applied,
    // and C is either empty or about to be cleared anyway.
    const bool transplant_T =
        (accum == NULL) && (C->is_csc == T->is_csc)
        && (M == NULL || mask_applied)
        && (C_replace || GB_NNZ_UPPER_BOUND (C) == 0) ;

    if (!transplant_T)
    { 
        // General case: C<M> = accum (C,T).  GB_accum_mask also conforms C to
        // its desired hypersparsity.
        info = GB_accum_mask (C, M, (M_transposed) ? MT : NULL, accum, &T,
            C_replace, Mask_comp, Mask_struct, Context) ;
        GB_Matrix_free (&MT) ;
        #ifdef GB_DEBUG
        if (info == GrB_SUCCESS)
        {
            // C may be returned jumbled, with zombies and pending tuples
            ASSERT_MATRIX_OK (C, "Final C from GB_mxm (accum_mask)", GB0) ;
            ASSERT (GB_ZOMBIES_OK (C)) ;
            ASSERT (GB_JUMBLED_OK (C)) ;
            ASSERT (GB_PENDING_OK (C)) ;
        }
        #endif
    }
    else
    {
        // C = 0 ; C = (ctype) T ; typecasting if needed.  With no typecast
        // this is a pure transplant that takes no time at all.  C is also
        // conformed to its desired hypersparsity.
        GB_Matrix_free (&MT) ;
        if (GB_ZOMBIES (T) && T->type != C->type)
        { 
            // The dot3 method can construct T = A*B with zombies.  T is about
            // to be typecasted from T->type to C->type, and the values in
            // T->x for the zombies are uninitialized.  Typecasting them would
            // be safe, since the values of zombies are always ignored, but
            // valgrind complains about it.  So the zombies are killed first.
            // Also see the discussion in GB_transplant.
            GBURBLE ("(wait, so zombies are not typecasted) ") ;
            GB_OK (GB_wait (T, "T", Context)) ;
        }
        GB_OK (GB_transplant_conform (C, C->type, &T, Context)) ;
        // C may be returned with zombies and jumbled, but no pending tuples
        ASSERT_MATRIX_OK (C, "C from GB_mxm (transplanted)", GB0) ;
        ASSERT (GB_ZOMBIES_OK (C)) ;
        ASSERT (GB_JUMBLED_OK (C)) ;
        ASSERT (!GB_PENDING (C)) ;
        info = GB_block (C, Context) ;

    }

    #ifdef GB_TIMING
    GB_Global_timing_add (0, omp_get_wtime ( ) - t_start) ;
    #endif

    return (info) ;
}