File: GB_add.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (249 lines) | stat: -rw-r--r-- 9,890 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
//------------------------------------------------------------------------------
// GB_add: C = A+B, C<M>=A+B, and C<!M>=A+B
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// GB_add computes C=A+B, C<M>=A+B, or C<!M>=A+B using the given operator
// element-wise on the matrices A and B.  The result is typecasted as needed.
// The pattern of C is the union of the pattern of A and B, intersection with
// the mask M, if present.  On input, the contents of C are undefined; it is
// an output-only matrix in a static header.

// Let the op be z=f(x,y) where x, y, and z have type xtype, ytype, and ztype.
// If both A(i,j) and B(i,j) are present, then:

//      C(i,j) = (ctype) op ((xtype) A(i,j), (ytype) B(i,j))

// If just A(i,j) is present but not B(i,j), then:

//      C(i,j) = (ctype) A (i,j)

// If just B(i,j) is present but not A(i,j), then:

//      C(i,j) = (ctype) B (i,j)

// ctype is the type of matrix C.  The pattern of C is the union of A and B.

// op may be NULL.  In this case, the intersection of A and B must be empty.
// This is used by GB_wait only, for merging the pending tuple matrix T into A.
// In this case, the result C is always sparse or hypersparse, not bitmap or
// full.  Any duplicate pending tuples have already been summed in T, so the
// intersection of T and A is always empty.

// Some methods should not exploit the mask, but leave it for later.
// See GB_ewise and GB_accum_mask: the only places where this function is
// called with a non-null mask M.  Both of those callers can handle the
// mask being applied later.  GB_add_sparsity determines whether or not the
// mask should be applied now, or later.

// If A and B are iso, the op is not positional, and op(A,B) == A == B, then C
// is iso.  If A and B are known to be disjoint, then op(A,B) is ignored when
// determining if C is iso.

// C on input is empty, see GB_add_phase2.c.

#include "GB_add.h"

#define GB_FREE_ALL ;

GrB_Info GB_add             // C=A+B, C<M>=A+B, or C<!M>=A+B
(
    GrB_Matrix C,           // output matrix, static header
    const GrB_Type ctype,   // type of output matrix C
    const bool C_is_csc,    // format of output matrix C
    const GrB_Matrix M,     // optional mask for C, unused if NULL
    const bool Mask_struct, // if true, use the only structure of M
    const bool Mask_comp,   // if true, use !M
    bool *mask_applied,     // if true, the mask was applied
    const GrB_Matrix A,     // input A matrix
    const GrB_Matrix B,     // input B matrix
    const bool is_eWiseUnion,   // if true, eWiseUnion, else eWiseAdd
    const GrB_Scalar alpha, // alpha and beta ignored for eWiseAdd,
    const GrB_Scalar beta,  // nonempty scalars for GxB_eWiseUnion
    const GrB_BinaryOp op,  // op to perform C = op (A,B)
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    GrB_Info info ;

    ASSERT (C != NULL && (C->static_header || GBNSTATIC)) ;

    ASSERT (mask_applied != NULL) ;
    (*mask_applied) = false ;

    ASSERT_MATRIX_OK (A, "A for add", GB0) ;
    ASSERT_MATRIX_OK (B, "B for add", GB0) ;
    ASSERT_BINARYOP_OK_OR_NULL (op, "op for add", GB0) ;
    ASSERT_MATRIX_OK_OR_NULL (M, "M for add", GB0) ;
    ASSERT (A->vdim == B->vdim && A->vlen == B->vlen) ;
    ASSERT (GB_IMPLIES (M != NULL, A->vdim == M->vdim && A->vlen == M->vlen)) ;

    //--------------------------------------------------------------------------
    // delete any lingering zombies and assemble any pending tuples
    //--------------------------------------------------------------------------

    // TODO: some cases can allow M, A, and/or B to be jumbled
    GB_MATRIX_WAIT (M) ;        // cannot be jumbled
    GB_MATRIX_WAIT (A) ;        // cannot be jumbled
    GB_MATRIX_WAIT (B) ;        // cannot be jumbled

    //--------------------------------------------------------------------------
    // determine the sparsity of C
    //--------------------------------------------------------------------------

    bool apply_mask ;
    int C_sparsity = GB_add_sparsity (&apply_mask, M, Mask_comp, A, B) ;

    //--------------------------------------------------------------------------
    // initializations
    //--------------------------------------------------------------------------

    int64_t Cnvec, Cnvec_nonempty ;
    int64_t *Cp     = NULL ; size_t Cp_size = 0 ;
    int64_t *Ch     = NULL ; size_t Ch_size = 0 ; 
    int64_t *C_to_M = NULL ; size_t C_to_M_size = 0 ;
    int64_t *C_to_A = NULL ; size_t C_to_A_size = 0 ;
    int64_t *C_to_B = NULL ; size_t C_to_B_size = 0 ;
    bool Ch_is_Mh ;
    int C_ntasks = 0, C_nthreads ;
    GB_task_struct *TaskList = NULL ; size_t TaskList_size = 0 ;

    //--------------------------------------------------------------------------
    // phase0: finalize the sparsity C and find the vectors in C
    //--------------------------------------------------------------------------

    info = GB_add_phase0 (
        // computed by by phase0:
        &Cnvec, &Ch, &Ch_size, &C_to_M, &C_to_M_size, &C_to_A, &C_to_A_size,
        &C_to_B, &C_to_B_size, &Ch_is_Mh,
        // input/output to phase0:
        &C_sparsity,
        // original input:
        (apply_mask) ? M : NULL, A, B, Context) ;
    if (info != GrB_SUCCESS)
    { 
        // out of memory
        return (info) ;
    }

    GBURBLE ("add:(%s<%s>=%s+%s) ",
        GB_sparsity_char (C_sparsity),
        GB_sparsity_char_matrix (M),
        GB_sparsity_char_matrix (A),
        GB_sparsity_char_matrix (B)) ;

    //--------------------------------------------------------------------------
    // phase1: split C into tasks, and count entries in each vector of C
    //--------------------------------------------------------------------------

    if (C_sparsity == GxB_SPARSE || C_sparsity == GxB_HYPERSPARSE)
    {

        //----------------------------------------------------------------------
        // C is sparse or hypersparse: slice and analyze the C matrix
        //----------------------------------------------------------------------

        // phase1a: split C into tasks
        info = GB_ewise_slice (
            // computed by phase1a
            &TaskList, &TaskList_size, &C_ntasks, &C_nthreads,
            // computed by phase0:
            Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
            // original input:
            (apply_mask) ? M : NULL, A, B, Context) ;
        if (info != GrB_SUCCESS)
        { 
            // out of memory; free everything allocated by GB_add_phase0
            GB_FREE (&Ch, Ch_size) ;
            GB_FREE_WORK (&C_to_M, C_to_M_size) ;
            GB_FREE_WORK (&C_to_A, C_to_A_size) ;
            GB_FREE_WORK (&C_to_B, C_to_B_size) ;
            return (info) ;
        }

        // count the number of entries in each vector of C
        info = GB_add_phase1 (
            // computed or used by phase1:
            &Cp, &Cp_size, &Cnvec_nonempty, op == NULL,
            // from phase1a:
            TaskList, C_ntasks, C_nthreads,
            // from phase0:
            Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
            // original input:
            (apply_mask) ? M : NULL, Mask_struct, Mask_comp, A, B, Context) ;
        if (info != GrB_SUCCESS)
        { 
            // out of memory; free everything allocated by GB_add_phase0
            GB_FREE_WORK (&TaskList, TaskList_size) ;
            GB_FREE (&Ch, Ch_size) ;
            GB_FREE_WORK (&C_to_M, C_to_M_size) ;
            GB_FREE_WORK (&C_to_A, C_to_A_size) ;
            GB_FREE_WORK (&C_to_B, C_to_B_size) ;
            return (info) ;
        }

    }
    else
    { 

        //----------------------------------------------------------------------
        // C is bitmap or full: only determine how many threads to use
        //----------------------------------------------------------------------

        GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
        C_nthreads = GB_nthreads (A->vlen * A->vdim, chunk, nthreads_max) ;
    }

    //--------------------------------------------------------------------------
    // phase2: compute the entries (indices and values) in each vector of C
    //--------------------------------------------------------------------------

    // Cp and Ch are either freed by phase2, or transplanted into C.
    // Either way, they are not freed here.

    info = GB_add_phase2 (
        // computed or used by phase2:
        C, ctype, C_is_csc, op,
        // from phase1
        &Cp, Cp_size, Cnvec_nonempty,
        // from phase1a:
        TaskList, C_ntasks, C_nthreads,
        // from phase0:
        Cnvec, &Ch, Ch_size, C_to_M, C_to_A, C_to_B, Ch_is_Mh, C_sparsity,
        // original input:
        (apply_mask) ? M : NULL, Mask_struct, Mask_comp, A, B,
        is_eWiseUnion, alpha, beta, Context) ;

    // Ch and Cp must not be freed; they are now C->h and C->p.
    // If the method failed, Cp and Ch have already been freed.

    // free workspace
    GB_FREE_WORK (&TaskList, TaskList_size) ;
    GB_FREE_WORK (&C_to_M, C_to_M_size) ;
    GB_FREE_WORK (&C_to_A, C_to_A_size) ;
    GB_FREE_WORK (&C_to_B, C_to_B_size) ;

    if (info != GrB_SUCCESS)
    { 
        // out of memory
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // return result
    //--------------------------------------------------------------------------

    ASSERT_MATRIX_OK (C, "C output for add", GB0) ;
    (*mask_applied) = apply_mask ;
    return (GrB_SUCCESS) ;
}