1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
|
//------------------------------------------------------------------------------
// GB_add: C = A+B, C<M>=A+B, and C<!M>=A+B
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
// GB_add computes C=A+B, C<M>=A+B, or C<!M>=A+B using the given operator
// element-wise on the matrices A and B. The result is typecasted as needed.
// The pattern of C is the union of the pattern of A and B, intersection with
// the mask M, if present. On input, the contents of C are undefined; it is
// an output-only matrix in a static header.
// Let the op be z=f(x,y) where x, y, and z have type xtype, ytype, and ztype.
// If both A(i,j) and B(i,j) are present, then:
// C(i,j) = (ctype) op ((xtype) A(i,j), (ytype) B(i,j))
// If just A(i,j) is present but not B(i,j), then:
// C(i,j) = (ctype) A (i,j)
// If just B(i,j) is present but not A(i,j), then:
// C(i,j) = (ctype) B (i,j)
// For eWiseUnion, the above is revised to:
// If just A(i,j) is present but not B(i,j), then:
// C(i,j) = (ctype) op ((xtype) A(i,j), (ytype) beta)
// If just B(i,j) is present but not A(i,j), then:
// C(i,j) = (ctype) op ((xtype) alpha, (ytype) B(i,j))
// ctype is the type of matrix C. The pattern of C is the union of A and B.
// If A_and_B_are_disjoint is true, the intersection of A and B must be empty.
// This is used by GB_wait only, for merging the pending tuple matrix T into A.
// In this case, the result C is always sparse or hypersparse, not bitmap or
// full. Any duplicate pending tuples have already been summed in T, so the
// intersection of T and A is always empty.
// Some methods should not exploit the mask, but leave it for later.
// See GB_ewise and GB_accum_mask: the only places where this function is
// called with a non-null mask M. Both of those callers can handle the
// mask being applied later. GB_add_sparsity determines whether or not the
// mask should be applied now, or later.
// If A and B are iso, the op is not positional, and op(A,B) == A == B, then C
// is iso. If A and B are known to be disjoint, then op(A,B) is ignored when
// determining if C is iso.
// C on input is empty, see GB_add_phase2.c.
#include "add/GB_add.h"
#define GB_FREE_WORKSPACE \
{ \
GB_FREE_MEMORY (&TaskList, TaskList_size) ; \
GB_FREE_MEMORY (&C_to_M, C_to_M_size) ; \
GB_FREE_MEMORY (&C_to_A, C_to_A_size) ; \
GB_FREE_MEMORY (&C_to_B, C_to_B_size) ; \
}
#define GB_FREE_ALL \
{ \
GB_FREE_WORKSPACE ; \
GB_FREE_MEMORY (&Ch, Ch_size) ; \
GB_FREE_MEMORY (&Cp, Cp_size) ; \
GB_phybix_free (C) ; \
}
GrB_Info GB_add // C=A+B, C<M>=A+B, or C<!M>=A+B
(
GrB_Matrix C, // output matrix, static header
const GrB_Type ctype, // type of output matrix C
const bool C_is_csc, // format of output matrix C
const GrB_Matrix M, // optional mask for C, unused if NULL
const bool Mask_struct, // if true, use the only structure of M
const bool Mask_comp, // if true, use !M
bool *mask_applied, // if true, the mask was applied
const GrB_Matrix A, // input A matrix
const GrB_Matrix B, // input B matrix
const bool is_eWiseUnion, // if true, eWiseUnion, else eWiseAdd
const GrB_Scalar alpha, // alpha and beta ignored for eWiseAdd,
const GrB_Scalar beta, // nonempty scalars for GxB_eWiseUnion
const GrB_BinaryOp op, // op to perform C = op (A,B)
const bool flipij, // if true, i,j must be flipped
const bool A_and_B_are_disjoint, // if true, A and B are disjoint
GB_Werk Werk
)
{
//--------------------------------------------------------------------------
// check inputs
//--------------------------------------------------------------------------
GrB_Info info ;
ASSERT (C != NULL && (C->header_size == 0 || GBNSTATIC)) ;
ASSERT (mask_applied != NULL) ;
(*mask_applied) = false ;
ASSERT_MATRIX_OK (A, "A for add", GB0) ;
ASSERT_MATRIX_OK (B, "B for add", GB0) ;
ASSERT_BINARYOP_OK (op, "op for add", GB0) ;
ASSERT_MATRIX_OK_OR_NULL (M, "M for add", GB0) ;
ASSERT (A->vdim == B->vdim && A->vlen == B->vlen) ;
ASSERT (GB_IMPLIES (M != NULL, A->vdim == M->vdim && A->vlen == M->vlen)) ;
//--------------------------------------------------------------------------
// declare workspace
//--------------------------------------------------------------------------
int64_t Cnvec = 0, Cnvec_nonempty = 0 ;
void *Cp = NULL ; size_t Cp_size = 0 ;
void *Ch = NULL ; size_t Ch_size = 0 ;
int64_t *C_to_M = NULL ; size_t C_to_M_size = 0 ;
int64_t *C_to_A = NULL ; size_t C_to_A_size = 0 ;
int64_t *C_to_B = NULL ; size_t C_to_B_size = 0 ;
bool Ch_is_Mh ;
int C_ntasks = 0, C_nthreads ;
GB_task_struct *TaskList = NULL ; size_t TaskList_size = 0 ;
bool Cp_is_32, Cj_is_32, Ci_is_32 ;
//--------------------------------------------------------------------------
// delete any lingering zombies and assemble any pending tuples
//--------------------------------------------------------------------------
// FUTURE: some cases can allow M, A, and/or B to be jumbled
GB_MATRIX_WAIT (M) ; // cannot be jumbled
GB_MATRIX_WAIT (A) ; // cannot be jumbled
GB_MATRIX_WAIT (B) ; // cannot be jumbled
//--------------------------------------------------------------------------
// determine the sparsity of C
//--------------------------------------------------------------------------
bool apply_mask ;
int C_sparsity = GB_add_sparsity (&apply_mask, M, Mask_struct, Mask_comp,
A, B) ;
//--------------------------------------------------------------------------
// phase0: finalize the sparsity C and find the vectors in C
//--------------------------------------------------------------------------
GB_OK (GB_add_phase0 (
// computed by phase0:
&Cnvec, &Ch, &Ch_size,
&C_to_M, &C_to_M_size,
&C_to_A, &C_to_A_size,
&C_to_B, &C_to_B_size, &Ch_is_Mh,
&Cp_is_32, &Cj_is_32, &Ci_is_32,
// input/output to phase0:
&C_sparsity,
// original input:
(apply_mask) ? M : NULL, A, B, Werk)) ;
GBURBLE ("add:(%s<%s%s>=%s+%s) ",
GB_sparsity_char (C_sparsity),
GB_sparsity_char_matrix (M),
((M != NULL) && !apply_mask) ? " (mask later)" : "",
GB_sparsity_char_matrix (A),
GB_sparsity_char_matrix (B)) ;
//--------------------------------------------------------------------------
// phase1: split C into tasks, and count entries in each vector of C
//--------------------------------------------------------------------------
if (C_sparsity == GxB_SPARSE || C_sparsity == GxB_HYPERSPARSE)
{
//----------------------------------------------------------------------
// C is sparse or hypersparse: slice and analyze the C matrix
//----------------------------------------------------------------------
// phase1a: split C into tasks
GB_OK (GB_ewise_slice (
// computed by phase1a
&TaskList, &TaskList_size, &C_ntasks, &C_nthreads,
// computed by phase0:
Cnvec, Ch, Cj_is_32, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
// original input:
(apply_mask) ? M : NULL, A, B, Werk)) ;
// count the number of entries in each vector of C
GB_OK (GB_add_phase1 (
// computed or used by phase1:
&Cp, &Cp_size, &Cnvec_nonempty, A_and_B_are_disjoint,
// from phase1a:
TaskList, C_ntasks, C_nthreads,
// from phase0:
Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh, Cp_is_32, Cj_is_32,
// original input:
(apply_mask) ? M : NULL, Mask_struct, Mask_comp, A, B, Werk)) ;
}
else
{
//----------------------------------------------------------------------
// C is bitmap or full: only determine how many threads to use
//----------------------------------------------------------------------
int nthreads_max = GB_Context_nthreads_max ( ) ;
double chunk = GB_Context_chunk ( ) ;
C_nthreads = GB_nthreads (A->vlen * A->vdim, chunk, nthreads_max) ;
}
//--------------------------------------------------------------------------
// phase2: compute the entries (indices and values) in each vector of C
//--------------------------------------------------------------------------
// Cp and Ch are either freed by phase2, or transplanted into C.
// Either way, they are not freed here.
GB_OK (GB_add_phase2 (
// computed or used by phase2:
C, ctype, C_is_csc, op, flipij, A_and_B_are_disjoint,
// from phase1
&Cp, Cp_size, Cnvec_nonempty,
// from phase1a:
TaskList, C_ntasks, C_nthreads,
// from phase0:
Cnvec, &Ch, Ch_size, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
Cp_is_32, Cj_is_32, Ci_is_32, C_sparsity,
// original input:
(apply_mask) ? M : NULL, Mask_struct, Mask_comp, A, B,
is_eWiseUnion, alpha, beta, Werk)) ;
// Ch and Cp have been set to NULL and now appear as C->h and C->p.
// If the method failed, Cp and Ch have already been freed.
//--------------------------------------------------------------------------
// free workspace and return result
//--------------------------------------------------------------------------
GB_FREE_WORKSPACE ;
GB_OK (info) ;
ASSERT_MATRIX_OK (C, "C output for add", GB0) ;
(*mask_applied) = apply_mask ;
return (GrB_SUCCESS) ;
}
|