//------------------------------------------------------------------------------
// GB_add: C = A+B, C<M>=A+B, and C<!M>=A+B
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
// GB_add computes C=A+B, C<M>=A+B, or C<!M>=A+B using the given operator
// element-wise on the matrices A and B. The result is typecast as needed.
// The pattern of C is the union of the patterns of A and B, intersected with
// the mask M, if present. On input, the contents of C are undefined; it is
// an output-only matrix in a static header.
// Let the op be z=f(x,y) where x, y, and z have type xtype, ytype, and ztype.
// If both A(i,j) and B(i,j) are present, then:
// C(i,j) = (ctype) op ((xtype) A(i,j), (ytype) B(i,j))
// If just A(i,j) is present but not B(i,j), then:
// C(i,j) = (ctype) A (i,j)
// If just B(i,j) is present but not A(i,j), then:
// C(i,j) = (ctype) B (i,j)
// ctype is the type of matrix C. The pattern of C is the union of A and B.
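//
// For example (an illustrative sketch, not code from this file): with
// op = GrB_PLUS_FP64 and ctype double, where A has type int32 and B has
// type float, an entry present in both A and B is computed as
//
//      cij = (double) (((double) aij) + ((double) bij)) ;
//
// while an entry present in A alone is simply typecast:
//
//      cij = (double) aij ;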
// op may be NULL. In this case, the intersection of A and B must be empty.
// This is used by GB_wait only, for merging the pending tuple matrix T into A.
// In this case, the result C is always sparse or hypersparse, not bitmap or
// full. Any duplicate pending tuples have already been summed in T, so the
// intersection of T and A is always empty.
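//
// As a hedged illustration (GB_wait holds the actual logic): with op == NULL
// and disjoint patterns, no operator is ever applied, and each entry of C is
// a typecast copy from whichever input holds it:
//
//      if A(i,j) is present: C(i,j) = (ctype) A(i,j)
//      if T(i,j) is present: C(i,j) = (ctype) T(i,j)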
// Some methods should not exploit the mask, but leave it for later.
// See GB_ewise and GB_accum_mask, the only places where this function is
// called with a non-NULL mask M. Both of those callers can handle the
// mask being applied later. GB_add_sparsity determines whether the mask
// should be applied now or later.
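//
// A hedged sketch of the caller's pattern (see GB_ewise for the real logic):
//
//      bool mask_applied = false ;
//      GB_OK (GB_add (T, ctype, T_is_csc, M, Mask_struct, Mask_comp,
//          &mask_applied, A, B, is_eWiseUnion, alpha, beta, op, Context)) ;
//      if (!mask_applied)
//      {
//          // GB_add did not exploit M; the caller must apply the mask
//          // afterwards (see GB_accum_mask)
//      }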
// If A and B are iso, the op is not positional, and op(A,B) == A == B, then C
// is iso. If A and B are known to be disjoint, then op(A,B) is ignored when
// determining if C is iso.
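//
// For example (illustrative only): if A and B are both iso with value 3 and
// op is GrB_MIN_INT32, then op(3,3) == 3 == A == B, so C is iso with value
// 3. A positional op such as GxB_FIRSTI_INT64 can never produce an iso C
// this way, since its result depends on the position (i,j).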
// C on input is empty; see GB_add_phase2.c.
#include "GB_add.h"
#define GB_FREE_ALL ;
GrB_Info GB_add // C=A+B, C<M>=A+B, or C<!M>=A+B
(
GrB_Matrix C, // output matrix, static header
const GrB_Type ctype, // type of output matrix C
const bool C_is_csc, // format of output matrix C
const GrB_Matrix M, // optional mask for C, unused if NULL
const bool Mask_struct, // if true, use only the structure of M
const bool Mask_comp, // if true, use !M
bool *mask_applied, // if true, the mask was applied
const GrB_Matrix A, // input A matrix
const GrB_Matrix B, // input B matrix
const bool is_eWiseUnion, // if true, eWiseUnion, else eWiseAdd
const GrB_Scalar alpha, // alpha and beta ignored for eWiseAdd,
const GrB_Scalar beta, // nonempty scalars for GxB_eWiseUnion
const GrB_BinaryOp op, // op to perform C = op (A,B)
GB_Context Context
)
{
//--------------------------------------------------------------------------
// check inputs
//--------------------------------------------------------------------------
GrB_Info info ;
ASSERT (C != NULL && (C->static_header || GBNSTATIC)) ;
ASSERT (mask_applied != NULL) ;
(*mask_applied) = false ;
ASSERT_MATRIX_OK (A, "A for add", GB0) ;
ASSERT_MATRIX_OK (B, "B for add", GB0) ;
ASSERT_BINARYOP_OK_OR_NULL (op, "op for add", GB0) ;
ASSERT_MATRIX_OK_OR_NULL (M, "M for add", GB0) ;
ASSERT (A->vdim == B->vdim && A->vlen == B->vlen) ;
ASSERT (GB_IMPLIES (M != NULL, A->vdim == M->vdim && A->vlen == M->vlen)) ;
//--------------------------------------------------------------------------
// delete any lingering zombies and assemble any pending tuples
//--------------------------------------------------------------------------
// TODO: some cases can allow M, A, and/or B to be jumbled
GB_MATRIX_WAIT (M) ; // cannot be jumbled
GB_MATRIX_WAIT (A) ; // cannot be jumbled
GB_MATRIX_WAIT (B) ; // cannot be jumbled
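// GB_MATRIX_WAIT finishes all pending work on a matrix: pending tuples are
// assembled, zombies are deleted, and the matrix is unjumbled. A hedged
// user-level analogue of this internal macro is:
//
//      GrB_Matrix_wait (A, GrB_MATERIALIZE) ;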
//--------------------------------------------------------------------------
// determine the sparsity of C
//--------------------------------------------------------------------------
bool apply_mask ;
int C_sparsity = GB_add_sparsity (&apply_mask, M, Mask_comp, A, B) ;
//--------------------------------------------------------------------------
// initializations
//--------------------------------------------------------------------------
int64_t Cnvec, Cnvec_nonempty ;
int64_t *Cp = NULL ; size_t Cp_size = 0 ;
int64_t *Ch = NULL ; size_t Ch_size = 0 ;
int64_t *C_to_M = NULL ; size_t C_to_M_size = 0 ;
int64_t *C_to_A = NULL ; size_t C_to_A_size = 0 ;
int64_t *C_to_B = NULL ; size_t C_to_B_size = 0 ;
bool Ch_is_Mh ;
int C_ntasks = 0, C_nthreads ;
GB_task_struct *TaskList = NULL ; size_t TaskList_size = 0 ;
//--------------------------------------------------------------------------
// phase0: finalize the sparsity of C and find the vectors in C
//--------------------------------------------------------------------------
info = GB_add_phase0 (
// computed by phase0:
&Cnvec, &Ch, &Ch_size, &C_to_M, &C_to_M_size, &C_to_A, &C_to_A_size,
&C_to_B, &C_to_B_size, &Ch_is_Mh,
// input/output to phase0:
&C_sparsity,
// original input:
(apply_mask) ? M : NULL, A, B, Context) ;
if (info != GrB_SUCCESS)
{
// out of memory
return (info) ;
}
GBURBLE ("add:(%s<%s>=%s+%s) ",
GB_sparsity_char (C_sparsity),
GB_sparsity_char_matrix (M),
GB_sparsity_char_matrix (A),
GB_sparsity_char_matrix (B)) ;
//--------------------------------------------------------------------------
// phase1: split C into tasks, and count entries in each vector of C
//--------------------------------------------------------------------------
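// This is the classic two-pass pattern for building a sparse result in
// parallel (a hedged summary, not code from this file): phase1 counts the
// entries in each vector of C and cumulatively sums those counts to build
// the vector pointers Cp; phase2 then fills in the indices and values at
// the offsets that Cp provides.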
if (C_sparsity == GxB_SPARSE || C_sparsity == GxB_HYPERSPARSE)
{
//----------------------------------------------------------------------
// C is sparse or hypersparse: slice and analyze the C matrix
//----------------------------------------------------------------------
// phase1a: split C into tasks
info = GB_ewise_slice (
// computed by phase1a
&TaskList, &TaskList_size, &C_ntasks, &C_nthreads,
// computed by phase0:
Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
// original input:
(apply_mask) ? M : NULL, A, B, Context) ;
if (info != GrB_SUCCESS)
{
// out of memory; free everything allocated by GB_add_phase0
GB_FREE (&Ch, Ch_size) ;
GB_FREE_WORK (&C_to_M, C_to_M_size) ;
GB_FREE_WORK (&C_to_A, C_to_A_size) ;
GB_FREE_WORK (&C_to_B, C_to_B_size) ;
return (info) ;
}
// count the number of entries in each vector of C
info = GB_add_phase1 (
// computed or used by phase1:
&Cp, &Cp_size, &Cnvec_nonempty, op == NULL,
// from phase1a:
TaskList, C_ntasks, C_nthreads,
// from phase0:
Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
// original input:
(apply_mask) ? M : NULL, Mask_struct, Mask_comp, A, B, Context) ;
if (info != GrB_SUCCESS)
{
// out of memory; free everything allocated by GB_add_phase0 and GB_ewise_slice
GB_FREE_WORK (&TaskList, TaskList_size) ;
GB_FREE (&Ch, Ch_size) ;
GB_FREE_WORK (&C_to_M, C_to_M_size) ;
GB_FREE_WORK (&C_to_A, C_to_A_size) ;
GB_FREE_WORK (&C_to_B, C_to_B_size) ;
return (info) ;
}
}
else
{
//----------------------------------------------------------------------
// C is bitmap or full: only determine how many threads to use
//----------------------------------------------------------------------
GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
C_nthreads = GB_nthreads (A->vlen * A->vdim, chunk, nthreads_max) ;
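// A hedged sketch of the GB_nthreads heuristic (the actual definition lives
// elsewhere in the library): use about one thread per chunk of work, capped
// at nthreads_max and at least 1:
//
//      nthreads = min (nthreads_max, ceil (work / chunk))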
}
//--------------------------------------------------------------------------
// phase2: compute the entries (indices and values) in each vector of C
//--------------------------------------------------------------------------
// Cp and Ch are either freed by phase2, or transplanted into C.
// Either way, they are not freed here.
info = GB_add_phase2 (
// computed or used by phase2:
C, ctype, C_is_csc, op,
// from phase1
&Cp, Cp_size, Cnvec_nonempty,
// from phase1a:
TaskList, C_ntasks, C_nthreads,
// from phase0:
Cnvec, &Ch, Ch_size, C_to_M, C_to_A, C_to_B, Ch_is_Mh, C_sparsity,
// original input:
(apply_mask) ? M : NULL, Mask_struct, Mask_comp, A, B,
is_eWiseUnion, alpha, beta, Context) ;
// Ch and Cp must not be freed; they are now C->h and C->p.
// If the method failed, Cp and Ch have already been freed.
// free workspace
GB_FREE_WORK (&TaskList, TaskList_size) ;
GB_FREE_WORK (&C_to_M, C_to_M_size) ;
GB_FREE_WORK (&C_to_A, C_to_A_size) ;
GB_FREE_WORK (&C_to_B, C_to_B_size) ;
if (info != GrB_SUCCESS)
{
// out of memory
return (info) ;
}
//--------------------------------------------------------------------------
// return result
//--------------------------------------------------------------------------
ASSERT_MATRIX_OK (C, "C output for add", GB0) ;
(*mask_applied) = apply_mask ;
return (GrB_SUCCESS) ;
}
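
//------------------------------------------------------------------------------
// usage sketch (illustrative, not part of this file)
//------------------------------------------------------------------------------

// GB_add is an internal routine; user code reaches it through GrB_eWiseAdd
// and GxB_eWiseUnion. A hedged sketch of the user-level calls that lead here,
// assuming matrices A and B of type GrB_FP64 already exist:
//
//      GrB_Matrix C ;
//      GrB_Matrix_new (&C, GrB_FP64, nrows, ncols) ;
//
//      // C = A+B: the pattern is the union of A and B; where only one of
//      // A(i,j) or B(i,j) is present, that entry is copied unmodified
//      GrB_eWiseAdd (C, NULL, NULL, GrB_PLUS_FP64, A, B, NULL) ;
//
//      // eWiseUnion: where only A(i,j) is present, C(i,j) = A(i,j) + beta;
//      // where only B(i,j) is present, C(i,j) = alpha + B(i,j)
//      GrB_Scalar alpha, beta ;
//      GrB_Scalar_new (&alpha, GrB_FP64) ;
//      GrB_Scalar_new (&beta, GrB_FP64) ;
//      GrB_Scalar_setElement_FP64 (alpha, 0) ;
//      GrB_Scalar_setElement_FP64 (beta, 0) ;
//      GxB_eWiseUnion (C, NULL, NULL, GrB_PLUS_FP64, A, alpha, B, beta, NULL) ;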