//------------------------------------------------------------------------------
// GB_subassign_05d_template: C<M> = x where C is full
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// Method 05d: C(:,:)<M> = scalar ; no S, C is dense

// M:           present, can be sparse, hypersparse, bitmap, or full
// Mask_comp:   false
// Mask_struct: true or false
// C_replace:   false
// accum:       NULL
// A:           scalar
// S:           none

// C can have any sparsity structure, but it must be entirely dense with
// all entries present.
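
// For context, a masked scalar assignment such as the following (a sketch
// only; the matrix names, dimensions, and FP64 scalar type are illustrative,
// not part of this kernel) is the kind of user-level call this method can
// compute when C is full, there is no accumulator, and the mask is not
// complemented:
//
//      // C<M> = 3.0 for all rows and columns of C
//      GxB_Matrix_subassign_FP64 (C, M, NULL, 3.0,
//          GrB_ALL, nrows, GrB_ALL, ncols, NULL) ;
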
#undef GB_FREE_ALL
#define GB_FREE_ALL \
{ \
GB_WERK_POP (M_ek_slicing, int64_t) ; \
}
{
//--------------------------------------------------------------------------
// Parallel: slice M into equal-sized chunks
//--------------------------------------------------------------------------
GrB_Info info ;
GB_WERK_DECLARE (M_ek_slicing, int64_t) ;
int M_ntasks, M_nthreads ;
GB_M_NHELD (M_nnz_held) ;
GB_SLICE_MATRIX_WORK (M, 8, M_nnz_held + M->nvec, M_nnz_held) ;
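// The ek-slicing workspace partitions the entries of M across M_ntasks tasks,
// run with M_nthreads threads: task t handles vectors kfirst_Mslice [t]
// through klast_Mslice [t], with pstart_Mslice giving its sub-range within
// the first and last (possibly shared) vectors.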
//--------------------------------------------------------------------------
// get C and M
//--------------------------------------------------------------------------
ASSERT (GB_JUMBLED_OK (M)) ;
ASSERT (!C->iso) ;
GB_Mp_DECLARE (Mp, const) ; GB_Mp_PTR (Mp, M) ;
GB_Mh_DECLARE (Mh, const) ; GB_Mh_PTR (Mh, M) ;
GB_Mi_DECLARE (Mi, const) ; GB_Mi_PTR (Mi, M) ;
const int8_t *restrict Mb = M->b ;
const GB_M_TYPE *restrict
Mx = (GB_M_TYPE *) (GB_MASK_STRUCT ? NULL : (M->x)) ;
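// Mx is NULL for a structural mask, in which case GB_MCAST below treats every
// entry present in M as true and the values of M are never read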
const int64_t Mvlen = M->vlen ;
const size_t msize = M->type->size ;
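// M_is_bitmap is only needed when GB_M_IS_BITMAP is a run-time test (the
// non-JIT kernels); JIT kernels know the sparsity of M at compile time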
#ifndef GB_JIT_KERNEL
const bool M_is_bitmap = (Mb != NULL) ;
#endif
GB_C_TYPE *restrict Cx = (GB_C_TYPE *) C->x ;
const int64_t Cvlen = C->vlen ;
//--------------------------------------------------------------------------
// C<M> = x
//--------------------------------------------------------------------------
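// cwork is the scalar x, already typecast to the type of C; it is expected to
// be declared by the kernel that includes this template (an assumption based
// on how GB_COPY_cwork_to_C is used below)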
int taskid ;
#pragma omp parallel for num_threads(M_nthreads) schedule(dynamic,1)
for (taskid = 0 ; taskid < M_ntasks ; taskid++)
{
// if kfirst > klast then taskid does no work at all
int64_t kfirst = kfirst_Mslice [taskid] ;
int64_t klast = klast_Mslice [taskid] ;
//----------------------------------------------------------------------
// C<M(:,kfirst:klast)> = x
//----------------------------------------------------------------------
for (int64_t k = kfirst ; k <= klast ; k++)
{
//------------------------------------------------------------------
// find the part of M(:,k) to be operated on by this task
//------------------------------------------------------------------
int64_t j = GBh_M (Mh, k) ;
GB_GET_PA (pM_start, pM_end, taskid, k,
kfirst, klast, pstart_Mslice,
GBp_M (Mp, k, Mvlen), GBp_M (Mp, k+1, Mvlen)) ;
// pC_start points to the start of C(:,j)
int64_t pC_start = j * Cvlen ;
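// C is full and held by vector, so C(i,j) is the entry Cx [i + j*Cvlen];
// each mask entry M(i,j) thus maps to pC = pC_start + i below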
//------------------------------------------------------------------
// C<M(:,j)> = x
//------------------------------------------------------------------
if (GB_MASK_STRUCT && !GB_M_IS_BITMAP)
{
// mask is structural and not bitmap
GB_PRAGMA_SIMD_VECTORIZE
for (int64_t pM = pM_start ; pM < pM_end ; pM++)
{
int64_t pC = pC_start + GBi_M (Mi, pM, Mvlen) ;
// Cx [pC] = cwork
GB_COPY_cwork_to_C (Cx, pC, cwork, false) ;
}
}
else
{
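// the mask is valued and/or M is in bitmap form: test the bitmap (if any)
// and the mask value of each entry before copying the scalar into C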
GB_PRAGMA_SIMD_VECTORIZE
for (int64_t pM = pM_start ; pM < pM_end ; pM++)
{
if (GBb_M (Mb, pM) && GB_MCAST (Mx, pM, msize))
{
int64_t pC = pC_start + GBi_M (Mi, pM, Mvlen) ;
// Cx [pC] = cwork
GB_COPY_cwork_to_C (Cx, pC, cwork, false) ;
}
}
}
}
}
GB_FREE_ALL ;
}
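// the M_ek_slicing workspace has been freed above, so GB_FREE_ALL becomes
// empty for the remainder of the including kernel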
#undef GB_FREE_ALL
#define GB_FREE_ALL ;