1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
|
//------------------------------------------------------------------------------
// GB_add_bitmap_M_sparse_26: C<!M>=A+B, C bitmap; M,A sparse/hyper, B bit/full
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
// C is bitmap.
// M is sparse/hyper and complemented.
// B is bitmap/full, A is sparse/hyper.
{
//--------------------------------------------------------------------------
// Method26: C bitmap, A sparse or hypersparse, B bitmap or full
//--------------------------------------------------------------------------
int tid ;
#pragma omp parallel for num_threads(C_nthreads) schedule(static) \
reduction(+:cnvals)
for (tid = 0 ; tid < C_nthreads ; tid++)
{
int64_t pstart, pend, task_cnvals = 0 ;
GB_PARTITION (pstart, pend, cnz, tid, C_nthreads) ;
for (int64_t p = pstart ; p < pend ; p++)
{
if (Cb [p] == 0)
{
int8_t b = GBb_B (Bb, p) ;
#ifndef GB_ISO_ADD
if (b)
{
#if GB_IS_EWISEUNION
{
// C (i,j) = alpha + B(i,j)
GB_LOAD_B (bij, Bx, p, B_iso) ;
GB_EWISEOP (Cx, p, alpha_scalar, bij,
p % vlen, p / vlen) ;
}
#else
{
// C (i,j) = B (i,j)
GB_COPY_B_to_C (Cx, p, Bx, p, B_iso) ;
}
#endif
}
#endif
Cb [p] = b ;
task_cnvals += b ;
}
}
cnvals += task_cnvals ;
}
const int64_t *kfirst_Aslice = A_ek_slicing ;
const int64_t *klast_Aslice = A_ek_slicing + A_ntasks ;
const int64_t *pstart_Aslice = A_ek_slicing + A_ntasks*2 ;
#pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1) \
reduction(+:cnvals)
for (taskid = 0 ; taskid < A_ntasks ; taskid++)
{
int64_t kfirst = kfirst_Aslice [taskid] ;
int64_t klast = klast_Aslice [taskid] ;
int64_t task_cnvals = 0 ;
for (int64_t k = kfirst ; k <= klast ; k++)
{
// find the part of A(:,k) for this task
int64_t j = GBh_A (Ah, k) ;
GB_GET_PA (pA_start, pA_end, taskid, k, kfirst, klast,
pstart_Aslice, GB_IGET (Ap, k), GB_IGET (Ap, k+1)) ;
int64_t pC_start = j * vlen ;
// traverse over A(:,j), the kth vector of A
for (int64_t pA = pA_start ; pA < pA_end ; pA++)
{
int64_t i = GB_IGET (Ai, pA) ;
int64_t p = pC_start + i ;
int8_t c = Cb [p] ;
if (c == 1)
{
// C (i,j) = A (i,j) + B (i,j)
#ifndef GB_ISO_ADD
GB_LOAD_A (aij, Ax, pA, A_iso) ;
GB_LOAD_B (bij, Bx, p , B_iso) ;
GB_EWISEOP (Cx, p, aij, bij, i, j) ;
#endif
}
else if (c == 0)
{
#ifndef GB_ISO_ADD
#if GB_IS_EWISEUNION
{
// C (i,j) = A(i,j) + beta
GB_LOAD_A (aij, Ax, pA, A_iso) ;
GB_EWISEOP (Cx, p, aij, beta_scalar, i, j) ;
}
#else
{
// C (i,j) = A (i,j)
GB_COPY_A_to_C (Cx, p, Ax, pA, A_iso) ;
}
#endif
#endif
Cb [p] = 1 ;
task_cnvals++ ;
}
}
}
cnvals += task_cnvals ;
}
}
|