1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
|
//------------------------------------------------------------------------------
// GB_AxB_saxpy3_coarseGus_M_phase5: C<M>=A*B, coarse Gustavson, phase5
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
{
//--------------------------------------------------------------------------
// phase5: coarse Gustavson task, C<M>=A*B
//--------------------------------------------------------------------------
// This is a bare brace block, not a function.  Every name it uses without
// declaring (kfirst, klast, Cp, Ci, Hf, cvlen, mark, pB, pB_end, pM_start,
// pM_end, aknz, i, t, A_ok_for_binary_search, ...) must come either from the
// enclosing scope or from the GB_* macros expanded below; none of those
// definitions are visible in this fragment.
//
// For each vector kk in kfirst..klast, the entries of C(:,j) allowed by the
// mask M(:,j) are accumulated in a workspace indexed by row i, whose state
// is tracked in Hf and whose values are accessed via GB_HX_WRITE and
// GB_HX_UPDATE.  The workspace is then gathered into C(:,j).
//
// Initially, Hf [...] < mark for all of Hf.
// Hf [i] < mark : M(i,j)=0, C(i,j) is ignored.
// Hf [i] == mark : M(i,j)=1, and C(i,j) not yet seen.
// Hf [i] == mark+1 : M(i,j)=1, and C(i,j) has been seen.
for (int64_t kk = kfirst ; kk <= klast ; kk++)
{
// pC: position in Ci/Cp where C(:,j) starts; cjnz: number of entries in
// C(:,j), taken from the Cp pointers.
int64_t pC = Cp [kk] ;
int64_t cjnz = Cp [kk+1] - pC ;
if (cjnz == 0) continue ; // nothing to do
// NOTE(review): presumably this macro also establishes j, pB, and pB_end
// for this kk -- confirm against the macro definition.
GB_GET_B_j ; // get B(:,j)
#ifndef GB_GENERIC
if (cjnz == cvlen) // C(:,j) is dense
{
// This is not used for the generic saxpy3.
// The mask and the Hf workspace are bypassed entirely on this path: the
// loop continues before M(:,j) is fetched and before mark is advanced.
GB_COMPUTE_DENSE_C_j ; // C(:,j) = A*B(:,j)
continue ;
}
#endif
GB_GET_M_j ; // get M(:,j)
// NOTE(review): the meaning of the argument 64 is not visible here; it is
// presumably a threshold used by the scan below -- confirm.
GB_GET_M_j_RANGE (64) ; // get first and last in M(:,j)
// Advance mark by 2 so that both mark and mark+1 are fresh values: any Hf
// entry left over from an earlier vector is at most the old mark+1, which
// is less than the new mark, so it reads as "M(i,j)=0" without clearing Hf.
mark += 2 ;
int64_t mark1 = mark+1 ;
// scatter M(:,j) into the Gustavson workspace
// (presumably sets Hf [i] = mark for each entry in the mask -- confirm)
GB_SCATTER_M_j (pM_start, pM_end, mark) ;
// Choose how the pattern of C(:,j) is recovered.  If C(:,j) has more than
// cvlen/16 entries, the pattern is not recorded during the scan and is
// gathered afterwards with GB_GATHER_ALL_C_j.  Otherwise each row index is
// appended to Ci as it is first seen, and the list is sorted afterwards.
if (16 * cjnz > cvlen)
{
//------------------------------------------------------------------
// C(:,j) is not very sparse
//------------------------------------------------------------------
for ( ; pB < pB_end ; pB++) // scan B(:,j)
{
GB_GET_B_kj_INDEX ; // get k of B(k,j)
GB_GET_A_k ; // get A(:,k)
if (aknz == 0) continue ;
GB_GET_B_kj ; // bkj = B(k,j)
// GB_IKJ is the per-entry action for row index i.  It is defined just
// before GB_SCAN_M_j_OR_A_k and undefined right after, so it is
// presumably expanded by that macro for each candidate entry.
// Entries with Hf [i] < mark match neither branch and are skipped.
// This variant does not touch Ci or pC.
#define GB_IKJ \
{ \
int64_t hf = Hf [i] ; \
if (hf == mark) \
{ \
/* C(i,j) = A(i,k) * B(k,j) */ \
Hf [i] = mark1 ; /* mark as seen */ \
GB_MULT_A_ik_B_kj ; /* t = aik*bkj */ \
GB_HX_WRITE (i, t) ; /* Hx [i] = t */ \
} \
else if (hf == mark1) \
{ \
/* C(i,j) += A(i,k) * B(k,j) */ \
GB_MULT_A_ik_B_kj ; /* t = aik*bkj */ \
GB_HX_UPDATE (i, t) ; /* Hx [i] += t */ \
} \
}
GB_SCAN_M_j_OR_A_k (A_ok_for_binary_search) ;
#undef GB_IKJ
}
// Entries of C(:,j) are those rows whose Hf state reached mark1.
GB_GATHER_ALL_C_j(mark1) ; // gather into C(:,j)
}
else
{
//------------------------------------------------------------------
// C(:,j) is very sparse
//------------------------------------------------------------------
for ( ; pB < pB_end ; pB++) // scan B(:,j)
{
GB_GET_B_kj_INDEX ; // get k of B(k,j)
GB_GET_A_k ; // get A(:,k)
if (aknz == 0) continue ;
GB_GET_B_kj ; // bkj = B(k,j)
// Same per-entry action as in the branch above, with one addition: the
// first time row i is seen, i is appended to Ci at pC, so pC advances
// through C(:,j) in discovery order rather than in sorted order.
#define GB_IKJ \
{ \
int64_t hf = Hf [i] ; \
if (hf == mark) \
{ \
/* C(i,j) = A(i,k) * B(k,j) */ \
Hf [i] = mark1 ; /* mark as seen */ \
GB_MULT_A_ik_B_kj ; /* t = aik*bkj */ \
GB_HX_WRITE (i, t) ; /* Hx [i] = t */ \
Ci [pC++] = i ; /* C(:,j) pattern */ \
} \
else if (hf == mark1) \
{ \
/* C(i,j) += A(i,k) * B(k,j) */ \
GB_MULT_A_ik_B_kj ; /* t = aik*bkj */ \
GB_HX_UPDATE (i, t) ; /* Hx [i] += t */ \
} \
}
GB_SCAN_M_j_OR_A_k (A_ok_for_binary_search) ;
#undef GB_IKJ
}
// The row indices were appended to Ci in discovery order above.
GB_SORT_AND_GATHER_C_j ; // gather into C(:,j)
}
}
}
|