1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
//------------------------------------------------------------------------------
// GB_AxB_dot3_phase1_template: analysis phase for dot3; C<M> = A'*B
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
// Purely symbolic phase1 for the dot3 method of GrB_mxm. This does not
// access any values, except for the mask.
{
int taskid ;
#pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
for (taskid = 0 ; taskid < ntasks ; taskid++)
{
//----------------------------------------------------------------------
// get the task descriptor
//----------------------------------------------------------------------
int64_t kfirst = TaskList [taskid].kfirst ;
int64_t klast = TaskList [taskid].klast ;
bool fine_task = (klast == -1) ;
if (fine_task)
{
// a fine task operates on a slice of a single vector
klast = kfirst ;
}
//----------------------------------------------------------------------
// compute all vectors in this task
//----------------------------------------------------------------------
for (int64_t k = kfirst ; k <= klast ; k++)
{
//------------------------------------------------------------------
// get j, the kth vector of C and M
//------------------------------------------------------------------
#if defined ( GB_MASK_SPARSE_STRUCTURAL_AND_NOT_COMPLEMENTED )
// M and C are sparse
const int64_t j = k ;
#else
// M and C are either both sparse or both hypersparse
const int64_t j = GBh_C (Ch, k) ;
#endif
int64_t pM, pM_end ;
if (fine_task)
{
// A fine task operates on a slice of M(:,k)
pM = TaskList [taskid].pM ;
pM_end = TaskList [taskid].pM_end ;
}
else
{
// vectors are never sliced for a coarse task
pM = GBp_M (Mp, k, mvlen) ;
pM_end = GBp_M (Mp, k+1, mvlen) ;
}
//------------------------------------------------------------------
// get B(:,j)
//------------------------------------------------------------------
#if GB_B_IS_HYPER
// B is hyper: find B(:,j) using the B->Y hyper hash
int64_t pB_start, pB_end ;
GB_hyper_hash_lookup (Bp_is_32, Bj_is_32,
Bh, bnvec, Bp, B_Yp, B_Yi, B_Yx, B_hash_bits,
j, &pB_start, &pB_end) ;
#elif GB_B_IS_SPARSE
// B is sparse
const int64_t pB_start = GB_IGET (Bp, j) ;
const int64_t pB_end = GB_IGET (Bp, j+1) ;
#else
// B is bitmap or full
const int64_t pB_start = j * vlen ;
const int64_t pB_end = (j+1) * vlen ;
#endif
const int64_t bjnz = pB_end - pB_start ;
//------------------------------------------------------------------
// estimate the work to compute each entry of C(:,j)
//------------------------------------------------------------------
// A decent estimate of the work to compute the dot product C(i,j)
// = A(:,i)'*B(:,j) is min (|A(:,i)|, |B(:,j)|) + 1. This is a
// lower bound. The actual work could require a binary search of
// either A(:,i) or B(:,j), or a merge of the two vectors. Or it
// could require no work at all if all entries in A(:,i) appear
// before all entries in B(:,j), or visa versa. No work is done if
// M(i,j)=0.
if (bjnz == 0)
{
// B(:,j) is empty, so C(:,j) is empty as well. No work is to
// be done, but it still takes unit work to flag each C(:,j) as
// a zombie
for ( ; pM < pM_end ; pM++)
{
Cwork [pM] = 1 ;
}
}
else
{
for ( ; pM < pM_end ; pM++)
{
float work = 1 ;
#if !defined ( GB_MASK_SPARSE_STRUCTURAL_AND_NOT_COMPLEMENTED )
// if M is structural, no need to check its values
if (GB_MCAST (Mx, pM, msize))
#endif
{
#if GB_A_IS_HYPER
// A is hyper: find A(:,i) using the A->Y hyper hash
const int64_t i = GB_IGET (Mi, pM) ;
int64_t pA, pA_end ;
GB_hyper_hash_lookup (Ap_is_32, Aj_is_32,
Ah, anvec, Ap, A_Yp, A_Yi, A_Yx, A_hash_bits,
i, &pA, &pA_end) ;
const int64_t ainz = pA_end - pA ;
work += GB_IMIN (ainz, bjnz) ;
#elif GB_A_IS_SPARSE
// A is sparse
const int64_t i = GB_IGET (Mi, pM) ;
const int64_t pA = GB_IGET (Ap, i) ;
const int64_t pA_end = GB_IGET (Ap, i+1) ;
const int64_t ainz = pA_end - pA ;
work += GB_IMIN (ainz, bjnz) ;
#else
// A is bitmap or full
work += bjnz ;
#endif
}
Cwork [pM] = work ;
}
}
}
}
}
|