1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
|
//------------------------------------------------------------------------------
// GB_AxB_dot3_phase1_template: analysis phase for dot3; C<M> = A'*B
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
{
int taskid ;
#pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
for (taskid = 0 ; taskid < ntasks ; taskid++)
{
//----------------------------------------------------------------------
// get the task descriptor
//----------------------------------------------------------------------
int64_t kfirst = TaskList [taskid].kfirst ;
int64_t klast = TaskList [taskid].klast ;
bool fine_task = (klast == -1) ;
if (fine_task)
{
// a fine task operates on a slice of a single vector
klast = kfirst ;
}
//----------------------------------------------------------------------
// compute all vectors in this task
//----------------------------------------------------------------------
for (int64_t k = kfirst ; k <= klast ; k++)
{
//------------------------------------------------------------------
// get j, the kth vector of C and M
//------------------------------------------------------------------
#if defined ( GB_MASK_SPARSE_AND_STRUCTURAL )
// M and C are sparse
const int64_t j = k ;
#else
// M and C are either both sparse or both hypersparse
const int64_t j = GBH (Ch, k) ;
#endif
GB_GET_VECTOR (pM, pM_end, pM, pM_end, Mp, k, mvlen) ;
//------------------------------------------------------------------
// get B(:,j)
//------------------------------------------------------------------
#if GB_B_IS_HYPER
// B is hyper: find B(:,j) using the B->Y hyper hash
int64_t pB_start, pB_end ;
GB_hyper_hash_lookup (Bp, B_Yp, B_Yi, B_Yx, B_hash_bits,
j, &pB_start, &pB_end) ;
#elif GB_B_IS_SPARSE
// B is sparse
const int64_t pB_start = Bp [j] ;
const int64_t pB_end = Bp [j+1] ;
#else
// B is bitmap or full
const int64_t pB_start = j * vlen ;
const int64_t pB_end = (j+1) * vlen ;
#endif
const int64_t bjnz = pB_end - pB_start ;
//------------------------------------------------------------------
// estimate the work to compute each entry of C(:,j)
//------------------------------------------------------------------
// A decent estimate of the work to compute the dot product C(i,j)
// = A(:,i)'*B(:,j) is min (|A(:,i)|, |B(:,j)|) + 1. This is a
// lower bound. The actual work could require a binary search of
// either A(:,i) or B(:,j), or a merge of the two vectors. Or it
// could require no work at all if all entries in A(:,i) appear
// before all entries in B(:,j), or visa versa. No work is done if
// M(i,j)=0.
if (bjnz == 0)
{
// B(:,j) is empty, so C(:,j) is empty as well. No work is to
// be done, but it still takes unit work to flag each C(:,j) as
// a zombie
for ( ; pM < pM_end ; pM++)
{
Cwork [pM] = 1 ;
}
}
else
{
for ( ; pM < pM_end ; pM++)
{
int64_t work = 1 ;
#if !defined ( GB_MASK_SPARSE_AND_STRUCTURAL )
// if M is structural, no need to check its values
if (GB_mcast (Mx, pM, msize))
#endif
{
const int64_t i = Mi [pM] ;
#if GB_A_IS_HYPER
// A is hyper: find A(:,i) using the A->Y hyper hash
int64_t pA, pA_end ;
GB_hyper_hash_lookup (Ap, A_Yp, A_Yi, A_Yx, A_hash_bits,
i, &pA, &pA_end) ;
const int64_t ainz = pA_end - pA ;
work += GB_IMIN (ainz, bjnz) ;
#elif GB_A_IS_SPARSE
// A is sparse
const int64_t pA = Ap [i] ;
const int64_t pA_end = Ap [i+1] ;
const int64_t ainz = pA_end - pA ;
work += GB_IMIN (ainz, bjnz) ;
#else
// A is bitmap or full
work += bjnz ;
#endif
}
Cwork [pM] = work ;
}
}
}
}
}
|