1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
|
//------------------------------------------------------------------------------
// GB_AxB_dot2_template: C=A'B, C<!M>=A'*B, or C<M>=A'*B via dot products
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
// A and B are sparse, bitmap, or full; never hypersparse. If the input
// matrices A and/or B are hypersparse, they are converted into hyper_shallow
// sparse matrices, and C is converted from bitmap to sparse/hypersparse when
// done.
// If A_NOT_TRANSPOSED is #defined, the C=A*B or C<#M>=A*B is computed.
// In this case A is bitmap or full, and B is sparse.
// GB_DOT_ALWAYS_SAVE_CIJ: C(i,j) = cij
#undef GB_DOT_ALWAYS_SAVE_CIJ
#if GB_C_IS_FULL
#define GB_DOT_ALWAYS_SAVE_CIJ \
{ \
GB_PUTC (cij, pC) ; \
}
#else
#define GB_DOT_ALWAYS_SAVE_CIJ \
{ \
GB_PUTC (cij, pC) ; \
Cb [pC] = 1 ; \
task_cnvals++ ; \
}
#endif
// GB_DOT_SAVE_CIJ: C(i,j) = cij, unless already done by GB_DOT
#undef GB_DOT_SAVE_CIJ
#if GB_IS_ANY_MONOID
// for the ANY monoid, GB_DOT saves C(i,j) as soon as a value is found
#define GB_DOT_SAVE_CIJ
#else
// all other monoids: C(i,j) = cij if it exists
#define GB_DOT_SAVE_CIJ \
{ \
if (GB_CIJ_EXISTS) \
{ \
GB_DOT_ALWAYS_SAVE_CIJ ; \
} \
}
#endif
#if ( !GB_A_IS_HYPER && !GB_B_IS_HYPER )
{
//--------------------------------------------------------------------------
// C=A'*B, C<M>=A'*B, or C<!M>=A'*B where C is bitmap
//--------------------------------------------------------------------------
int tid ;
#if GB_C_IS_FULL
#pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
#else
#pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
reduction(+:cnvals)
#endif
for (tid = 0 ; tid < ntasks ; tid++)
{
//----------------------------------------------------------------------
// get the task descriptor
//----------------------------------------------------------------------
const int a_tid = tid / nbslice ;
const int b_tid = tid % nbslice ;
const int64_t kA_start = A_slice [a_tid] ;
const int64_t kA_end = A_slice [a_tid+1] ;
const int64_t kB_start = B_slice [b_tid] ;
const int64_t kB_end = B_slice [b_tid+1] ;
#if (!GB_C_IS_FULL)
int64_t task_cnvals = 0 ;
#endif
//----------------------------------------------------------------------
// C=A'*B, C<M>=A'*B, or C<!M>=A'*B via dot products
//----------------------------------------------------------------------
for (int64_t j = kB_start ; j < kB_end ; j++)
{
//------------------------------------------------------------------
// get C(:,j)
//------------------------------------------------------------------
const int64_t pC_start = j * cvlen ;
//------------------------------------------------------------------
// get B(:,j)
//------------------------------------------------------------------
#if GB_B_IS_SPARSE
// B is sparse (never hypersparse)
const int64_t pB_start = Bp [j] ;
const int64_t pB_end = Bp [j+1] ;
const int64_t bjnz = pB_end - pB_start ;
if (bjnz == 0)
{
// no work to do if B(:,j) is empty, except to clear Cb
memset (&Cb [pC_start + kA_start], 0, kA_end - kA_start) ;
continue ;
}
#if GB_A_IS_SPARSE
// Both A and B are sparse; get first and last in B(:,j)
const int64_t ib_first = Bi [pB_start] ;
const int64_t ib_last = Bi [pB_end-1] ;
#endif
#else
// B is bitmap or full
const int64_t pB_start = j * vlen ;
#endif
//------------------------------------------------------------------
// C(:,j)<#M(:,j)> = A'*B(:,j), or C(:,j) = A'*B(:,j) if no mask
//------------------------------------------------------------------
for (int64_t i = kA_start ; i < kA_end ; i++)
{
//--------------------------------------------------------------
// get C(i,j), M(i,j), and clear the C(i,j) bitmap
//--------------------------------------------------------------
int64_t pC = pC_start + i ; // C is bitmap
#if defined ( GB_ANY_SPECIALIZED )
// M is bitmap and structural; Mask_comp true
Cb [pC] = 0 ;
if (!Mb [pC])
#elif defined ( GB_MASK_IS_PRESENT )
bool mij ;
if (M_is_bitmap)
{
// M is bitmap
mij = Mb [pC] && GB_mcast (Mx, pC, msize) ;
}
else if (M_is_full)
{
// M is full
mij = GB_mcast (Mx, pC, msize) ;
}
else // M is sparse or hyper
{
// M has been scattered into the C bitmap
mij = (Cb [pC] > 1) ;
}
Cb [pC] = 0 ;
if (mij ^ Mask_comp)
#elif GB_C_IS_FULL
// C is full; nothing to do
#else
// M is not present
Cb [pC] = 0 ;
#endif
{
//----------------------------------------------------------
// the mask allows C(i,j) to be computed
//----------------------------------------------------------
#if GB_A_IS_SPARSE
// A is sparse
int64_t pA = Ap [i] ;
const int64_t pA_end = Ap [i+1] ;
const int64_t ainz = pA_end - pA ;
#if (!GB_C_IS_FULL)
if (ainz > 0) // skip this test if C is full
#endif
#else
// A is bitmap or full
#ifdef GB_A_NOT_TRANSPOSED
// A(i,:) starts at position i
const int64_t pA = i ;
#else
// A(:,i) starts at position i * vlen
const int64_t pA = i * vlen ;
#endif
#endif
{
// C(i,j) = A(:,i)'*B(:,j) or A(i,:)*B(:,j)
bool cij_exists = false ;
GB_CIJ_DECLARE (cij) ;
#include "GB_AxB_dot_cij.c"
}
}
}
}
#if (!GB_C_IS_FULL)
cnvals += task_cnvals ;
#endif
}
}
#endif
#undef GB_A_IS_SPARSE
#undef GB_A_IS_HYPER
#undef GB_A_IS_BITMAP
#undef GB_A_IS_FULL
#undef GB_B_IS_SPARSE
#undef GB_B_IS_HYPER
#undef GB_B_IS_BITMAP
#undef GB_B_IS_FULL
#undef GB_DOT_ALWAYS_SAVE_CIJ
#undef GB_DOT_SAVE_CIJ
|