1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
//------------------------------------------------------------------------------
// GB_AxB_saxpy4_panel.c: H += A*G or H = A*G for one panel
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
// This methods handles both C bitmap (mxm/template/GB_AxB_saxpy4_panel.c) and
// C full (mxm/template/GB_AxB_saxbit_A_sparse_B_bitmap_template.c). Those
// methods define the GB_HX_COMPUTE macro for use in this method, which
// computes the single update: H (i,jj) += A(i,k) * B(k,j). The values of B
// have already been loaded into the panel G.
{
switch (np)
{
case 4 :
for (int64_t kA = 0 ; kA < anvec ; kA++)
{
// get A(:,k)
const int64_t k = GBh_A (Ah, kA) ;
// get B(k,j1:j2-1)
#if GB_B_IS_BITMAP
const int8_t gb0 = Gb [k ] ;
const int8_t gb1 = Gb [k + bvlen] ;
const int8_t gb2 = Gb [k + 2*bvlen] ;
const int8_t gb3 = Gb [k + 3*bvlen] ;
if (!(gb0 || gb1 || gb2 || gb3)) continue ;
#endif
GB_DECLAREB (gk0) ;
GB_DECLAREB (gk1) ;
GB_DECLAREB (gk2) ;
GB_DECLAREB (gk3) ;
GB_GETB (gk0, Gx, k , B_iso) ;
GB_GETB (gk1, Gx, k + bvlen, B_iso) ;
GB_GETB (gk2, Gx, k + 2*bvlen, B_iso) ;
GB_GETB (gk3, Gx, k + 3*bvlen, B_iso) ;
// H += A(:,k)*B(k,j1:j2-1)
const int64_t pA_end = GB_IGET (Ap, kA+1) ;
for (int64_t pA = GB_IGET (Ap, kA) ; pA < pA_end ; pA++)
{
const int64_t i = GB_IGET (Ai, pA) ;
const int64_t pH = i * 4 ;
GB_DECLAREA (aik) ;
GB_GETA (aik, Ax, pA, A_iso) ;
GB_HX_COMPUTE (pH, i, gk0, gb0, 0) ;
GB_HX_COMPUTE (pH, i, gk1, gb1, 1) ;
GB_HX_COMPUTE (pH, i, gk2, gb2, 2) ;
GB_HX_COMPUTE (pH, i, gk3, gb3, 3) ;
}
}
break ;
case 3 :
for (int64_t kA = 0 ; kA < anvec ; kA++)
{
// get A(:,k)
const int64_t k = GBh_A (Ah, kA) ;
// get B(k,j1:j2-1)
#if GB_B_IS_BITMAP
const int8_t gb0 = Gb [k ] ;
const int8_t gb1 = Gb [k + bvlen] ;
const int8_t gb2 = Gb [k + 2*bvlen] ;
if (!(gb0 || gb1 || gb2)) continue ;
#endif
GB_DECLAREB (gk0) ;
GB_DECLAREB (gk1) ;
GB_DECLAREB (gk2) ;
GB_GETB (gk0, Gx, k , B_iso) ;
GB_GETB (gk1, Gx, k + bvlen, B_iso) ;
GB_GETB (gk2, Gx, k + 2*bvlen, B_iso) ;
// H += A(:,k)*B(k,j1:j2-1)
const int64_t pA_end = GB_IGET (Ap, kA+1) ;
for (int64_t pA = GB_IGET (Ap, kA) ; pA < pA_end ; pA++)
{
const int64_t i = GB_IGET (Ai, pA) ;
const int64_t pH = i * 3 ;
GB_DECLAREA (aik) ;
GB_GETA (aik, Ax, pA, A_iso) ;
GB_HX_COMPUTE (pH, i, gk0, gb0, 0) ;
GB_HX_COMPUTE (pH, i, gk1, gb1, 1) ;
GB_HX_COMPUTE (pH, i, gk2, gb2, 2) ;
}
}
break ;
case 2 :
for (int64_t kA = 0 ; kA < anvec ; kA++)
{
// get A(:,k)
const int64_t k = GBh_A (Ah, kA) ;
// get B(k,j1:j2-1)
#if GB_B_IS_BITMAP
const int8_t gb0 = Gb [k ] ;
const int8_t gb1 = Gb [k + bvlen] ;
if (!(gb0 || gb1)) continue ;
#endif
// H += A(:,k)*B(k,j1:j2-1)
GB_DECLAREB (gk0) ;
GB_DECLAREB (gk1) ;
GB_GETB (gk0, Gx, k , B_iso) ;
GB_GETB (gk1, Gx, k + bvlen, B_iso) ;
const int64_t pA_end = GB_IGET (Ap, kA+1) ;
for (int64_t pA = GB_IGET (Ap, kA) ; pA < pA_end ; pA++)
{
const int64_t i = GB_IGET (Ai, pA) ;
const int64_t pH = i * 2 ;
GB_DECLAREA (aik) ;
GB_GETA (aik, Ax, pA, A_iso) ;
GB_HX_COMPUTE (pH, i, gk0, gb0, 0) ;
GB_HX_COMPUTE (pH, i, gk1, gb1, 1) ;
}
}
break ;
case 1 :
for (int64_t kA = 0 ; kA < anvec ; kA++)
{
// get A(:,k)
const int64_t k = GBh_A (Ah, kA) ;
// get B(k,j1)
#if GB_B_IS_BITMAP
const int8_t gb0 = Gb [k] ;
if (!gb0) continue ;
#endif
// H += A(:,k)*B(k,j1)
GB_DECLAREB (gk0) ;
GB_GETB (gk0, Gx, k, B_iso) ;
const int64_t pA_end = GB_IGET (Ap, kA+1) ;
for (int64_t pA = GB_IGET (Ap, kA) ; pA < pA_end ; pA++)
{
// aik = A (i,j)
GB_DECLAREA (aik) ;
GB_GETA (aik, Ax, pA, A_iso) ;
// H (i) += aik * gk0
const int64_t i = GB_IGET (Ai, pA) ;
GB_HX_COMPUTE (i, i, gk0, 1, 0) ;
}
}
break ;
default:;
}
}
|