//------------------------------------------------------------------------------
// GB_subref_phase2: find # of entries in C=A(I,J)
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// GB_subref_phase2 counts the number of entries in each vector of C, for
// C=A(I,J), and then does a cumulative sum to find Cp.  A is sparse or
// hypersparse.

// Cp is either freed by phase2, or transplanted into C.
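
// As a small worked example of the cumulative sum: if Cnvec = 3 and the
// vectors of C have 2, 0, and 4 entries, the counts [2 0 4] become
// Cp = [0 2 2 6], and Cp [Cnvec] = 6 = nnz (C).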
#include "extract/GB_subref.h"

GrB_Info GB_subref_phase2 // count nnz in each C(:,j)
(
// computed by phase2:
void **Cp_handle, // output of size Cnvec+1
bool *p_Cp_is_32, // if true, Cp is 32-bit; else 64-bit
size_t *Cp_size_handle,
int64_t *Cnvec_nonempty, // # of non-empty vectors in C
// tasks from phase1:
GB_task_struct *restrict TaskList, // array of structs
const int ntasks, // # of tasks
const int nthreads, // # of threads to use
const void *Ihead, // for I inverse buckets, size A->vlen
const void *Inext, // for I inverse buckets, size nI
const bool Ihead_is_32, // if true, Ihead,Inext 32-bit; else 64
const bool I_has_duplicates, // true if I has duplicates
uint64_t **p_Cwork, // workspace of size max(2,C->nvec+1)
size_t Cwork_size,
// analysis from phase0:
const void *Ap_start,
const void *Ap_end,
const int64_t Cnvec,
const bool need_qsort,
const int Ikind,
const int64_t nI,
const int64_t Icolon [3],
const int64_t nJ,
// original input:
const GrB_Matrix A,
const void *I, // index list for C = A(I,J), or GrB_ALL, etc.
const bool I_is_32, // if true, I is 32-bit; else 64-bit
const bool symbolic,
GB_Werk Werk
)
{

//--------------------------------------------------------------------------
// check inputs
//--------------------------------------------------------------------------

ASSERT (Cp_handle != NULL) ;
ASSERT (Cp_size_handle != NULL) ;
ASSERT_MATRIX_OK (A, "A for subref phase2", GB0) ;
ASSERT (GB_IS_SPARSE (A) || GB_IS_HYPERSPARSE (A)) ;
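// The GB_IDECL/GB_IPTR pairs below declare both a 32-bit and a 64-bit view
// of each integer array, and select the active view according to the
// matching *_is_32 flag (I_is_32, A->p_is_32, or Ihead_is_32).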
GB_IDECL (I , const, u) ; GB_IPTR (I , I_is_32) ;
GB_IDECL (Ap_start, const, u) ; GB_IPTR (Ap_start, A->p_is_32) ;
GB_IDECL (Ap_end , const, u) ; GB_IPTR (Ap_end , A->p_is_32) ;
GB_IDECL (Ihead , const, u) ; GB_IPTR (Ihead , Ihead_is_32) ;
GB_IDECL (Inext , const, u) ; GB_IPTR (Inext , Ihead_is_32) ;
(*Cp_handle) = NULL ;
(*Cp_size_handle) = 0 ;
uint64_t *restrict Cwork = (*p_Cwork) ;
const bool Ai_is_32 = A->i_is_32 ;
ASSERT (Cwork != NULL) ;
// clear Cwork [k] for fine tasks that compute vector k
#ifdef GB_DEBUG
// in debug mode, first fill all of Cwork with garbage (0xFF), so that any
// entry not computed by the analysis below is easy to detect
GB_memset (Cwork, 0xFF, (Cnvec+1) * sizeof (uint64_t), nthreads) ;
#endif
for (int taskid = 0 ; taskid < ntasks ; taskid++)
{
int64_t kfirst = TaskList [taskid].kfirst ;
int64_t klast = TaskList [taskid].klast ;
bool fine_task = (klast < 0) ;
if (fine_task)
{
// The set of fine tasks that compute C(:,kC) do not compute Cwork
// [kC] directly. Instead, they compute their partial results in
// TaskList [taskid].pC, which is then summed by GB_task_cumsum.
// That method sums up the work of each fine task and adds it to
// Cwork [kC], which must be initialized here to zero.
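// For example, if two fine tasks share vector kC, each accumulates its
// own count in TaskList [taskid].pC, and GB_task_cumsum then adds both
// counts into the cleared Cwork [kC].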
Cwork [kfirst] = 0 ;
}
}

//--------------------------------------------------------------------------
// count the entries in each vector of C
//--------------------------------------------------------------------------

#define GB_I_KIND Ikind
#define GB_NEED_QSORT need_qsort
#define GB_I_HAS_DUPLICATES I_has_duplicates
#define GB_ANALYSIS_PHASE
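// With GB_ANALYSIS_PHASE defined, the template below only counts the
// entries of each C(:,j), filling Cwork and the fine-task counts; it does
// not construct the pattern or values of C.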
if (symbolic)
{
#define GB_SYMBOLIC
// symbolic extraction must handle zombies
const bool may_see_zombies = (A->nzombies > 0) ;
#include "extract/template/GB_subref_template.c"
}
else
{
// iso and non-iso numeric extraction do not see zombies
ASSERT (!GB_ZOMBIES (A)) ;
#include "extract/template/GB_subref_template.c"
}

//--------------------------------------------------------------------------
// cumulative sum of Cwork and fine tasks in TaskList
//--------------------------------------------------------------------------

Cwork [Cnvec] = 0 ;
GB_task_cumsum (Cwork, false, Cnvec, Cnvec_nonempty, TaskList, ntasks,
nthreads, Werk) ;
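// Cwork now holds its cumulative sum, so Cwork [Cnvec] is nnz (C), and the
// per-task counts in TaskList have likewise been turned into cumulative
// offsets.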
int64_t cnz = Cwork [Cnvec] ;

//--------------------------------------------------------------------------
// allocate the final result Cp
//--------------------------------------------------------------------------

// determine the final p_is_32 setting for the new matrix;
// j_is_32 and i_is_32 have already been determined by GB_subref_phase0
bool Cp_is_32, Cj_is_32, Ci_is_32 ;
ASSERT (p_Cp_is_32 != NULL) ;
GB_determine_pji_is_32 (&Cp_is_32, &Cj_is_32, &Ci_is_32,
GxB_AUTO_SPARSITY, cnz, nI, nJ, Werk) ;
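// (roughly, Cp can be 32-bit only when cnz fits in 32 bits, with nI and nJ
// similarly bounding the choices for Ci and Cj)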
void *Cp = NULL ; size_t Cp_size = 0 ;
if (Cp_is_32)
{
// Cp is 32-bit; allocate and typecast from Cwork
Cp = GB_MALLOC_MEMORY (GB_IMAX (2, Cnvec+1), sizeof (uint32_t),
&Cp_size) ;
if (Cp == NULL)
{
// out of memory
return (GrB_OUT_OF_MEMORY) ;
}
int nthreads_max = GB_Context_nthreads_max ( ) ;
GB_cast_int (Cp, GB_UINT32_code, Cwork, GB_UINT64_code, Cnvec+1,
nthreads_max) ;
}
else
{
// Cp is 64-bit; transplant Cwork as Cp
Cp = Cwork ;
Cp_size = Cwork_size ;
(*p_Cwork) = NULL ;
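// setting (*p_Cwork) to NULL transfers ownership of the Cwork allocation
// to Cp, so the caller will not free it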
}

//--------------------------------------------------------------------------
// return the result
//--------------------------------------------------------------------------

(*Cp_handle ) = Cp ;
(*Cp_size_handle) = Cp_size ;
(*p_Cp_is_32 ) = Cp_is_32 ;
return (GrB_SUCCESS) ;
}