1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
|
//------------------------------------------------------------------------------
// GB_ek_slice_merge2: merge final results for matrix C
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
// Prior to calling this function, a method using GB_ek_slice to slice an input
// matrix A has computed the vector counts Cp, where Cp [k] is the number of
// entries in the kth vector of C on input to this function.
// The input matrix and the matrix C is sliced by kfirst_Aslice and
// klast_Aslice, where kfirst = kfirst_Aslice [tid] is the first vector in A
// and C computed by task tid, and klast = klast_Aslice [tid] is the last
// vector computed by task tid. Tasks tid and tid+1 may cooperate on a single
// vector, however, where klast_Aslice [tid] may be the same as kfirst_Aslice
// [tid]. The method has also computed Wfirst [tid] and Wlast [tid] for each
// task id, tid. Wfirst [tid] is the number of entries task tid contributes to
// C(:,kfirst), and Wlast [tid] is the number of entries task tid contributes
// to C(:,klast).
// On output, Cp [0:cnvec] is overwritten with its cumulative sum.
// C_nvec_nonempty is the count of how many vectors in C are empty.
// Cp_kfirst [tid] is the position in C where task tid owns entries in
// C(:,kfirst), which is a cumulative sum of the entries computed in C(:,k) for
// all tasks that cooperate to compute that vector, starting at Cp [kfirst].
// There is no need to compute this for C(:,klast): if kfirst < klast, then
// either task tid fully owns C(:,klast), or it is always the first task to
// operate on C(:,klast). In both cases, task tid starts its computations at
// the top of C(:,klast), which can be found at Cp [klast].
#include "GB_ek_slice.h"
void GB_ek_slice_merge2 // merge final results for matrix C
(
// output
int64_t *C_nvec_nonempty, // # of non-empty vectors in C
int64_t *restrict Cp_kfirst, // size ntasks
// input/output
int64_t *restrict Cp, // size cnvec+1
// input
const int64_t cnvec,
const int64_t *restrict Wfirst, // size ntasks
const int64_t *restrict Wlast, // size ntasks
const int64_t *A_ek_slicing, // size 3*ntasks+1
const int ntasks, // # of tasks used to construct C
const int nthreads, // # of threads to use
GB_Context Context
)
{
//--------------------------------------------------------------------------
// Cp = cumsum (Cp)
//--------------------------------------------------------------------------
GB_cumsum (Cp, cnvec, C_nvec_nonempty, nthreads, Context) ;
//--------------------------------------------------------------------------
// determine the slice boundaries in the new C matrix
//--------------------------------------------------------------------------
const int64_t *restrict kfirst_Aslice = A_ek_slicing ;
const int64_t *restrict klast_Aslice = A_ek_slicing + ntasks ;
// const int64_t *restrict pstart_Aslice = A_ek_slicing + ntasks * 2 ;
int64_t kprior = -1 ;
int64_t pC = 0 ;
for (int tid = 0 ; tid < ntasks ; tid++)
{
int64_t kfirst = kfirst_Aslice [tid] ;
if (kprior < kfirst)
{
// Task tid is the first one to do work on C(:,kfirst), so it
// starts at Cp [kfirst], and it contributes Wfirst [tid] entries
// to C(:,kfirst).
pC = Cp [kfirst] ;
kprior = kfirst ;
}
// Task tid contributes Wfirst [tid] entries to C(:,kfirst)
Cp_kfirst [tid] = pC ;
pC += Wfirst [tid] ;
int64_t klast = klast_Aslice [tid] ;
if (kfirst < klast)
{
// Task tid is the last to contribute to C(:,kfirst).
ASSERT (pC == Cp [kfirst+1]) ;
// Task tid contributes the first Wlast [tid] entries to
// C(:,klast), so the next task tid+1 starts at location Cp [klast]
// + Wlast [tid], if its first vector is klast of this task.
pC = Cp [klast] + Wlast [tid] ;
kprior = klast ;
}
}
}
|