1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
|
//------------------------------------------------------------------------------
// GB_reduce_build_template.c: Tx=build(Sx), and assemble any duplicate tuples
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
// This template is used in GB_builder and the Generated2/GB_red__* workers.
// This is the same for both vectors and matrices, since this step is agnostic
// about which vectors the entries appear in.
// Sx and Tx are either both iso or both non-iso. For the iso case,
// GB_ISO_BUILD is defined, and K_work is NULL. The iso case is not handled by
// the Generated2/GB_red__* workers, since it doesn't access the values at all.
{

    // k is unused for some uses of this template (presumably GB_unused.h
    // silences the resulting compiler warnings)
    #include "GB_unused.h"

    if (ndupl == 0)
    {

        //----------------------------------------------------------------------
        // no duplicates: Tx is a typecasted (and possibly permuted) copy of Sx
        //----------------------------------------------------------------------

        // With no duplicates present, GB_builder has already transplanted
        // I_work into T->i, so T->i is not constructed here.  Only the tuple
        // values are moved from Sx into T->x, and even that is skipped when
        // T and Sx are iso.

        #ifndef GB_ISO_BUILD
        if (K_work != NULL)
        {
            // Sx must be permuted: tuple t takes its value from Sx [K_work [t]]
            int tid ;
            #pragma omp parallel for num_threads(nthreads) schedule(static)
            for (tid = 0 ; tid < nthreads ; tid++)
            {
                const int64_t t_first = tstart_slice [tid] ;
                const int64_t t_limit = tstart_slice [tid+1] ;
                for (int64_t t = t_first ; t < t_limit ; t++)
                {
                    // Tx [t] = (ttype) Sx [K_work [t]], with typecast
                    GB_CAST_ARRAY_TO_ARRAY (Tx, t, Sx, K_work [t]) ;
                }
            }
        }
        else
        {
            // no permutation: tuple t takes its value from Sx [t]
            int tid ;
            #pragma omp parallel for num_threads(nthreads) schedule(static)
            for (tid = 0 ; tid < nthreads ; tid++)
            {
                const int64_t t_first = tstart_slice [tid] ;
                const int64_t t_limit = tstart_slice [tid+1] ;
                for (int64_t t = t_first ; t < t_limit ; t++)
                {
                    // Tx [t] = (ttype) Sx [t], with typecast
                    GB_CAST_ARRAY_TO_ARRAY (Tx, t, Sx, t) ;
                }
            }
        }
        #endif

    }
    else
    {

        //----------------------------------------------------------------------
        // duplicates are present: assemble them
        //----------------------------------------------------------------------

        // If T and Sx are non-iso, the entries of Sx are copied into T->x and
        // any duplicates are combined via the operator.  T->i is constructed
        // as well.  T->x and Sx are not touched if they are iso.

        int tid ;
        #pragma omp parallel for num_threads(nthreads) schedule(static)
        for (tid = 0 ; tid < nthreads ; tid++)
        {
            const int64_t t_first = tstart_slice [tid] ;
            const int64_t t_limit = tstart_slice [tid+1] ;

            // position in Ti (and Tx) where this slice writes its next
            // unique tuple
            int64_t pT = tnz_slice [tid] ;

            // Skip any leading duplicates (I_work [t] < 0) so that t lands on
            // the first unique tuple owned by this slice, or on t_limit if
            // the slice has none.
            int64_t t = t_first ;
            while (t < t_limit && I_work [t] < 0)
            {
                t++ ;
            }

            // each pass handles one unique tuple and the duplicates after it
            while (t < t_limit)
            {
                // the t-th tuple is unique
                const int64_t idx = I_work [t] ;
                ASSERT (idx >= 0) ;
                #ifndef GB_ISO_BUILD
                const int64_t ks = (K_work == NULL) ? t : K_work [t] ;
                // Tx [pT] = Sx [ks], with typecast
                GB_CAST_ARRAY_TO_ARRAY (Tx, pT, Sx, ks) ;
                #endif
                Ti [pT] = idx ;

                // Fold in every duplicate that directly follows tuple t.  The
                // scan is bounded by nvals, not t_limit, so it may consume the
                // leading duplicates of the next slice(s), stopping before
                // the first unique tuple found there.
                int64_t tdup = t + 1 ;
                while (tdup < nvals && I_work [tdup] < 0)
                {
                    #ifndef GB_ISO_BUILD
                    const int64_t kdup =
                        (K_work == NULL) ? tdup : K_work [tdup] ;
                    // Tx [pT] += Sx [kdup], with typecast
                    GB_ADD_CAST_ARRAY_TO_ARRAY (Tx, pT, Sx, kdup) ;
                    #endif
                    tdup++ ;
                }

                // this unique tuple is done; resume at the first tuple past
                // its run of duplicates
                pT++ ;
                t = tdup ;
            }
        }
    }
}

#undef GB_ISO_BUILD
|