//------------------------------------------------------------------------------
// GB_iso_expand: expand a scalar into an entire array
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
#include "GB.h"
#include "iso/GB_is_nonzero.h"
#include "jitifyer/GB_stringify.h"
#include "unaryop/GB_unop.h"
GrB_Info GB_iso_expand // expand an iso scalar into an entire array
(
void *restrict X, // output array to expand into; must have room for n entries of size xtype->size
int64_t n, // # of entries in X
void *restrict scalar, // scalar to expand into X; a single value of type xtype
GrB_Type xtype // the type of the X and the scalar
)
{
//--------------------------------------------------------------------------
// determine how many threads to use
//--------------------------------------------------------------------------
int nthreads_max = GB_Context_nthreads_max ( ) ;
double chunk = GB_Context_chunk ( ) ;
//--------------------------------------------------------------------------
// copy the scalar into X
//--------------------------------------------------------------------------
GrB_Info info = GrB_SUCCESS ;
size_t size = xtype->size ;
// GB_is_nonzero reports whether the scalar has any nonzero byte; if all
// bytes are zero, the else branch below can fill X with a plain memset.
if (GB_is_nonzero (scalar, size))
{
//----------------------------------------------------------------------
// the scalar is nonzero
//----------------------------------------------------------------------
// p is the shared loop index; OpenMP privatizes it in each parallel
// for below, so a single declaration serves all the loops.
int64_t p ;
int nthreads = GB_nthreads (n, chunk, nthreads_max) ;
// Fast paths for the common sizes: reinterpret X and the scalar as an
// unsigned integer (or 16-byte blob) of the matching width so the fill
// is a simple typed store per entry, instead of a memcpy per entry.
// NOTE(review): these casts assume scalar and X are aligned for the
// chosen width — presumably guaranteed by the GraphBLAS allocators;
// confirm before reusing this routine with externally supplied buffers.
switch (size)
{
case GB_1BYTE : // bool, uint8, int8, and UDT of size 1
{
uint8_t a0 = (*((uint8_t *) scalar)) ;
uint8_t *restrict Z = (uint8_t *) X ;
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (p = 0 ; p < n ; p++)
{
Z [p] = a0 ;
}
}
break ;
case GB_2BYTE : // uint16, int16, and UDT of size 2
{
uint16_t a0 = (*((uint16_t *) scalar)) ;
uint16_t *restrict Z = (uint16_t *) X ;
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (p = 0 ; p < n ; p++)
{
Z [p] = a0 ;
}
}
break ;
case GB_4BYTE : // uint32, int32, float, and UDT of size 4
{
uint32_t a0 = (*((uint32_t *) scalar)) ;
uint32_t *restrict Z = (uint32_t *) X ;
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (p = 0 ; p < n ; p++)
{
Z [p] = a0 ;
}
}
break ;
case GB_8BYTE : // uint64, int64, double, float complex, UDT size 8
{
uint64_t a0 = (*((uint64_t *) scalar)) ;
uint64_t *restrict Z = (uint64_t *) X ;
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (p = 0 ; p < n ; p++)
{
Z [p] = a0 ;
}
}
break ;
case GB_16BYTE : // double complex, and UDT size 16
{
GB_blob16 a0 = (*((GB_blob16 *) scalar)) ;
GB_blob16 *restrict Z = (GB_blob16 *) X ;
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (p = 0 ; p < n ; p++)
{
Z [p] = a0 ;
}
}
break ;
default : // user-defined types of arbitrary size
{
// via the JIT kernel: compile (or reuse) a kernel specialized
// for this type, driven by the identity unary op on xtype
struct GB_UnaryOp_opaque op_header ;
GB_Operator op = GB_unop_identity (xtype, &op_header) ;
info = GB_iso_expand_jit (X, n, scalar, xtype, op, nthreads) ;
// GrB_NO_VALUE means the JIT kernel was unavailable (not an
// error), so fall back to the generic memcpy loop; any other
// non-success info is returned to the caller unchanged
if (info == GrB_NO_VALUE)
{
// via the generic kernel: one memcpy of `size` bytes per
// entry — slower than the typed fast paths but works for
// any type size and alignment
GB_void *restrict Z = (GB_void *) X ;
#pragma omp parallel for num_threads(nthreads) \
schedule(static)
for (p = 0 ; p < n ; p++)
{
memcpy (Z + p*size, scalar, size) ;
}
info = GrB_SUCCESS ;
}
}
break ;
}
}
else
{
//----------------------------------------------------------------------
// the scalar is zero: use memset
//----------------------------------------------------------------------
// every byte of the scalar is zero, so for any type (built-in or UDT)
// the expansion is just an all-zero fill of n*size bytes
GB_memset (X, 0, n*size, nthreads_max) ;
}
return (info) ;
}