File: GB_iso_expand.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (127 lines) | stat: -rw-r--r-- 4,396 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
//------------------------------------------------------------------------------
// GB_iso_expand: expand a scalar into an entire array
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

#include "GB.h"
#include "GB_is_nonzero.h"

// GB_iso_expand fills the array X with n copies of a single scalar value.
// Each entry of X, and the scalar itself, occupies "size" bytes.  A scalar
// that GB_is_nonzero reports as zero is handled by clearing X with GB_memset.
// Otherwise the scalar is replicated entry by entry, using a typed store for
// the sizes GB_1BYTE, GB_2BYTE, GB_4BYTE, GB_8BYTE, and GB_16BYTE, and memcpy
// for any other size.

void GB_iso_expand          // replicate one scalar across all of X
(
    void *restrict X,       // output array: n entries, each of the given size
    int64_t n,              // number of entries in X
    void *restrict scalar,  // the value copied into every entry of X
    size_t size,            // size of the scalar and of each entry of X
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // get the thread limit and chunk size from the Context
    //--------------------------------------------------------------------------

    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;

    //--------------------------------------------------------------------------
    // zero scalar: clear all of X and finish
    //--------------------------------------------------------------------------

    if (!GB_is_nonzero (scalar, size))
    {
        GB_memset (X, 0, n*size, nthreads_max) ;
        return ;
    }

    //--------------------------------------------------------------------------
    // nonzero scalar: store it into each of the n entries of X
    //--------------------------------------------------------------------------

    int64_t k ;
    int nthreads = GB_nthreads (n, chunk, nthreads_max) ;

    switch (size)
    {

        case GB_1BYTE : // bool, uint8, int8, and UDT of size 1
        {
            uint8_t *restrict dest = (uint8_t *) X ;
            uint8_t value = (*((uint8_t *) scalar)) ;
            #pragma omp parallel for num_threads(nthreads) schedule(static)
            for (k = 0 ; k < n ; k++)
            {
                dest [k] = value ;
            }
            break ;
        }

        case GB_2BYTE : // uint16, int16, and UDT of size 2
        {
            uint16_t *restrict dest = (uint16_t *) X ;
            uint16_t value = (*((uint16_t *) scalar)) ;
            #pragma omp parallel for num_threads(nthreads) schedule(static)
            for (k = 0 ; k < n ; k++)
            {
                dest [k] = value ;
            }
            break ;
        }

        case GB_4BYTE : // uint32, int32, float, and UDT of size 4
        {
            uint32_t *restrict dest = (uint32_t *) X ;
            uint32_t value = (*((uint32_t *) scalar)) ;
            #pragma omp parallel for num_threads(nthreads) schedule(static)
            for (k = 0 ; k < n ; k++)
            {
                dest [k] = value ;
            }
            break ;
        }

        case GB_8BYTE : // uint64, int64, double, float complex, UDT size 8
        {
            uint64_t *restrict dest = (uint64_t *) X ;
            uint64_t value = (*((uint64_t *) scalar)) ;
            #pragma omp parallel for num_threads(nthreads) schedule(static)
            for (k = 0 ; k < n ; k++)
            {
                dest [k] = value ;
            }
            break ;
        }

        case GB_16BYTE : // double complex, and UDT size 16
        {
            GB_blob16 *restrict dest = (GB_blob16 *) X ;
            GB_blob16 value = (*((GB_blob16 *) scalar)) ;
            #pragma omp parallel for num_threads(nthreads) schedule(static)
            for (k = 0 ; k < n ; k++)
            {
                dest [k] = value ;
            }
            break ;
        }

        default : // user-defined types of arbitrary size
        {
            // no fixed-width type matches this size, so each entry is
            // copied with memcpy at its offset k*size from the start of X
            GB_void *restrict dest = (GB_void *) X ;
            #pragma omp parallel for num_threads(nthreads) schedule(static)
            for (k = 0 ; k < n ; k++)
            {
                memcpy (dest + k*size, scalar, size) ;
            }
            break ;
        }
    }
}