File: GB_ek_slice_kernels.h

package info (click to toggle)
suitesparse 1%3A7.10.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 254,920 kB
  • sloc: ansic: 1,134,743; cpp: 46,133; makefile: 4,875; fortran: 2,087; java: 1,826; sh: 996; ruby: 725; python: 495; asm: 371; sed: 166; awk: 44
file content (160 lines) | stat: -rw-r--r-- 8,232 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
//------------------------------------------------------------------------------
// GB_ek_slice_kernels.h: slice the entries and vectors of a matrix
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

#ifndef GB_EK_SLICE_KERNELS_H
#define GB_EK_SLICE_KERNELS_H

static inline void GB_e_slice
(
    int64_t *Slice,         // array of size ntasks+1
    int64_t e,              // number items to partition amongst the tasks
    const int ntasks        // # of tasks
)
{
    Slice [0] = 0 ;
    for (int tid = 1 ; tid < ntasks ; tid++)
    { 
        Slice [tid] = (int64_t) GB_PART (tid, e, ntasks) ;
    }
    Slice [ntasks] = e ;
}

//------------------------------------------------------------------------------
// GB_ek_slice_ntasks: determine # of threads and tasks to use for GB_ek_slice
//------------------------------------------------------------------------------

static inline void GB_ek_slice_ntasks
(
    // output
    int *nthreads,              // # of threads to use for GB_ek_slice
    int *ntasks,                // # of tasks to create for GB_ek_slice
    // input
    int64_t anz_held,           // GB_nnz_held(A) of the matrix to slice
    int ntasks_per_thread,      // # of tasks per thread
    double work,                // total work to do
    double chunk,               // give each thread at least this much work
    int nthreads_max            // max # of threads to use
)
{
    if (anz_held == 0)
    {
        (*nthreads) = 1 ;
        (*ntasks) = 1 ;
    }
    else
    {
        (*nthreads) = GB_nthreads (work, chunk, nthreads_max) ;
        (*ntasks) = (*nthreads == 1) ? 1 : ((ntasks_per_thread) * (*nthreads)) ;
        (*ntasks) = (int) GB_IMIN (*ntasks, anz_held) ;
        (*ntasks) = (int) GB_IMAX (*ntasks, 1) ;
    }
}

//------------------------------------------------------------------------------
// GB_SLICE_MATRIX: slice a single matrix using GB_ek_slice
//------------------------------------------------------------------------------

// chunk and nthreads_max must already be defined.

#define GB_SLICE_MATRIX_WORK2(X,ntasks_per_thread,work,xnz_held)              \
    GB_ek_slice_ntasks (&(X ## _nthreads), &(X ## _ntasks), xnz_held,         \
        ntasks_per_thread, work, chunk, nthreads_max) ;                       \
    GB_WERK_PUSH (X ## _ek_slicing, 3*(X ## _ntasks)+1, int64_t) ;            \
    if (X ## _ek_slicing == NULL)                                             \
    {                                                                         \
        /* out of memory */                                                   \
        GB_FREE_ALL ;                                                         \
        return (GrB_OUT_OF_MEMORY) ;                                          \
    }                                                                         \
    GB_ek_slice (X ## _ek_slicing, X, X ## _ntasks) ;

#define GB_SLICE_MATRIX_WORK(X,ntasks_per_thread,work,xnz_held)               \
    GB_SLICE_MATRIX_WORK2 (X,ntasks_per_thread,work,xnz_held)                 \
    const int64_t *kfirst_ ## X ## slice = X ## _ek_slicing ;                 \
    const int64_t *klast_  ## X ## slice = X ## _ek_slicing + X ## _ntasks ;  \
    const int64_t *pstart_ ## X ## slice = X ## _ek_slicing + X ## _ntasks*2 ;

#define GB_SLICE_MATRIX(X,ntasks_per_thread)                                  \
    const int64_t X ## _held = GB_nnz_held (X) ;                              \
    const double  X ## _wrk = X ## _held + X->nvec ;                          \
    GB_SLICE_MATRIX_WORK (X, ntasks_per_thread, X ## _wrk, X ## _held)

#define GB_SLICE_MATRIX2(X,ntasks_per_thread)                                 \
    const int64_t X ## _held = GB_nnz_held (X) ;                              \
    const double  X ## _wrk = X ## _held + X->nvec ;                          \
    GB_SLICE_MATRIX_WORK2 (X, ntasks_per_thread, X ## _wrk, X ## _held)

//------------------------------------------------------------------------------
// GB_GET_PA_AND_PC: find the part of A(:,k) and C(:,k) for this task
//------------------------------------------------------------------------------

// The tasks were generated by GB_ek_slice.

// as a macro, where p0, p1, and p2 are first obtained as above:
//  p0 = GBp (Ap, k, avlen) ;
//  p1 = GBp (Ap, k+1, avlen) ;
//  p2 = GBp (Cp, k, cvlen) ;

#define GB_GET_PA_AND_PC(pA_start,pA_end,pC,tid,k,kfirst,klast,pstart_slice,Cp_kfirst,p0,p1,p2)    \
    int64_t pA_start, pA_end, pC ;                                          \
    if (k == kfirst)                                                        \
    {                                                                       \
        /* First vector for task tid; may only be partially owned. */       \
        pA_start = pstart_slice [tid] ;                                     \
        pA_end   = GB_IMIN (p1, pstart_slice [tid+1]) ;                     \
        pC = Cp_kfirst [tid] ;                                              \
    }                                                                       \
    else if (k == klast)                                                    \
    {                                                                       \
        /* Last vector for task tid; may only be partially owned. */        \
        pA_start = p0 ;                                                     \
        pA_end   = pstart_slice [tid+1] ;                                   \
        pC = p2 ;                                                           \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* task tid entirely owns this vector A(:,k). */                    \
        pA_start = p0 ;                                                     \
        pA_end   = p1 ;                                                     \
        pC = p2 ;                                                           \
    }

//------------------------------------------------------------------------------
// GB_GET_PA: find the part of A(:,k) to be operated on by this task
//------------------------------------------------------------------------------

// The tasks were generated by GB_ek_slice.

// as a macro, where p0 and p1 are first obtained as above:
//  p0 = GBp (Ap, k, avlen) ;
//  p1 = GBp (Ap, k+1, avlen) ;

#define GB_GET_PA(pA_start,pA_end,tid,k,kfirst,klast,pstart_slice,p0,p1)    \
    int64_t pA_start, pA_end ;                                              \
    if (k == kfirst)                                                        \
    {                                                                       \
        /* First vector for task tid; may only be partially owned. */       \
        pA_start = pstart_slice [tid] ;                                     \
        pA_end   = GB_IMIN (p1, pstart_slice [tid+1]) ;                     \
    }                                                                       \
    else if (k == klast)                                                    \
    {                                                                       \
        /* Last vector for task tid; may only be partially owned. */        \
        pA_start = p0 ;                                                     \
        pA_end   = pstart_slice [tid+1] ;                                   \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* task tid entirely owns this vector A(:,k). */                    \
        pA_start = p0 ;                                                     \
        pA_end   = p1 ;                                                     \
    }

#endif