1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
|
//------------------------------------------------------------------------------
// GB_task_struct.h: parallel task descriptor
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
#ifndef GB_TASK_STRUCT_H
#define GB_TASK_STRUCT_H
// The element-wise computations (GB_add, GB_emult, and GB_mask) compute
// C(:,j)<M(:,j)> = op (A (:,j), B(:,j)). They are parallelized by slicing the
// work into tasks, described by the GB_task_struct.
// There are two kinds of tasks. For a coarse task, kfirst <= klast, and the
// task computes all vectors in C(:,kfirst:klast), inclusive. None of the
// vectors are sliced and computed by other tasks. For a fine task, klast is
// -1. The task computes part of the single vector C(:,kfirst). It starts at
// pA in Ai,Ax, at pB in Bi,Bx, and (if M is present) at pM in Mi,Mx. It
// computes C(:,kfirst), starting at pC in Ci,Cx.
// GB_subref also uses the TaskList. It has 12 kinds of fine tasks,
// corresponding to each of the 12 methods used in GB_subref_template. For
// those fine tasks, method = -TaskList [taskid].klast defines the method to
// use.
// The GB_subassign functions use the TaskList, in many different ways.
typedef struct // task descriptor
{
int64_t kfirst ; // C(:,kfirst) is the first vector in this task.
int64_t klast ; // C(:,klast) is the last vector in this task.
int64_t pC ; // fine task starts at Ci, Cx [pC]
int64_t pC_end ; // fine task ends at Ci, Cx [pC_end-1]
int64_t pM ; // fine task starts at Mi, Mx [pM]
int64_t pM_end ; // fine task ends at Mi, Mx [pM_end-1]
int64_t pA ; // fine task starts at Ai, Ax [pA]
int64_t pA_end ; // fine task ends at Ai, Ax [pA_end-1]
int64_t pB ; // fine task starts at Bi, Bx [pB]
int64_t pB_end ; // fine task ends at Bi, Bx [pB_end-1]
int64_t len ; // fine task handles a subvector of this length
}
GB_task_struct ;
// GB_REALLOC_TASK_WORK: Allocate or reallocate the TaskList so that it can
// hold at least ntasks. Double the size if it's too small.
#define GB_REALLOC_TASK_WORK(TaskList,ntasks,max_ntasks) \
{ \
if ((ntasks) >= max_ntasks) \
{ \
bool ok ; \
int nold = (max_ntasks == 0) ? 0 : (max_ntasks + 1) ; \
int nnew = 2 * (ntasks) + 1 ; \
GB_REALLOC_WORK (TaskList, nnew, GB_task_struct, &TaskList_size, \
&ok, NULL) ; \
if (!ok) \
{ \
/* out of memory */ \
GB_FREE_ALL ; \
return (GrB_OUT_OF_MEMORY) ; \
} \
for (int t = nold ; t < nnew ; t++) \
{ \
TaskList [t].kfirst = -1 ; \
TaskList [t].klast = INT64_MIN ; \
TaskList [t].pA = INT64_MIN ; \
TaskList [t].pA_end = INT64_MIN ; \
TaskList [t].pB = INT64_MIN ; \
TaskList [t].pB_end = INT64_MIN ; \
TaskList [t].pC = INT64_MIN ; \
TaskList [t].pC_end = INT64_MIN ; \
TaskList [t].pM = INT64_MIN ; \
TaskList [t].pM_end = INT64_MIN ; \
TaskList [t].len = INT64_MIN ; \
} \
max_ntasks = 2 * (ntasks) ; \
} \
ASSERT ((ntasks) < max_ntasks) ; \
}
GrB_Info GB_ewise_slice
(
// output:
GB_task_struct **p_TaskList, // array of structs
size_t *p_TaskList_size, // size of TaskList
int *p_ntasks, // # of tasks constructed
int *p_nthreads, // # of threads for eWise operation
// input:
const int64_t Cnvec, // # of vectors of C
const int64_t *restrict Ch, // vectors of C, if hypersparse
const int64_t *restrict C_to_M, // mapping of C to M
const int64_t *restrict C_to_A, // mapping of C to A
const int64_t *restrict C_to_B, // mapping of C to B
bool Ch_is_Mh, // if true, then Ch == Mh; GB_add only
const GrB_Matrix M, // mask matrix to slice (optional)
const GrB_Matrix A, // matrix to slice
const GrB_Matrix B, // matrix to slice
GB_Context Context
) ;
GB_PUBLIC
void GB_slice_vector
(
// output: return i, pA, and pB
int64_t *p_i, // work starts at A(i,kA) and B(i,kB)
int64_t *p_pM, // M(i:end,kM) starts at pM
int64_t *p_pA, // A(i:end,kA) starts at pA
int64_t *p_pB, // B(i:end,kB) starts at pB
// input:
const int64_t pM_start, // M(:,kM) starts at pM_start in Mi,Mx
const int64_t pM_end, // M(:,kM) ends at pM_end-1 in Mi,Mx
const int64_t *restrict Mi, // indices of M (or NULL)
const int64_t pA_start, // A(:,kA) starts at pA_start in Ai,Ax
const int64_t pA_end, // A(:,kA) ends at pA_end-1 in Ai,Ax
const int64_t *restrict Ai, // indices of A
const int64_t pB_start, // B(:,kB) starts at pB_start in Bi,Bx
const int64_t pB_end, // B(:,kB) ends at pB_end-1 in Bi,Bx
const int64_t *restrict Bi, // indices of B
const int64_t vlen, // A->vlen and B->vlen
const double target_work // target work
) ;
void GB_task_cumsum
(
int64_t *Cp, // size Cnvec+1
const int64_t Cnvec,
int64_t *Cnvec_nonempty, // # of non-empty vectors in C
GB_task_struct *restrict TaskList, // array of structs
const int ntasks, // # of tasks
const int nthreads, // # of threads
GB_Context Context
) ;
//------------------------------------------------------------------------------
// GB_GET_VECTOR: get the content of a vector for a coarse/fine task
//------------------------------------------------------------------------------
#define GB_GET_VECTOR(pX_start, pX_fini, pX, pX_end, Xp, kX, Xvlen) \
int64_t pX_start, pX_fini ; \
if (fine_task) \
{ \
/* A fine task operates on a slice of X(:,k) */ \
pX_start = TaskList [taskid].pX ; \
pX_fini = TaskList [taskid].pX_end ; \
} \
else \
{ \
/* vectors are never sliced for a coarse task */ \
pX_start = GBP (Xp, kX, Xvlen) ; \
pX_fini = GBP (Xp, kX+1, Xvlen) ; \
}
#endif
|