File: GB_AxB_saxpy4_tasks.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (122 lines) | stat: -rw-r--r-- 4,687 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
//------------------------------------------------------------------------------
// GB_AxB_saxpy4_tasks: construct tasks for saxpy4 and bitmap_saxpy
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// GB_AxB_saxpy4_tasks constructs the tasks for GB_AxB_saxpy4, and for
// GB_AxB_bitmap_saxpy when A is sparse/hyper and B is bitmap/full.

#include "GB_AxB_saxpy.h"

void GB_AxB_saxpy4_tasks
(
    // output
    int *p_ntasks,                  // total # of tasks to create
    int *p_nthreads,                // # of threads to use
    int *p_nfine_tasks_per_vector,  // fine case: # of tasks per vector of B;
                                    // left at 0 when coarse tasks are used
    bool *p_use_coarse_tasks,       // true if the tasks are coarse
    bool *p_use_atomics,            // true only for the fine atomic method
    // input
    int64_t anz,                    // # of entries in A (sparse or hyper)
    int64_t bnz,                    // # of entries held in B
    int64_t bvdim,                  // # of vectors of B (bitmap or full)
    int64_t cvlen,                  // length of each vector of C (bitmap or
                                    // full); cvlen*bvdim is used below as the
                                    // # of entries in C
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // estimate the work and pick an initial # of threads
    //--------------------------------------------------------------------------

    // the work estimate is (# entries in A) * (# vectors of B)
    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
    const double total_work = ((double) anz) * ((double) bvdim) ;
    int nthreads = GB_nthreads (total_work, chunk, nthreads_max) ;

    // start from the single-task coarse configuration; the branches below
    // overwrite only what differs from it
    int ntasks = 1 ;
    int nfine_tasks_per_vector = 0 ;
    bool coarse = true ;
    bool atomics = false ;

    //--------------------------------------------------------------------------
    // select the kind of tasks
    //--------------------------------------------------------------------------

    if (nthreads == 1 || bvdim == 0)
    { 

        //----------------------------------------------------------------------
        // a single coarse task does the entire computation
        //----------------------------------------------------------------------

        // nthreads is deliberately left as computed above
        GBURBLE ("(coarse, threads: 1) ") ;

    }
    else if (nthreads <= 2 * bvdim)
    { 

        //----------------------------------------------------------------------
        // coarse tasks only: each one handles 1 or more whole vectors of B
        //----------------------------------------------------------------------

        // at most 2 tasks per thread, and never more tasks than vectors of B
        ntasks = GB_IMIN (bvdim, 2 * nthreads) ;
        // never more threads than tasks
        nthreads = GB_IMIN (ntasks, nthreads) ;
        GBURBLE ("(coarse, threads: %d, tasks %d) ", nthreads, ntasks) ;

    }
    else
    { 

        //----------------------------------------------------------------------
        // fine tasks: each one handles a slice of a single vector of B
        //----------------------------------------------------------------------

        // Every vector of B gets the same # of fine tasks.  The choice between
        // the atomic and non-atomic variants depends on the work per entry of
        // C (intensity) and on the size of the per-thread workspace relative
        // to the size of the three matrices (relwspace).

        coarse = false ;
        const double cnz = ((double) cvlen) * ((double) bvdim) ;
        const double intensity = total_work / fmax (cnz, 1) ;
        const double workspace = ((double) cvlen) * ((double) nthreads) ;
        const double relwspace = workspace / fmax (anz + bnz + cnz, 1) ;
        GBURBLE ("(threads: %d, relwspace: %0.3g, intensity: %0.3g",
            nthreads, relwspace, intensity) ;

        // the workspace method is used only if both tests hold
        const bool with_workspace = (intensity > 2 && relwspace < 0.5) ;
        atomics = !with_workspace ;

        if (atomics)
        { 
            // fine atomic method, with no workspace
            ntasks = 4 * nthreads ;
            GBURBLE (": fine atomic, ") ;
        }
        else
        { 
            // fine non-atomic method with workspace
            ntasks = nthreads ;
            GBURBLE (": fine non-atomic, ") ;
        }

        // round the task count up to a whole multiple of bvdim, so that each
        // vector of B is split into the same # of fine tasks
        nfine_tasks_per_vector = ceil (((double) ntasks) / ((double) bvdim)) ;
        ntasks = bvdim * nfine_tasks_per_vector ;
        // this branch is reached only if nthreads > 2*bvdim, and ntasks is at
        // least nthreads, so for bvdim > 0 the ratio above exceeds 2
        ASSERT (nfine_tasks_per_vector > 1) ;
        GBURBLE ("tasks: %d, tasks per vector: %d) ", ntasks,
            nfine_tasks_per_vector) ;
    }

    //--------------------------------------------------------------------------
    // return result
    //--------------------------------------------------------------------------

    (*p_use_coarse_tasks) = coarse ;
    (*p_use_atomics) = atomics ;
    (*p_nfine_tasks_per_vector) = nfine_tasks_per_vector ;
    (*p_nthreads) = nthreads ;
    (*p_ntasks) = ntasks ;
}