File: GB_ek_slice_merge1.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (83 lines) | stat: -rw-r--r-- 3,206 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
//------------------------------------------------------------------------------
// GB_ek_slice_merge1: merge column counts for a matrix
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// The input matrix A has been sliced via GB_ek_slice, and scanned to compute
// the counts of entries in each vector of C in Cp, Wfirst, and Wlast.  This
// phase finalizes the column counts, Cp, merging the results of each task.

// On input, Cp [k] has been partially computed.  Task tid operators on vector
// kfirst = kfirst_Aslice [tid] to klast = klast_Aslice [tid].  If kfirst < k <
// klast, then Cp [k] is the total count of entries in C(:,k).  Otherwise, the
// counts are held in Wfirst and Wlast, and Cp [k] is zero (or uninititalized).
// Wfirst [tid] is the number of entries in C(:,kfirst) constructed by task
// tid, and Wlast [tid] is the number of entries in C(:,klast) constructed by
// task tid.

// This function sums up the entries computed for C(:,k) by all tasks, so that
// on output, Cp [k] is the total count of entries in C(:,k).

#include "GB_ek_slice.h"

void GB_ek_slice_merge1     // merge column counts for the matrix C
(
    // input/output:
    int64_t *restrict Cp,                    // column counts
    // input:
    const int64_t *restrict Wfirst,          // size A_ntasks
    const int64_t *restrict Wlast,           // size A_ntasks
    const int64_t *A_ek_slicing,                // size 3*A_ntasks+1
    const int A_ntasks                          // # of tasks
)
{

    const int64_t *restrict kfirst_Aslice = A_ek_slicing ;
    const int64_t *restrict klast_Aslice  = A_ek_slicing + A_ntasks ;

    int64_t kprior = -1 ;

    for (int tid = 0 ; tid < A_ntasks ; tid++)
    {

        //----------------------------------------------------------------------
        // sum up the partial result that thread tid computed for kfirst
        //----------------------------------------------------------------------

        int64_t kfirst = kfirst_Aslice [tid] ;
        int64_t klast  = klast_Aslice  [tid] ;

        if (kfirst <= klast)
        {
            if (kprior < kfirst)
            { 
                // This thread is the first one that did work on
                // A(:,kfirst), so use it to start the reduction.
                Cp [kfirst] = Wfirst [tid] ;
            }
            else
            { 
                Cp [kfirst] += Wfirst [tid] ;
            }
            kprior = kfirst ;
        }

        //----------------------------------------------------------------------
        // sum up the partial result that thread tid computed for klast
        //----------------------------------------------------------------------

        if (kfirst < klast)
        {
            ASSERT (kprior < klast) ;
            // This thread is the first one that did work on
            // A(:,klast), so use it to start the reduction.
            Cp [klast] = Wlast [tid] ;
            kprior = klast ;
        }
    }
}