File: GB_AxB_saxpy3_fineGus_phase2.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 503; asm: 369; python: 125; awk: 10
file content (90 lines) | stat: -rw-r--r-- 3,394 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
//------------------------------------------------------------------------------
// GB_AxB_saxpy3_fineGus_phase2: C=A*B, saxpy3 method, fine Gustavson, phase2
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

{

    //--------------------------------------------------------------------------
    // phase2: fine Gustavson task, C(:,j)=A*B(:,j)
    //--------------------------------------------------------------------------

    // Hf [i] is initially 0.
    // 0 -> 3 : to lock, if i seen for first time
    // 2 -> 3 : to lock, if i seen already
    // 3 -> 2 : to unlock; now i has been seen

    for ( ; pB < pB_end ; pB++)     // scan B(:,j)
    {
        GB_GET_B_kj_INDEX ;         // get index k of B(k,j)
        GB_GET_A_k ;                // get A(:,k)
        if (aknz == 0) continue ;
        GB_GET_B_kj ;               // bkj = B(k,j)
        // scan A(:,k)
        for (int64_t pA = pA_start ; pA < pA_end ; pA++)
        {
            GB_GET_A_ik_INDEX ;     // get index i of A(i,k)
            GB_MULT_A_ik_B_kj ;     // t = A(i,k) * B(k,j)
            int8_t f ;

            #if GB_IS_ANY_MONOID

                //--------------------------------------------------------------
                // C(i,j) += t ; with the ANY monoid
                //--------------------------------------------------------------

                GB_ATOMIC_READ
                f = Hf [i] ;            // grab the entry
                if (f == 2) continue ;  // check if already updated
                GB_ATOMIC_WRITE_HX (i, t) ;    // Hx [i] = t

            #else

                //--------------------------------------------------------------
                // C(i,j) += t ; with all other monoids
                //--------------------------------------------------------------

                #if GB_HAS_ATOMIC

                    // if C(i,j) is already present (f==2), and the monoid can
                    // be done atomically, then do the atomic update.  No need
                    // to modify Hf [i].
                    GB_ATOMIC_READ
                    f = Hf [i] ;        // grab the entry
                    if (f == 2)         // if true, update C(i,j)
                    {
                        GB_ATOMIC_UPDATE_HX (i, t) ; // Hx [i] += t
                        continue ;      // C(i,j) has been updated
                    }

                #endif

                do  // lock the entry
                { 
                    // do this atomically:
                    // { f = Hf [i] ; Hf [i] = 3 ; }
                    GB_ATOMIC_CAPTURE_INT8 (f, Hf [i], 3) ;
                } while (f == 3) ; // lock owner gets f=0 or 2
                if (f == 0)
                { 
                    // C(i,j) is a new entry
                    GB_ATOMIC_WRITE_HX (i, t) ;    // Hx [i] = t
                }
                else // f == 2
                { 
                    // C(i,j) already appears in C(:,j)
                    GB_ATOMIC_UPDATE_HX (i, t) ;   // Hx [i] += t
                }

            #endif

            GB_ATOMIC_WRITE
            Hf [i] = 2 ;            // flag/unlock the entry
        }
    }
}