File: GB_AxB_saxpy3_fineGus_M_phase2.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 503; asm: 369; python: 125; awk: 10
file content (94 lines) | stat: -rw-r--r-- 4,876 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
//------------------------------------------------------------------------------
// GB_AxB_saxpy3_fineGus_M_phase2: C<M>=A*B, fine Gustavson, phase2
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

{

    //--------------------------------------------------------------------------
    // phase2: fine Gustavson task, C(:,j)<M(:,j)>=A*B(:,j)
    //--------------------------------------------------------------------------

    // Hf [i] is 0 if M(i,j) not present or M(i,j)=0.
    // 0 -> 1 : has already been done in phase0 if M(i,j)=1.

    // 0 -> 0 : to ignore, if M(i,j)=0
    // 1 -> 3 : to lock, if i seen for first time
    // 2 -> 3 : to lock, if i seen already
    // 3 -> 2 : to unlock; now i has been seen

    GB_GET_M_j ;                // get M(:,j)
    GB_GET_M_j_RANGE (16) ;     // get first and last in M(:,j)
    for ( ; pB < pB_end ; pB++)     // scan B(:,j)
    { 
        GB_GET_B_kj_INDEX ;         // get index k of B(k,j)
        GB_GET_A_k ;                // get A(:,k)
        if (aknz == 0) continue ;
        GB_GET_B_kj ;               // bkj = B(k,j)

        #if GB_IS_ANY_MONOID

            //------------------------------------------------------------------
            // C(i,j) += A(i,k)*B(k,j) ; with the ANY monoid
            //------------------------------------------------------------------

            #define GB_IKJ                                              \
                int8_t f ;                                              \
                GB_ATOMIC_READ                                          \
                f = Hf [i] ;                /* grab the entry */        \
                if (f == 0 || f == 2) continue ;                        \
                GB_ATOMIC_WRITE                                         \
                Hf [i] = 2 ;                /* unlock the entry */      \
                GB_MULT_A_ik_B_kj ;         /* t = A(i,k) * B(k,j) */   \
                GB_ATOMIC_WRITE_HX (i, t) ; /* Hx [i] = t */

        #else

            //------------------------------------------------------------------
            // C(i,j) += A(i,k)*B(k,j) ; all other monoids
            //------------------------------------------------------------------

            #define GB_IKJ                                              \
            {                                                           \
                GB_MULT_A_ik_B_kj ;     /* t = A(i,k) * B(k,j) */       \
                int8_t f ;                                              \
                GB_ATOMIC_READ                                          \
                f = Hf [i] ;            /* grab the entry */            \
                if (GB_HAS_ATOMIC && (f == 2))                          \
                {                                                       \
                    /* C(i,j) already seen; update it */                \
                    GB_ATOMIC_UPDATE_HX (i, t) ;    /* Hx [i] += t */   \
                    continue ;       /* C(i,j) has been updated */      \
                }                                                       \
                if (f == 0) continue ; /* M(i,j)=0; ignore C(i,j) */    \
                do  /* lock the entry */                                \
                {                                                       \
                    /* do this atomically: */                           \
                    /* { f = Hf [i] ; Hf [i] = 3 ; } */                 \
                    GB_ATOMIC_CAPTURE_INT8 (f, Hf [i], 3) ;             \
                } while (f == 3) ; /* lock owner gets f=1 or 2 */       \
                if (f == 1)                                             \
                {                                                       \
                    /* C(i,j) is a new entry */                         \
                    GB_ATOMIC_WRITE_HX (i, t) ;     /* Hx [i] = t */    \
                }                                                       \
                else /* f == 2 */                                       \
                {                                                       \
                    /* C(i,j) already appears in C(:,j) */              \
                    GB_ATOMIC_UPDATE_HX (i, t) ;    /* Hx [i] += t */   \
                }                                                       \
                GB_ATOMIC_WRITE                                         \
                Hf [i] = 2 ;                /* unlock the entry */      \
            }

        #endif

        GB_SCAN_M_j_OR_A_k (A_ok_for_binary_search) ;
        #undef GB_IKJ
    }
}