File: GB_add_bitmap_noM_22.c

package info (click to toggle)
suitesparse 1%3A7.10.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 254,920 kB
  • sloc: ansic: 1,134,743; cpp: 46,133; makefile: 4,875; fortran: 2,087; java: 1,826; sh: 996; ruby: 725; python: 495; asm: 371; sed: 166; awk: 44
file content (103 lines) | stat: -rw-r--r-- 3,619 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
//------------------------------------------------------------------------------
// GB_add_bitmap_noM_22: C=A+B, C bitmap, A bitmap, B sparse/hyper
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// C is bitmap.
// A is bitmap.  B is sparse/hyper.

{

    //--------------------------------------------------------------------------
    // Method22: C and A are bitmap; B is sparse or hypersparse
    //--------------------------------------------------------------------------

    // This is a brace-enclosed code fragment, not a standalone function.
    // Every name used but not declared below (Cb, Ab, Cx, Ax, Bx, Bp, Bh, Bi,
    // cnz, vlen, cnvals, taskid, C_nthreads, B_nthreads, B_ntasks,
    // B_ek_slicing, alpha_scalar, beta_scalar, A_iso, B_iso, and all GB_*
    // macros) must be supplied by the code that surrounds this block.
    //
    // The work is done in two phases:
    //  (1) seed C from A: copy the bitmap Ab into Cb and, unless GB_ISO_ADD
    //      is defined, fill Cx for every position where A has an entry;
    //  (2) merge B into C: walk the entries of B and either combine them with
    //      the value already contributed by A, or insert them as new entries.

    //--------------------------------------------------------------------------
    // phase 1: C = A (or A + beta for eWiseUnion)
    //--------------------------------------------------------------------------

    #ifdef GB_ISO_ADD
        // only the bitmap is copied in this case; no value of Cx is written
        // by this block.
        // NOTE(review): presumably the single iso value of C is set by the
        // caller -- confirm.
        GB_memcpy (Cb, Ab, cnz, C_nthreads) ;
    #else
        // p is declared outside the loop and used as the parallel loop index
        int64_t p ;
        #pragma omp parallel for num_threads(C_nthreads) schedule(static)
        for (p = 0 ; p < cnz ; p++)
        { 
            // a is nonzero if A has an entry at position p of its bitmap
            int8_t a = Ab [p] ;
            if (a)
            { 
                #if GB_IS_EWISEUNION
                { 
                    // C (i,j) = A(i,j) + beta
                    // the row and column indices passed to the operator are
                    // recovered from the linear position: i = p % vlen and
                    // j = p / vlen
                    GB_LOAD_A (aij, Ax, p, A_iso) ;
                    GB_EWISEOP (Cx, p, aij, beta_scalar, p % vlen, p / vlen) ;
                }
                #else
                { 
                    // C (i,j) = A (i,j)
                    GB_COPY_A_to_C (Cx, p, Ax, p, A_iso) ;
                }
                #endif
            }
            // the bitmap of C starts out identical to the bitmap of A
            Cb [p] = a ;
        }
    #endif

    // so far C has exactly the entries of A; phase 2 adds one to this count
    // for each entry of B that lands on a position not present in A
    cnvals = A->nvals ;

    //--------------------------------------------------------------------------
    // phase 2: C += B
    //--------------------------------------------------------------------------

    // B_ek_slicing is read as three consecutive arrays, each of length
    // B_ntasks: the first vector of each task, the last vector of each task,
    // and a third array that is handed to GB_GET_PA below.
    const int64_t *kfirst_Bslice = B_ek_slicing ;
    const int64_t *klast_Bslice  = B_ek_slicing + B_ntasks ;
    const int64_t *pstart_Bslice = B_ek_slicing + B_ntasks*2 ;

    // Each task counts its newly created entries in task_cnvals, and the
    // per-task counts are summed into cnvals by the OpenMP reduction.
    // NOTE(review): Cb [p] and Cx [p] are read and written below without
    // atomics; this looks like it relies on each entry of B being visited by
    // exactly one task -- confirm against how B_ek_slicing is constructed.
    #pragma omp parallel for num_threads(B_nthreads) schedule(dynamic,1) \
        reduction(+:cnvals)
    for (taskid = 0 ; taskid < B_ntasks ; taskid++)
    {
        // this task handles vectors kfirst to klast of B, inclusive
        int64_t kfirst = kfirst_Bslice [taskid] ;
        int64_t klast  = klast_Bslice  [taskid] ;
        int64_t task_cnvals = 0 ;
        for (int64_t k = kfirst ; k <= klast ; k++)
        {
            // find the part of B(:,k) for this task
            // j is the index of the kth vector of B, obtained via Bh.
            // pB_start and pB_end are not declared in this block.
            // NOTE(review): presumably GB_GET_PA declares them and sets them
            // to this task's share of B(:,k) -- confirm.
            int64_t j = GBh_B (Bh, k) ;
            GB_GET_PA (pB_start, pB_end, taskid, k, kfirst, klast,
                pstart_Bslice, GB_IGET (Bp, k), GB_IGET (Bp, k+1)) ;
            // position in Cb/Cx of the first entry of vector j of C
            int64_t pC_start = j * vlen ;
            // traverse over B(:,j), the kth vector of B
            for (int64_t pB = pB_start ; pB < pB_end ; pB++)
            {
                // i is the row index of this entry of B, and p is the
                // position of C(i,j) in the bitmap (this p shadows the
                // phase-1 loop index when GB_ISO_ADD is not defined)
                int64_t i = GB_IGET (Bi, pB) ;
                int64_t p = pC_start + i ;
                if (Cb [p])
                { 
                    // C (i,j) = A (i,j) + B (i,j)
                    // the position was already marked present in phase 1, so
                    // the entry count does not change.  When GB_ISO_ADD is
                    // defined this branch is empty.
                    #ifndef GB_ISO_ADD
                    GB_LOAD_A (aij, Ax, p , A_iso) ;
                    GB_LOAD_B (bij, Bx, pB, B_iso) ;
                    GB_EWISEOP (Cx, p, aij, bij, i, j) ;
                    #endif
                }
                else
                { 
                    // the position is not yet present in C, so this entry
                    // comes from B alone
                    #ifndef GB_ISO_ADD
                    #if GB_IS_EWISEUNION
                    { 
                        // C (i,j) = alpha + B(i,j)
                        GB_LOAD_B (bij, Bx, pB, B_iso) ;
                        GB_EWISEOP (Cx, p, alpha_scalar, bij, i, j) ;
                    }
                    #else
                    { 
                        // C (i,j) = B (i,j)
                        GB_COPY_B_to_C (Cx, p, Bx, pB, B_iso) ;
                    }
                    #endif
                    #endif
                    // mark the new entry as present and count it
                    Cb [p] = 1 ;
                    task_cnvals++ ;
                }
            }
        }
        // fold this task's count of new entries into the reduction variable
        cnvals += task_cnvals ;
    }
}