File: GB_rowscale_template.c

package info (click to toggle)
suitesparse 1%3A7.10.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 254,920 kB
  • sloc: ansic: 1,134,743; cpp: 46,133; makefile: 4,875; fortran: 2,087; java: 1,826; sh: 996; ruby: 725; python: 495; asm: 371; sed: 166; awk: 44
file content (75 lines) | stat: -rw-r--r-- 2,686 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
//------------------------------------------------------------------------------
// GB_rowscale_template: C=D*B where D is a square diagonal matrix
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// This template is not used if C is iso, since all that is needed is to create
// C as a shallow-copy of the pattern of B.

// B and C can be jumbled.  D cannot, but it is a diagonal matrix so it is
// never jumbled.

{

    // Template body for the row-scaling kernel C=D*B.  For every entry p of B,
    // with row index i, it fetches the diagonal value D(i,i) and the value
    // B(i,j), and hands both to GB_EWISEOP together with Cx and the same
    // position p.  The matrices C, D, and B, and the thread count nthreads,
    // are not declared here; they must already be in scope where this
    // template is included.

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    // B and C may be jumbled, D may not, and C must not be iso.
    ASSERT (GB_JUMBLED_OK (C)) ;
    ASSERT (!GB_JUMBLED (D)) ;
    ASSERT (GB_JUMBLED_OK (B)) ;
    ASSERT (!C->iso) ;

    //--------------------------------------------------------------------------
    // get D and B
    //--------------------------------------------------------------------------

    // D uses the "A" type and accessor macros (GB_A_TYPE, GB_GETA), B uses the
    // "B" ones, and C uses GB_C_TYPE.  Dx and Bx are read-only; Cx is the
    // output array.
    const GB_A_TYPE *restrict Dx = (GB_A_TYPE *) D->x ;
    const GB_B_TYPE *restrict Bx = (GB_B_TYPE *) B->x ;
          GB_C_TYPE *restrict Cx = (GB_C_TYPE *) C->x ;

    // iso flags of D and B: in a JIT kernel they are the macros GB_A_ISO and
    // GB_B_ISO; otherwise they are read from the matrices at run time.
    // Note that in the JIT case D_iso and B_iso are #define'd and are not
    // #undef'd by this template.
    #ifdef GB_JIT_KERNEL
    #define D_iso GB_A_ISO
    #define B_iso GB_B_ISO
    #else
    const bool D_iso = D->iso ;
    const bool B_iso = B->iso ;
    #endif

    // row indices of B (accessed below only through GBi_B), the number of
    // entries in B, and the vector length of B
    GB_Bi_DECLARE (Bi, const) ; GB_Bi_PTR (Bi, B) ;
    GB_B_NVALS (bnz) ;      // const int64_t bnz = GB_nnz (B) ;
    const int64_t bvlen = B->vlen ;

    //--------------------------------------------------------------------------
    // C=D*B
    //--------------------------------------------------------------------------

    // Start with one task per thread, then cap the task count by bnz
    // (GB_IMIN is presumably an integer min -- confirm), so there are never
    // more tasks than entries of B.
    int ntasks = nthreads ;
    ntasks = GB_IMIN (bnz, ntasks) ;

    // partition the space into ntasks, to do this in parallel:
    // for (p = 0 ; p < bnz ; p++) { ... work on Bi [p] and Bx [p] ... }

    // The loop index is declared outside the for statement; the OpenMP
    // parallel-for below runs the tasks with a static schedule on nthreads
    // threads.
    int tid ;
    #pragma omp parallel for num_threads(nthreads) schedule(static)
    for (tid = 0 ; tid < ntasks ; tid++)
    {
        // GB_PARTITION sets pstart and pend for task tid out of ntasks; the
        // loop below treats them as the half-open range [pstart,pend) of
        // positions in B.
        int64_t pstart, pend ;
        GB_PARTITION (pstart, pend, bnz, tid, ntasks) ;
        GB_PRAGMA_SIMD_VECTORIZE
        for (int64_t p = pstart ; p < pend ; p++)
        { 
            int64_t i = GBi_B (Bi, p, bvlen) ;      // get row index of B(i,j)
            // D is indexed by the row index i, B and C by the position p
            GB_DECLAREA (dii) ;
            GB_GETA (dii, Dx, i, D_iso) ;           // dii = D(i,i)
            GB_DECLAREB (bij) ;
            GB_GETB (bij, Bx, p, B_iso) ;           // bij = B(i,j)
            // NOTE(review): the trailing 0, 0 arguments look like unused
            // row/column index arguments -- confirm against the definition
            // of GB_EWISEOP.
            GB_EWISEOP (Cx, p, dii, bij, 0, 0) ;    // C(i,j) = dii*bij
        }
    }
}