File: GB_transpose_bitmap.c

package info (click to toggle)
suitesparse 1%3A7.10.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 254,920 kB
  • sloc: ansic: 1,134,743; cpp: 46,133; makefile: 4,875; fortran: 2,087; java: 1,826; sh: 996; ruby: 725; python: 495; asm: 371; sed: 166; awk: 44
file content (57 lines) | stat: -rw-r--r-- 1,976 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
//------------------------------------------------------------------------------
// GB_transpose_bitmap: C=op(cast(A')), transpose, typecast, and apply op
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

{

    //------------------------------------------------------------------
    // bitmap case: both the input A and the output C are bitmap matrices
    //------------------------------------------------------------------

    ASSERT (GB_IS_BITMAP (A)) ;
    ASSERT (GB_IS_BITMAP (C)) ;

    // bitmap arrays: Ab is only read, Cb is only written
    const int8_t *restrict Ab = A->b ;
    int8_t *restrict Cb = C->b ;

    // A has dimension avlen-by-avdim, so C has dimension avdim-by-avlen
    int64_t avdim = A->vdim ;
    int64_t avlen = A->vlen ;

    // number of bitmap positions to visit; integer overflow of this product
    // is deliberately not checked
    int64_t anz = avlen * avdim ;

    // TODO: for large matrices, working tile-by-tile instead of by
    // rows/columns would be faster.  In most uses within GraphBLAS,
    // however, A and C are tall-and-thin or short-and-fat.

    int tid ;
    #pragma omp parallel for num_threads(nthreads) schedule(static)
    for (tid = 0 ; tid < nthreads ; tid++)
    {
        // task tid scans the positions pfirst:plast-1 of C, as assigned by
        // GB_PARTITION from the anz positions and the nthreads tasks
        int64_t pfirst, plast ;
        GB_PARTITION (pfirst, plast, anz, tid, nthreads) ;
        for (int64_t pC = pfirst ; pC < plast ; pC++)
        {
            // position pC in C corresponds to the entry C(i,j), where
            // i = pC % avdim and j = pC / avdim
            const int64_t crow = pC % avdim ;
            const int64_t ccol = pC / avdim ;
            // the source entry A(j,i) resides at position j + i * avlen
            int64_t pA = ccol + crow * avlen ;
            // copy the bitmap status of A(j,i) into C(i,j)
            const int8_t present = Ab [pA] ;
            Cb [pC] = present ;
            #ifndef GB_ISO_TRANSPOSE
            // non-iso case: the value is computed only where an entry exists
            if (present)
            { 
                // Cx [pC] = op (Ax [pA])
                GB_APPLY_OP (pC, pA) ;
            }
            #endif
        }
    }
}