File: GB_I_inverse.c

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (125 lines) | stat: -rw-r--r-- 4,553 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
//------------------------------------------------------------------------------
// GB_I_inverse: invert an index list
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// I is a large list relative to the vector length, avlen, and it is not
// contiguous.  Scatter I into the I inverse buckets (Mark and Inext) for quick
// lookup.

// FUTURE:: this code is sequential.  Constructing the I inverse buckets in
// parallel would require synchronization (a critical section for each bucket,
// or atomics).  A more parallel approach might use qsort first, to find
// duplicates in I, and then construct the buckets in parallel after the qsort.
// But the time complexity would be higher.

#include "GB_subref.h"

GrB_Info GB_I_inverse           // invert the I list for C=A(I,:)
(
    const GrB_Index *I,         // list of indices, duplicates OK
    int64_t nI,                 // length of I
    int64_t avlen,              // length of the vectors of A
    // outputs:
    int64_t *restrict *p_Mark,  // head pointers for buckets, size avlen
    size_t *p_Mark_size,
    int64_t *restrict *p_Inext, // next pointers for buckets, size nI
    size_t *p_Inext_size,
    int64_t *p_ndupl,           // number of duplicate entries in I
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // get inputs
    //--------------------------------------------------------------------------

    int64_t *Mark  = NULL ; size_t Mark_size = 0 ;
    int64_t *Inext = NULL ; size_t Inext_size = 0 ;
    int64_t ndupl = 0 ;

    (*p_Mark ) = NULL ; (*p_Mark_size ) = 0 ;
    (*p_Inext) = NULL ; (*p_Inext_size) = 0 ;
    (*p_ndupl) = 0 ;

    //--------------------------------------------------------------------------
    // allocate workspace
    //--------------------------------------------------------------------------

    Mark  = GB_CALLOC_WORK (avlen, int64_t, &Mark_size) ;
    Inext = GB_MALLOC_WORK (nI,    int64_t, &Inext_size) ;
    if (Inext == NULL || Mark == NULL)
    { 
        // out of memory
        GB_FREE_WORK (&Mark, Mark_size) ;
        GB_FREE_WORK (&Inext, Inext_size) ;
        return (GrB_OUT_OF_MEMORY) ;
    }

    //--------------------------------------------------------------------------
    // scatter the I indices into buckets
    //--------------------------------------------------------------------------

    // at this point, Mark is all zero, so Mark [i] < 1 for all i in
    // the range 0 to avlen-1.

    // O(nI) time; not parallel
    for (int64_t inew = nI-1 ; inew >= 0 ; inew--)
    {
        int64_t i = I [inew] ;
        ASSERT (i >= 0 && i < avlen) ;
        int64_t ihead = (Mark [i] - 1) ;
        if (ihead < 0)
        { 
            // first time i has been seen in the list I
            ihead = -1 ;
        }
        else
        { 
            // i has already been seen in the list I
            ndupl++ ;
        }
        Mark [i] = inew + 1 ;       // (Mark [i] - 1) = inew
        Inext [inew] = ihead ;
    }

    // indices in I are now in buckets.  An index i might appear more than once
    // in the list I.  inew = (Mark [i] - 1) is the first position of i in I (i
    // will be I [inew]), (Mark [i] - 1) is the head of a link list of all
    // places where i appears in I.  inew = Inext [inew] traverses this list,
    // until inew is -1.

    // to traverse all entries in bucket i, do:
    // GB_for_each_index_in_bucket (inew,i)) { ... }

    #define GB_for_each_index_in_bucket(inew,i) \
        for (int64_t inew = Mark [i] - 1 ; inew >= 0 ; inew = Inext [inew])

    // If Mark [i] < 1, then the ith bucket is empty and i is not in I.
    // Otherise, the first index in bucket i is (Mark [i] - 1).

    #ifdef GB_DEBUG
    for (int64_t i = 0 ; i < avlen ; i++)
    {
        GB_for_each_index_in_bucket (inew, i)
        {
            ASSERT (inew >= 0 && inew < nI) ;
            ASSERT (i == I [inew]) ;
        }
    }
    #endif

    //--------------------------------------------------------------------------
    // return result
    //--------------------------------------------------------------------------

    (*p_Mark ) = Mark  ; (*p_Mark_size ) = Mark_size ;
    (*p_Inext) = Inext ; (*p_Inext_size) = Inext_size ;
    (*p_ndupl) = ndupl ;
    return (GrB_SUCCESS) ;
}