File: GB_subassign_26.c

package info (click to toggle)
suitesparse 1%3A7.10.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 254,920 kB
  • sloc: ansic: 1,134,743; cpp: 46,133; makefile: 4,875; fortran: 2,087; java: 1,826; sh: 996; ruby: 725; python: 495; asm: 371; sed: 166; awk: 44
file content (168 lines) | stat: -rw-r--r-- 5,976 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
//------------------------------------------------------------------------------
// GB_subassign_26: C(:,j1:j2) = A ; append columns, no S
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// Method 26: C(:,j1:j2) = A ; append columns, no S

// M:           NULL
// Mask_comp:   false
// C_replace:   false
// accum:       NULL
// A:           matrix
// S:           constructed

// C: hypersparse
// A: sparse

#include "assign/GB_subassign_methods.h"
#define GB_GENERIC
#define GB_SCALAR_ASSIGN 0
#include "assign/include/GB_assign_shared_definitions.h"

#undef  GB_FREE_ALL
#define GB_FREE_ALL ;
#define GB_MEM_CHUNK (1024*1024)

GrB_Info GB_subassign_26
(
    GrB_Matrix C,
    // input:
    const int64_t Jcolon [3],       // j1:j2, with an increment of 1
    const GrB_Matrix A,
    GB_Werk Werk
)
{ 

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    GrB_Info info ;
    ASSERT (GB_IS_HYPERSPARSE (C)) ;
    ASSERT (GB_IS_SPARSE (A)) ;
    ASSERT (!GB_any_aliased (C, A)) ;   // NO ALIAS of C==A
    ASSERT (!GB_PENDING (A)) ;          // FUTURE: could tolerate pending tuples
    ASSERT (!GB_ZOMBIES (A)) ;          // FUTURE: could tolerate zombies
    ASSERT (A->type == C->type) ;       // no typecasting
    ASSERT (!A->iso) ;                  // FUTURE: handle iso case
    ASSERT (!C->iso) ;                  // FUTURE: handle iso case

    //--------------------------------------------------------------------------
    // get inputs
    //--------------------------------------------------------------------------

    const size_t csize = C->type->size ;
    int64_t Cnvec = C->nvec ;
    int64_t cnz = C->nvals ;

    GB_Ap_DECLARE (Ap, const) ; GB_Ap_PTR (Ap, A) ;
    void *Ai = A->i ;
    GB_void *restrict Ax = (GB_void *) A->x ;
    int64_t anz = A->nvals ;
    bool Ai_is_32 = A->i_is_32 ;
    size_t aisize = (Ai_is_32) ? sizeof (uint32_t) : sizeof (uint64_t) ;
    GB_Type_code aicode = (Ai_is_32) ? GB_UINT32_code : GB_UINT64_code ;

    int64_t j1 = Jcolon [GxB_BEGIN] ;
    int64_t j2 = Jcolon [GxB_END  ] ;
    ASSERT (Jcolon [GxB_INC] == 1) ;
    int64_t nJ = j2 - j1 + 1 ;
    ASSERT (nJ == A->vdim) ;

    //--------------------------------------------------------------------------
    // Method 26: C(:,j1:j2) = A ; append column(s), no S.
    //--------------------------------------------------------------------------

    // Time: Optimal.  Work is O(nnz(A)).

    //--------------------------------------------------------------------------
    // resize C if necessary
    //--------------------------------------------------------------------------

    int64_t cnz_new = cnz + anz ;

    if (Cnvec + nJ > C->plen)
    { 
        // double the size of C->h and C->p if needed
        int64_t plen_new = GB_IMIN (C->vdim, 2*(C->plen + nJ)) ;
        GB_OK (GB_hyper_realloc (C, plen_new, Werk)) ;
    }

    if (cnz_new > GB_nnz_max (C))
    { 
        // double the size of C->i and C->x if needed
        GB_OK (GB_ix_realloc (C, 2*cnz_new + 1)) ;
    }

    GB_Cp_DECLARE (Cp, ) ; GB_Cp_PTR (Cp, C) ;
    GB_Ch_DECLARE (Ch, ) ; GB_Ch_PTR (Ch, C) ;
    GB_Ci_DECLARE (Ci, ) ; GB_Ci_PTR (Ci, C) ;
    GB_void *restrict Cx = (GB_void *) C->x ;
    bool Ci_is_32 = C->i_is_32 ;
    GB_Type_code cicode = (Ci_is_32) ? GB_UINT32_code : GB_UINT64_code ;

    //--------------------------------------------------------------------------
    // determine any parallelism to use
    //--------------------------------------------------------------------------

    ASSERT (Cnvec == 0 || GB_IGET (Ch, Cnvec-1) == j1-1) ;

    bool phase1_parallel = (nJ > GB_CHUNK_DEFAULT) ;
    bool phase2_parallel = (anz * (aisize + csize) > GB_MEM_CHUNK) ;
    int nthreads_max = GB_Context_nthreads_max ( ) ;
    double chunk = GB_Context_chunk ( ) ;

    //--------------------------------------------------------------------------
    // phase1: append to Cp and Ch; find # new nonempty vectors, and properties
    //--------------------------------------------------------------------------

    int64_t Anvec_nonempty = 0 ;
    int nthreads = (phase1_parallel) ?
        GB_nthreads (nJ, chunk, nthreads_max) : 1 ;
    int64_t k ;

    // compute Cp, Ch, and Anvec_nonempty in parallel
    #pragma omp parallel for num_threads(nthreads) schedule(static) \
        reduction(+:Anvec_nonempty)
    for (k = 0 ; k < nJ ; k++)
    { 
        int64_t apk = GB_IGET (Ap, k) ;
        int64_t anzk = GB_IGET (Ap, k+1) - apk ;
        GB_ISET (Ch, Cnvec + k, j1 + k) ;
        GB_ISET (Cp, Cnvec + k, cnz + apk) ;
        Anvec_nonempty += (anzk > 0) ;
    }

    int64_t C_nvec_nonempty = GB_nvec_nonempty_get (C) ;
    if (C_nvec_nonempty >= 0)
    { 
//      C->nvec_nonempty += Anvec_nonempty ;
        GB_nvec_nonempty_set (C, C_nvec_nonempty + Anvec_nonempty) ;
    }
    C->nvec += nJ ;
    GB_ISET (Cp, C->nvec, cnz_new) ;
    C->nvals = cnz_new ;
    C->jumbled = C->jumbled || A->jumbled ;

    //--------------------------------------------------------------------------
    // phase2: append the indices and values to the end of Ci and Cx
    //--------------------------------------------------------------------------

    nthreads = (phase2_parallel) ? GB_nthreads (anz, chunk, nthreads_max) : 1 ;

    // copy Ci and Cx
    GB_cast_int (GB_IADDR (Ci, cnz), cicode, Ai, aicode, anz, nthreads) ;
    GB_memcpy (Cx + cnz * csize, Ax, anz * csize, nthreads) ;

    //--------------------------------------------------------------------------
    // return result
    //--------------------------------------------------------------------------

    return (GrB_SUCCESS) ;
}