File: magma_smslice.cpp

package info (click to toggle)
magma 2.9.0%2Bds-2
  • links: PTS, VCS
  • area: contrib
  • in suites: trixie
  • size: 83,212 kB
  • sloc: cpp: 709,115; fortran: 121,916; ansic: 32,343; python: 25,603; f90: 15,208; makefile: 942; xml: 253; csh: 232; sh: 203; perl: 104
file content (225 lines) | stat: -rw-r--r-- 6,836 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
/*
    -- MAGMA (version 2.9.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date January 2025

       @generated from sparse/control/magma_zmslice.cpp, normal z -> s, Wed Jan 22 14:42:28 2025
       @author Hartwig Anzt
*/
#include "magmasparse_internal.h"

#define THRESHOLD 10e-99


/**
    Purpose
    -------

    Takes a matrix and extracts a slice for solving the system in parallel:
    
        B = A( i:i+n, : ) and ALOC = A(i:i+n,i:i+n) and ANLOCA(0:start - end:n,:)
        
    B is of size n x n, ALOC of size end-start x end-start,
    ANLOC of size end-start x n
        
    The last slice might be smaller. For the non-local parts, B is the identity.
    comm contains 1ess in the locations that are non-local but needed to 
    solve local system.


    Arguments
    ---------
    
    @param[in]
    num_slices  magma_int_t
                number of slices
    
    @param[in]
    slice       magma_int_t
                slice id (0.. num_slices-1)

    @param[in]
    A           magma_s_matrix
                sparse matrix in CSR

    @param[out]
    B           magma_s_matrix*
                sparse matrix in CSR
                
    @param[out]
    ALOC        magma_s_matrix*
                sparse matrix in CSR
                
    @param[out]
    ANLOC       magma_s_matrix*
                sparse matrix in CSR
                
    @param[in,out]
    comm_i      magma_int_t*
                communication plan
 
    @param[in,out]
    comm_v      float*
                communication plan

    @param[in]
    queue       magma_queue_t
                Queue to execute in.
                
    @param[out]
    start       magma_int_t*
                start of slice (row-index)
                
    @param[out]
    end         magma_int_t*
                end of slice (row-index)

    @ingroup magmasparse_saux
    ********************************************************************/

extern "C" magma_int_t
magma_smslice(
    magma_int_t num_slices,
    magma_int_t slice,
    magma_s_matrix A, 
    magma_s_matrix *B,
    magma_s_matrix *ALOC,
    magma_s_matrix *ANLOC,
    magma_index_t *comm_i,
    float *comm_v,
    magma_int_t *start,
    magma_int_t *end,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    // make sure the target structure is empty
    magma_smfree( B, queue );
    
    if( A.num_rows != A.num_cols ){
        printf("%%  error: only supported for square matrices.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    
    if ( A.memory_location == Magma_CPU
            && A.storage_type == Magma_CSR ){
        CHECK( magma_smconvert( A, B, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( B->col );
        magma_free_cpu( B->val );
        CHECK( magma_smconvert( A, ALOC, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( ALOC->col );
        magma_free_cpu( ALOC->row );
        magma_free_cpu( ALOC->val );
        CHECK( magma_smconvert( A, ANLOC, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( ANLOC->col );
        magma_free_cpu( ANLOC->row );
        magma_free_cpu( ANLOC->val );
        
        magma_int_t i,j,k, nnz, nnz_loc=0, loc_row = 0, nnz_nloc = 0;
        magma_index_t col;
        magma_int_t size = magma_ceildiv( A.num_rows, num_slices ); 
        magma_int_t lstart = slice*size;
        magma_int_t lend = min( (slice+1)*size, A.num_rows );
        // correct size for last slice
        size = lend-lstart;
        CHECK( magma_index_malloc_cpu( &ALOC->row, size+1 ) );
        CHECK( magma_index_malloc_cpu( &ANLOC->row, size+1 ) );
        
        // count elements for slice - identity for rest
        nnz = A.row[ lend ] - A.row[ lstart ] + ( A.num_rows - size );
        CHECK( magma_index_malloc_cpu( &B->col, nnz ) );
        CHECK( magma_smalloc_cpu( &B->val, nnz ) );         
        
        // for the communication plan
        for( i=0; i<A.num_rows; i++ ) {
            comm_i[i] = 0;
            comm_v[i] = MAGMA_S_ZERO;
        }
        
        k=0;
        B->row[i] = 0;
        ALOC->row[0] = 0;
        ANLOC->row[0] = 0;
        // identity above slice
        for( i=0; i<lstart; i++ ) {
            B->row[i+1]   = B->row[i]+1;
            B->val[k] = MAGMA_S_ONE;
            B->col[k] = i;
            k++;
        }
        
        // slice        
        for( i=lstart; i<lend; i++ ) {
            B->row[i+1]   = B->row[i] + (A.row[i+1]-A.row[i]);
            for( j=A.row[i]; j<A.row[i+1]; j++ ){
                B->val[k] = A.val[j];
                col = A.col[j];
                B->col[k] = col;
                // communication plan
                if( col<lstart || col>=lend ){
                    comm_i[ col ] = 1;
                    comm_v[ col ] = comm_v[ col ] 
                            + MAGMA_S_MAKE( MAGMA_S_ABS( A.val[j] ), 0.0 );
                    nnz_nloc++;
                } else {
                    nnz_loc++;   
                }
                k++;
            }
            loc_row++;
            ALOC->row[ loc_row ] = nnz_loc;
            ANLOC->row[ loc_row ] = nnz_nloc;
        }
        CHECK( magma_index_malloc_cpu( &ALOC->col, nnz_loc ) );
        CHECK( magma_smalloc_cpu( &ALOC->val, nnz_loc ) ); 
        ALOC->num_rows = size;
        ALOC->num_cols = size;
        ALOC->nnz = nnz_loc;
        
        CHECK( magma_index_malloc_cpu( &ANLOC->col, nnz_nloc ) );
        CHECK( magma_smalloc_cpu( &ANLOC->val, nnz_nloc ) ); 
        ANLOC->num_rows = size;
        ANLOC->num_cols = A.num_cols;
        ANLOC->nnz = nnz_nloc;
        
        nnz_loc = 0;
        nnz_nloc = 0;
        // local/nonlocal matrix        
        for( i=lstart; i<lend; i++ ) {
            for( j=A.row[i]; j<A.row[i+1]; j++ ){
                col = A.col[j];
                // insert only in local part in ALOC, nonlocal in ANLOC
                if( col<lstart || col>=lend ){
                    ANLOC->val[ nnz_nloc ] = A.val[j];
                    ANLOC->col[ nnz_nloc ] = col;  
                    nnz_nloc++;
                } else {
                    ALOC->val[ nnz_loc ] = A.val[j];
                    ALOC->col[ nnz_loc ] = col-lstart;  
                    nnz_loc++;
                }
            }
        }
        
        // identity below slice
        for( i=lend; i<A.num_rows; i++ ) {
            B->row[i+1] = B->row[i]+1;
            B->val[k] = MAGMA_S_ONE;
            B->col[k] = i;
            k++;
        }
        B->nnz = k;
        *start = lstart;
        *end = lend;
    }
    else {
        printf("error: mslice only supported for CSR matrices on the CPU: %d %d.\n", 
                int(A.memory_location), int(A.storage_type) );
        info = MAGMA_ERR_NOT_SUPPORTED;
    }
cleanup:
    return info;
}