1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
|
/*
-- MAGMA (version 2.9.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
@date January 2025
@generated from sparse/control/magma_zmslice.cpp, normal z -> s, Wed Jan 22 14:42:28 2025
@author Hartwig Anzt
*/
#include "magmasparse_internal.h"
#define THRESHOLD 10e-99
/**
Purpose
-------
Takes a matrix and extracts a slice for solving the system in parallel:
B = A( i:i+n, : ) and ALOC = A(i:i+n,i:i+n) and ANLOCA(0:start - end:n,:)
B is of size n x n, ALOC of size end-start x end-start,
ANLOC of size end-start x n
The last slice might be smaller. For the non-local parts, B is the identity.
comm contains 1ess in the locations that are non-local but needed to
solve local system.
Arguments
---------
@param[in]
num_slices magma_int_t
number of slices
@param[in]
slice magma_int_t
slice id (0.. num_slices-1)
@param[in]
A magma_s_matrix
sparse matrix in CSR
@param[out]
B magma_s_matrix*
sparse matrix in CSR
@param[out]
ALOC magma_s_matrix*
sparse matrix in CSR
@param[out]
ANLOC magma_s_matrix*
sparse matrix in CSR
@param[in,out]
comm_i magma_int_t*
communication plan
@param[in,out]
comm_v float*
communication plan
@param[in]
queue magma_queue_t
Queue to execute in.
@param[out]
start magma_int_t*
start of slice (row-index)
@param[out]
end magma_int_t*
end of slice (row-index)
@ingroup magmasparse_saux
********************************************************************/
extern "C" magma_int_t
magma_smslice(
magma_int_t num_slices,
magma_int_t slice,
magma_s_matrix A,
magma_s_matrix *B,
magma_s_matrix *ALOC,
magma_s_matrix *ANLOC,
magma_index_t *comm_i,
float *comm_v,
magma_int_t *start,
magma_int_t *end,
magma_queue_t queue )
{
magma_int_t info = 0;
// make sure the target structure is empty
magma_smfree( B, queue );
if( A.num_rows != A.num_cols ){
printf("%% error: only supported for square matrices.\n");
info = MAGMA_ERR_NOT_SUPPORTED;
goto cleanup;
}
if ( A.memory_location == Magma_CPU
&& A.storage_type == Magma_CSR ){
CHECK( magma_smconvert( A, B, Magma_CSR, Magma_CSR, queue ) );
magma_free_cpu( B->col );
magma_free_cpu( B->val );
CHECK( magma_smconvert( A, ALOC, Magma_CSR, Magma_CSR, queue ) );
magma_free_cpu( ALOC->col );
magma_free_cpu( ALOC->row );
magma_free_cpu( ALOC->val );
CHECK( magma_smconvert( A, ANLOC, Magma_CSR, Magma_CSR, queue ) );
magma_free_cpu( ANLOC->col );
magma_free_cpu( ANLOC->row );
magma_free_cpu( ANLOC->val );
magma_int_t i,j,k, nnz, nnz_loc=0, loc_row = 0, nnz_nloc = 0;
magma_index_t col;
magma_int_t size = magma_ceildiv( A.num_rows, num_slices );
magma_int_t lstart = slice*size;
magma_int_t lend = min( (slice+1)*size, A.num_rows );
// correct size for last slice
size = lend-lstart;
CHECK( magma_index_malloc_cpu( &ALOC->row, size+1 ) );
CHECK( magma_index_malloc_cpu( &ANLOC->row, size+1 ) );
// count elements for slice - identity for rest
nnz = A.row[ lend ] - A.row[ lstart ] + ( A.num_rows - size );
CHECK( magma_index_malloc_cpu( &B->col, nnz ) );
CHECK( magma_smalloc_cpu( &B->val, nnz ) );
// for the communication plan
for( i=0; i<A.num_rows; i++ ) {
comm_i[i] = 0;
comm_v[i] = MAGMA_S_ZERO;
}
k=0;
B->row[i] = 0;
ALOC->row[0] = 0;
ANLOC->row[0] = 0;
// identity above slice
for( i=0; i<lstart; i++ ) {
B->row[i+1] = B->row[i]+1;
B->val[k] = MAGMA_S_ONE;
B->col[k] = i;
k++;
}
// slice
for( i=lstart; i<lend; i++ ) {
B->row[i+1] = B->row[i] + (A.row[i+1]-A.row[i]);
for( j=A.row[i]; j<A.row[i+1]; j++ ){
B->val[k] = A.val[j];
col = A.col[j];
B->col[k] = col;
// communication plan
if( col<lstart || col>=lend ){
comm_i[ col ] = 1;
comm_v[ col ] = comm_v[ col ]
+ MAGMA_S_MAKE( MAGMA_S_ABS( A.val[j] ), 0.0 );
nnz_nloc++;
} else {
nnz_loc++;
}
k++;
}
loc_row++;
ALOC->row[ loc_row ] = nnz_loc;
ANLOC->row[ loc_row ] = nnz_nloc;
}
CHECK( magma_index_malloc_cpu( &ALOC->col, nnz_loc ) );
CHECK( magma_smalloc_cpu( &ALOC->val, nnz_loc ) );
ALOC->num_rows = size;
ALOC->num_cols = size;
ALOC->nnz = nnz_loc;
CHECK( magma_index_malloc_cpu( &ANLOC->col, nnz_nloc ) );
CHECK( magma_smalloc_cpu( &ANLOC->val, nnz_nloc ) );
ANLOC->num_rows = size;
ANLOC->num_cols = A.num_cols;
ANLOC->nnz = nnz_nloc;
nnz_loc = 0;
nnz_nloc = 0;
// local/nonlocal matrix
for( i=lstart; i<lend; i++ ) {
for( j=A.row[i]; j<A.row[i+1]; j++ ){
col = A.col[j];
// insert only in local part in ALOC, nonlocal in ANLOC
if( col<lstart || col>=lend ){
ANLOC->val[ nnz_nloc ] = A.val[j];
ANLOC->col[ nnz_nloc ] = col;
nnz_nloc++;
} else {
ALOC->val[ nnz_loc ] = A.val[j];
ALOC->col[ nnz_loc ] = col-lstart;
nnz_loc++;
}
}
}
// identity below slice
for( i=lend; i<A.num_rows; i++ ) {
B->row[i+1] = B->row[i]+1;
B->val[k] = MAGMA_S_ONE;
B->col[k] = i;
k++;
}
B->nnz = k;
*start = lstart;
*end = lend;
}
else {
printf("error: mslice only supported for CSR matrices on the CPU: %d %d.\n",
int(A.memory_location), int(A.storage_type) );
info = MAGMA_ERR_NOT_SUPPORTED;
}
cleanup:
return info;
}
|