File: example_v1.cpp

package info (click to toggle)
magma-rocm 2.9.0%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 83,540 kB
  • sloc: cpp: 709,115; fortran: 121,916; ansic: 32,343; python: 25,603; f90: 15,208; makefile: 945; xml: 253; csh: 232; sh: 203; perl: 104
file content (175 lines) | stat: -rw-r--r-- 5,415 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
// This is a simple standalone example. See README.txt

#include <stdio.h>
#include <stdlib.h>

#include "magma.h"
#include "magma_lapack.h"  // if you need BLAS & LAPACK


// ------------------------------------------------------------
// Replace with your code to initialize the A matrix.
// This simply initializes it to random values.
// Note that A is stored column-wise, not row-wise.
//
// m   - number of rows,    m >= 0.
// n   - number of columns, n >= 0.
// A   - m-by-n array of size lda*n.
// lda - leading dimension of A, lda >= m.
//
// When lda > m, rows (m, ..., lda-1) below the bottom of the matrix are ignored.
// This is helpful for working with sub-matrices, and for aligning the top
// of columns to memory boundaries (or avoiding such alignment).
// Significantly better memory performance is achieved by having the outer loop
// over columns (j), and the inner loop over rows (i), than the reverse.
void zfill_matrix(
    magma_int_t m, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda )
{
    #define A(i_, j_) A[ (i_) + (j_)*lda ]
    
    magma_int_t i, j;
    for (j=0; j < n; ++j) {
        for (i=0; i < m; ++i) {
            A(i,j) = MAGMA_Z_MAKE( rand() / ((double) RAND_MAX),    // real part
                                   rand() / ((double) RAND_MAX) );  // imag part
        }
    }
    
    #undef A
}


// ------------------------------------------------------------
// Replace with your code to initialize the X rhs.
void zfill_rhs(
    magma_int_t m, magma_int_t nrhs, magmaDoubleComplex *X, magma_int_t ldx )
{
    zfill_matrix( m, nrhs, X, ldx );
}


// ------------------------------------------------------------
// Replace with your code to initialize the dA matrix on the GPU device.
// This simply leverages the CPU version above to initialize it to random values,
// and copies the matrix to the GPU.
void zfill_matrix_gpu(
    magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda )
{
    magmaDoubleComplex *A;
    int lda = ldda;
    magma_zmalloc_cpu( &A, m*lda );
    if (A == NULL) {
        fprintf( stderr, "malloc failed\n" );
        return;
    }
    zfill_matrix( m, n, A, lda );
    magma_zsetmatrix( m, n, A, lda, dA, ldda );
    magma_free_cpu( A );
}


// ------------------------------------------------------------
// Replace with your code to initialize the dX rhs on the GPU device.
void zfill_rhs_gpu(
    magma_int_t m, magma_int_t nrhs, magmaDoubleComplex *dX, magma_int_t lddx )
{
    zfill_matrix_gpu( m, nrhs, dX, lddx );
}


// ------------------------------------------------------------
// Solve A * X = B, where A and X are stored in CPU host memory.
// Internally, MAGMA transfers data to the GPU device
// and uses a hybrid CPU + GPU algorithm.
void cpu_interface( magma_int_t n, magma_int_t nrhs )
{
    magmaDoubleComplex *A=NULL, *X=NULL;
    magma_int_t *ipiv=NULL;
    magma_int_t lda  = n;
    magma_int_t ldx  = lda;
    magma_int_t info = 0;
    
    // magma_*malloc_cpu routines for CPU memory are type-safe and align to memory boundaries,
    // but you can use malloc or new if you prefer.
    magma_zmalloc_cpu( &A, lda*n );
    magma_zmalloc_cpu( &X, ldx*nrhs );
    magma_imalloc_cpu( &ipiv, n );
    if (A == NULL || X == NULL || ipiv == NULL) {
        fprintf( stderr, "malloc failed\n" );
        goto cleanup;
    }
    
    // Replace these with your code to initialize A and X
    zfill_matrix( n, n, A, lda );
    zfill_rhs( n, nrhs, X, ldx );
    
    magma_zgesv( n, 1, A, lda, ipiv, X, lda, &info );
    if (info != 0) {
        fprintf( stderr, "magma_zgesv failed with info=%d\n", info );
    }
    
    // TODO: use result in X
    
cleanup:
    magma_free_cpu( A );
    magma_free_cpu( X );
    magma_free_cpu( ipiv );
}


// ------------------------------------------------------------
// Solve dA * dX = dB, where dA and dX are stored in GPU device memory.
// Internally, MAGMA uses a hybrid CPU + GPU algorithm.
void gpu_interface( magma_int_t n, magma_int_t nrhs )
{
    magmaDoubleComplex *dA=NULL, *dX=NULL;
    magma_int_t *ipiv=NULL;
    magma_int_t ldda = magma_roundup( n, 32 );  // round up to multiple of 32 for best GPU performance
    magma_int_t lddx = ldda;
    magma_int_t info = 0;
    
    // magma_*malloc routines for GPU memory are type-safe,
    // but you can use cudaMalloc if you prefer.
    magma_zmalloc( &dA, ldda*n );
    magma_zmalloc( &dX, lddx*nrhs );
    magma_imalloc_cpu( &ipiv, n );  // ipiv always on CPU
    if (dA == NULL || dX == NULL || ipiv == NULL) {
        fprintf( stderr, "malloc failed\n" );
        goto cleanup;
    }
    
    // Replace these with your code to initialize A and X
    zfill_matrix_gpu( n, n, dA, ldda );
    zfill_rhs_gpu( n, nrhs, dX, lddx );
    
    magma_zgesv_gpu( n, 1, dA, ldda, ipiv, dX, ldda, &info );
    if (info != 0) {
        fprintf( stderr, "magma_zgesv_gpu failed with info=%d\n", info );
    }
    
    // TODO: use result in dX
    
cleanup:
    magma_free( dA );
    magma_free( dX );
    magma_free_cpu( ipiv );
}


// ------------------------------------------------------------
int main( int argc, char** argv )
{
    magma_init();
    
    magma_int_t n = 1000;
    magma_int_t nrhs = 1;
    
    printf( "using MAGMA CPU interface\n" );
    cpu_interface( n, nrhs );

    printf( "using MAGMA GPU interface\n" );
    gpu_interface( n, nrhs );
    
    magma_finalize();
    return 0;
}