File: example_f.F90

package info (click to toggle)
magma-rocm 2.9.0%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 83,540 kB
  • sloc: cpp: 709,115; fortran: 121,916; ansic: 32,343; python: 25,603; f90: 15,208; makefile: 945; xml: 253; csh: 232; sh: 203; perl: 104
file content (196 lines) | stat: -rw-r--r-- 5,679 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
!! This is a simple standalone example. See README.txt

module example_f
use magma
implicit none
contains


!! ------------------------------------------------------------
!! Replace with your code to initialize the A matrix.
!! This simply initializes it to random values.
!! Note that A is stored column-wise, not row-wise.
!!
!! m   - number of rows,    m >= 0.
!! n   - number of columns, n >= 0.
!! A   - m-by-n array of size lda*n.
!! lda - leading dimension of A, lda >= m.
!!
!! When lda > m, rows (m, ..., lda-1) below the bottom of the matrix are ignored.
!! This is helpful for working with sub-matrices, and for aligning the top
!! of columns to memory boundaries (or avoiding such alignment).
!! Significantly better memory performance is achieved by having the outer loop
!! over columns (j), and the inner loop over rows (i), than the reverse.
subroutine zfill_matrix( m, n, A, lda )
    integer :: m, n, lda
    complex*16 :: A(:,:)
    
    integer :: i, j
    real*8 :: re, im
    
    do j = 1, n
        do i = 1, m
            call random_number( re )
            call random_number( im )
            A(i,j) = cmplx( re, im )
        end do
    end do
end subroutine


!! ------------------------------------------------------------
!! Replace with your code to initialize the X rhs.
subroutine zfill_rhs( m, nrhs, X, ldx )
    integer :: m, nrhs, ldx
    complex*16 :: X(:,:)
    call zfill_matrix( m, nrhs, X, ldx )
end subroutine


!! ------------------------------------------------------------
!! Replace with your code to initialize the dA matrix on the GPU device.
!! This simply leverages the CPU version above to initialize it to random values,
!! and copies the matrix to the GPU.
subroutine zfill_matrix_gpu( m, n, dA, lda )
    integer :: m, n, lda
    magma_devptr_t :: dA
    magma_devptr_t :: queue  !! really a CPU pointer

    complex*16, allocatable :: A(:,:)
    integer :: sizeof_complex=16
    
    allocate( A(lda,n) )
    call zfill_matrix( m, n, A, lda )
    call magmaf_queue_create( 0, queue )
    call magmaf_zsetmatrix( m, n, A, lda, dA, lda, queue )
    call magmaf_queue_destroy( queue )
    deallocate( A )
end subroutine


!! ------------------------------------------------------------
!! Replace with your code to initialize the dX rhs on the GPU device.
subroutine zfill_rhs_gpu( m, nrhs, dX, ldx )
    integer :: m, nrhs, ldx
    magma_devptr_t :: dX
    call zfill_matrix_gpu( m, nrhs, dX, ldx )
end subroutine


!! ------------------------------------------------------------
!! Solve A * X = B, where A and X are stored in CPU host memory.
!! Internally, MAGMA transfers data to the GPU device
!! and uses a hybrid CPU + GPU algorithm.
subroutine cpu_interface( n, nrhs )
    integer :: n, nrhs
    
    complex*16, allocatable :: A(:,:), X(:,:)
    integer,    allocatable :: ipiv(:)
    integer :: lda, ldx, info
    
    lda  = n
    ldx  = lda
    info = 0
    
    !! allocate CPU memory
    !! no magma Fortran routines for this, so use Fortran's normal mechanism
    allocate( A(lda, n) )
    allocate( X(ldx, nrhs) )
    allocate( ipiv(n) )
    
    !! Replace these with your code to initialize A and X
    call zfill_matrix( n, n, A, lda )
    call zfill_rhs( n, nrhs, X, ldx )
    
    ! Solve using LU factorization
    call magmaf_zgesv( n, 1, A, lda, ipiv, X, lda, info )
    if (info .ne. 0) then
        print "(a,i5)", "magma_zgesv failed with info=", info
    end if
    
    ! Instead, for least squares, or if preferred over LU, use QR
    !call magmaf_zgels( MagmaNoTrans, n, n, 1, A, lda, X, lda, work, lwork, info )
    
    ! Instead, if A is SPD (symmetric/Hermitian positive definite), use Cholesky
    !call magmaf_zposv( MagmaLower, n, 1, A, lda, X, lda, info )
    
    !! TODO: use result in X
    
!! cleanup:
    deallocate( A )
    deallocate( X )
    deallocate( ipiv )
end subroutine


!! ------------------------------------------------------------
!! Solve dA * dX = dB, where dA and dX are stored in GPU device memory.
!! Internally, MAGMA uses a hybrid CPU + GPU algorithm.
subroutine gpu_interface( n, nrhs )
    integer :: n, nrhs
    
    magma_devptr_t :: dA, dX
    integer, allocatable :: ipiv(:)
    integer :: ldda, lddx, info
    integer :: sizeof_complex=16

    ldda = ceiling(real(n)/32)*32
    lddx = ldda
    info = 0

    !! allocate GPU memory
    info = magmaf_zmalloc( dA, ldda*n )
    info = magmaf_zmalloc( dX, lddx*nrhs )
    allocate( ipiv(n) )  !! ipiv always on CPU
    if (dA == 0 .or. dX == 0) then
        print "(a)", "malloc failed"
        goto 1000
    endif
    
    !! Replace these with your code to initialize A and X
    call zfill_matrix_gpu( n, n, dA, ldda )
    call zfill_rhs_gpu( n, nrhs, dX, lddx )
    
    call magmaf_zgesv_gpu( n, 1, dA, ldda, ipiv, dX, ldda, info )
    if (info .ne. 0) then
        print "(a,i5)", "magma_zgesv_gpu failed with info=", info
    endif
    
    !! TODO: use result in dX
    
!! cleanup:
1000 continue
    info = magmaf_free( dA )
    if (info .ne. 0) then
        print *, 'Error: magmaf_free( dA ) failed: ', info
    endif

    info = magmaf_free( dX )
    if (info .ne. 0) then
        print *, 'Error: magmaf_free( dX ) failed: ', info
    endif

    deallocate( ipiv )
end subroutine

end module


!! ------------------------------------------------------------
program main
    use magma
    use example_f
    implicit none
    
    integer :: n=1000, nrhs=1
    
    call magmaf_init()
    
    print "(a)", "using MAGMA CPU interface"
    call cpu_interface( n, nrhs )

    print "(a)", "using MAGMA GPU interface"
    call gpu_interface( n, nrhs )
    
    call magmaf_finalize()
end program