File: testing_dlarfg.cpp

package info (click to toggle)
magma 2.9.0%2Bds-3
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid
  • size: 83,556 kB
  • sloc: cpp: 709,115; fortran: 121,916; ansic: 32,343; python: 25,603; f90: 15,208; makefile: 945; xml: 253; csh: 232; sh: 203; perl: 104
file content (129 lines) | stat: -rw-r--r-- 5,015 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*
    -- MAGMA (version 2.9.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date January 2025

       @generated from testing/testing_zlarfg.cpp, normal z -> d, Wed Jan 22 14:40:23 2025
       @author Mark Gates
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include "flops.h"
#include "magma_v2.h"
#include "magma_lapack.h"
#include "testings.h"


int main( int argc, char** argv)
{
    TESTING_CHECK( magma_init() );
    magma_print_environment();

    real_Double_t   gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    double *h_x, *h_x2, *h_tau, *h_tau2;
    magmaDouble_ptr d_x, d_tau;
    double c_neg_one = MAGMA_D_NEG_ONE;
    double      error, error2, work[1];
    magma_int_t N, nb, lda, ldda, size;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    int status = 0;

    magma_opts opts;
    opts.parse_opts( argc, argv );

    double tol = opts.tolerance * lapackf77_dlamch("E");
    
    // does larfg on nb columns, one after another
    nb = (opts.nb > 0 ? opts.nb : 64);
    
    printf("%%   N    nb    CPU GFLop/s (ms)    GPU Gflop/s (ms)   error      tau error\n");
    printf("%%=========================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            lda  = N;
            ldda = magma_roundup( N, opts.align );  // multiple of 32 by default
            gflops = FLOPS_DLARFG( N ) / 1e9 * nb;
    
            TESTING_CHECK( magma_dmalloc_cpu( &h_x,    N*nb ));
            TESTING_CHECK( magma_dmalloc_cpu( &h_x2,   N*nb ));
            TESTING_CHECK( magma_dmalloc_cpu( &h_tau,  nb   ));
            TESTING_CHECK( magma_dmalloc_cpu( &h_tau2, nb   ));
        
            TESTING_CHECK( magma_dmalloc( &d_x,   ldda*nb ));
            TESTING_CHECK( magma_dmalloc( &d_tau, nb      ));
            
            /* Initialize the vectors */
            size = N*nb;
            lapackf77_dlarnv( &ione, ISEED, &size, h_x );
            
            /* =====================================================================
               Performs operation using MAGMABLAS
               =================================================================== */
            magma_dsetmatrix( N, nb, h_x, N, d_x, ldda, opts.queue );
            
            gpu_time = magma_sync_wtime( opts.queue );
            for( int j = 0; j < nb; ++j ) {
                magmablas_dlarfg( N, &d_x[0+j*ldda], &d_x[1+j*ldda], ione, &d_tau[j], opts.queue );
            }
            gpu_time = magma_sync_wtime( opts.queue ) - gpu_time;
            gpu_perf = gflops / gpu_time;
            
            magma_dgetmatrix( N, nb, d_x, ldda, h_x2, N, opts.queue );
            magma_dgetvector( nb, d_tau, 1, h_tau2, 1, opts.queue );
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            cpu_time = magma_wtime();
            for( int j = 0; j < nb; ++j ) {
                lapackf77_dlarfg( &N, &h_x[0+j*lda], &h_x[1+j*lda], &ione, &h_tau[j] );
            }
            cpu_time = magma_wtime() - cpu_time;
            cpu_perf = gflops / cpu_time;
            
            /* =====================================================================
               Error Computation and Performance Comparison
               =================================================================== */
            blasf77_daxpy( &size, &c_neg_one, h_x, &ione, h_x2, &ione );
            error = lapackf77_dlange( "F", &N, &nb, h_x2, &N, work )
                  / lapackf77_dlange( "F", &N, &nb, h_x,  &N, work );
            
            // tau can be 0
            blasf77_daxpy( &nb, &c_neg_one, h_tau, &ione, h_tau2, &ione );
            error2 = lapackf77_dlange( "F", &nb, &ione, h_tau,  &nb, work );
            if ( error2 != 0 ) {
                error2 = lapackf77_dlange( "F", &nb, &ione, h_tau2, &nb, work ) / error2;
            }

            printf("%5lld %5lld   %7.2f (%7.2f)   %7.2f (%7.2f)   %8.2e   %8.2e   %s\n",
                   (long long) N, (long long) nb, cpu_perf, 1000.*cpu_time, gpu_perf, 1000.*gpu_time,
                   error, error2,
                   (error < tol && error2 < tol ? "ok" : "failed") );
            status += ! (error < tol && error2 < tol);
            
            magma_free_cpu( h_x    );
            magma_free_cpu( h_x2   );
            magma_free_cpu( h_tau  );
            magma_free_cpu( h_tau2 );
        
            magma_free( d_x   );
            magma_free( d_tau );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    opts.cleanup();
    TESTING_CHECK( magma_finalize() );
    return status;
}