1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
|
/*
-- MAGMA (version 2.9.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
@date January 2025
@generated from testing/testing_zlarfg.cpp, normal z -> c, Wed Jan 22 14:40:23 2025
@author Mark Gates
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "flops.h"
#include "magma_v2.h"
#include "magma_lapack.h"
#include "testings.h"
int main( int argc, char** argv)
{
TESTING_CHECK( magma_init() );
magma_print_environment();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
magmaFloatComplex *h_x, *h_x2, *h_tau, *h_tau2;
magmaFloatComplex_ptr d_x, d_tau;
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
float error, error2, work[1];
magma_int_t N, nb, lda, ldda, size;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
int status = 0;
magma_opts opts;
opts.parse_opts( argc, argv );
float tol = opts.tolerance * lapackf77_slamch("E");
// does larfg on nb columns, one after another
nb = (opts.nb > 0 ? opts.nb : 64);
printf("%% N nb CPU GFLop/s (ms) GPU Gflop/s (ms) error tau error\n");
printf("%%=========================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
lda = N;
ldda = magma_roundup( N, opts.align ); // multiple of 32 by default
gflops = FLOPS_CLARFG( N ) / 1e9 * nb;
TESTING_CHECK( magma_cmalloc_cpu( &h_x, N*nb ));
TESTING_CHECK( magma_cmalloc_cpu( &h_x2, N*nb ));
TESTING_CHECK( magma_cmalloc_cpu( &h_tau, nb ));
TESTING_CHECK( magma_cmalloc_cpu( &h_tau2, nb ));
TESTING_CHECK( magma_cmalloc( &d_x, ldda*nb ));
TESTING_CHECK( magma_cmalloc( &d_tau, nb ));
/* Initialize the vectors */
size = N*nb;
lapackf77_clarnv( &ione, ISEED, &size, h_x );
/* =====================================================================
Performs operation using MAGMABLAS
=================================================================== */
magma_csetmatrix( N, nb, h_x, N, d_x, ldda, opts.queue );
gpu_time = magma_sync_wtime( opts.queue );
for( int j = 0; j < nb; ++j ) {
magmablas_clarfg( N, &d_x[0+j*ldda], &d_x[1+j*ldda], ione, &d_tau[j], opts.queue );
}
gpu_time = magma_sync_wtime( opts.queue ) - gpu_time;
gpu_perf = gflops / gpu_time;
magma_cgetmatrix( N, nb, d_x, ldda, h_x2, N, opts.queue );
magma_cgetvector( nb, d_tau, 1, h_tau2, 1, opts.queue );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
for( int j = 0; j < nb; ++j ) {
lapackf77_clarfg( &N, &h_x[0+j*lda], &h_x[1+j*lda], &ione, &h_tau[j] );
}
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
/* =====================================================================
Error Computation and Performance Comparison
=================================================================== */
blasf77_caxpy( &size, &c_neg_one, h_x, &ione, h_x2, &ione );
error = lapackf77_clange( "F", &N, &nb, h_x2, &N, work )
/ lapackf77_clange( "F", &N, &nb, h_x, &N, work );
// tau can be 0
blasf77_caxpy( &nb, &c_neg_one, h_tau, &ione, h_tau2, &ione );
error2 = lapackf77_clange( "F", &nb, &ione, h_tau, &nb, work );
if ( error2 != 0 ) {
error2 = lapackf77_clange( "F", &nb, &ione, h_tau2, &nb, work ) / error2;
}
printf("%5lld %5lld %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %8.2e %s\n",
(long long) N, (long long) nb, cpu_perf, 1000.*cpu_time, gpu_perf, 1000.*gpu_time,
error, error2,
(error < tol && error2 < tol ? "ok" : "failed") );
status += ! (error < tol && error2 < tol);
magma_free_cpu( h_x );
magma_free_cpu( h_x2 );
magma_free_cpu( h_tau );
magma_free_cpu( h_tau2 );
magma_free( d_x );
magma_free( d_tau );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
opts.cleanup();
TESTING_CHECK( magma_finalize() );
return status;
}
|