1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
|
/*
-- MAGMA (version 2.9.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
@date January 2025
@generated from testing/testing_zsymmetrize.cpp, normal z -> s, Wed Jan 22 14:40:24 2025
@author Mark Gates
*/
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
#include "magma_v2.h"
#include "magma_lapack.h"
#include "testings.h"
/* ////////////////////////////////////////////////////////////////////////////
-- Testing ssymmetrize
Code is very similar to testing_stranspose.cpp
*/
int main( int argc, char** argv)
{
TESTING_CHECK( magma_init() );
magma_print_environment();
real_Double_t gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
float error, work[1];
float c_neg_one = MAGMA_S_NEG_ONE;
float *h_A, *h_R;
magmaFloat_ptr d_A;
magma_int_t N, size, lda, ldda;
magma_int_t ione = 1;
int status = 0;
magma_opts opts;
opts.parse_opts( argc, argv );
printf("%% uplo = %s\n", lapack_uplo_const(opts.uplo) );
printf("%% N CPU GByte/s (ms) GPU GByte/s (ms) check\n");
printf("%%====================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
lda = N;
ldda = magma_roundup( N, opts.align ); // multiple of 32 by default
size = lda*N;
// load strictly lower triangle, save strictly upper triangle
gbytes = sizeof(float) * 1.*N*(N-1) / 1e9;
TESTING_CHECK( magma_smalloc_cpu( &h_A, size ));
TESTING_CHECK( magma_smalloc_cpu( &h_R, size ));
TESTING_CHECK( magma_smalloc( &d_A, ldda*N ));
/* Initialize the matrix */
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < N; ++i ) {
h_A[i + j*lda] = MAGMA_S_MAKE( i + j/10000., j );
}
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_ssetmatrix( N, N, h_A, lda, d_A, ldda, opts.queue );
gpu_time = magma_sync_wtime( opts.queue );
//magmablas_ssymmetrize( opts.uplo, N-2, d_A+1+ldda, ldda, opts.queue ); // inset by 1 row & col
magmablas_ssymmetrize( opts.uplo, N, d_A, ldda, opts.queue );
gpu_time = magma_sync_wtime( opts.queue ) - gpu_time;
gpu_perf = gbytes / gpu_time;
/* =====================================================================
Performs operation using naive in-place algorithm
(LAPACK doesn't implement symmetrize)
=================================================================== */
cpu_time = magma_wtime();
//for( int j = 1; j < N-1; ++j ) { // inset by 1 row & col
// for( int i = 1; i < j; ++i ) {
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < j; ++i ) {
if ( opts.uplo == MagmaLower ) {
h_A[i + j*lda] = MAGMA_S_CONJ( h_A[j + i*lda] );
}
else {
h_A[j + i*lda] = MAGMA_S_CONJ( h_A[i + j*lda] );
}
}
// real diagonal
h_A[j + j*lda] = MAGMA_S_MAKE( MAGMA_S_REAL( h_A[j + j*lda] ), 0 );
}
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gbytes / cpu_time;
/* =====================================================================
Check the result
=================================================================== */
magma_sgetmatrix( N, N, d_A, ldda, h_R, lda, opts.queue );
blasf77_saxpy(&size, &c_neg_one, h_A, &ione, h_R, &ione);
error = lapackf77_slange("f", &N, &N, h_R, &lda, work);
printf("%5lld %7.2f (%7.2f) %7.2f (%7.2f) %s\n",
(long long) N, cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000.,
(error == 0. ? "ok" : "failed") );
status += ! (error == 0.);
magma_free_cpu( h_A );
magma_free_cpu( h_R );
magma_free( d_A );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
opts.cleanup();
TESTING_CHECK( magma_finalize() );
return status;
}
|