1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
|
#ifdef UTEST
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <math.h> /* fabs */
#include "blis_utest.h"
#include "bli_gemm_power7_opt_8x4.h"
/*
 * Linear-index helpers.  Every argument is parenthesized so that expression
 * arguments (e.g. COLMAJ_INDEX(i, j+1, ld)) expand with the intended
 * precedence.  Each argument is evaluated exactly once.
 */
/* Element (row,col) of a column-major matrix with leading dimension ld. */
#define COLMAJ_INDEX(row,col,ld) (((col)*(ld))+(row))
/* Element (row,col) of a row-major matrix with leading dimension ld. */
#define ROWMAJ_INDEX(row,col,ld) (((row)*(ld))+(col))
/* Element (row,col) of a matrix with row stride rs and column stride cs. */
#define BLIS_INDEX(row,col,rs,cs) (((row)*(rs))+((col)*(cs)))
/* Micro-tile dimensions: C is MR x NR (values come from the BLIS defaults). */
#define MR BLIS_DEFAULT_MR_D
#define NR BLIS_DEFAULT_NR_D
/* Leading dimensions of the packed A (column-major) and B (row-major) panels. */
#define LDA MR
#define LDB NR
/* Absolute tolerance used when comparing kernel output with the reference. */
#define EPSILON 0.0000001
/*
 * Straightforward reference implementation of the micro-kernel update
 *
 *     c := beta * c + alpha * a * b
 *
 * used to cross-check an optimized kernel.
 *
 *   k          inner dimension shared by a and b
 *   alpha,beta pointers to the two scalars
 *   a          mr x k panel, packed column-major (leading dimension LDA = MR)
 *   b          k x nr panel, packed row-major (leading dimension LDB = NR)
 *   c          mr x nr tile addressed through row stride rs_c and column
 *              stride cs_c; updated in place
 *   data       not referenced by this implementation
 */
void bli_dgemm_check(
    dim_t k,
    double* restrict alpha,
    double* restrict a,
    double* restrict b,
    double* restrict beta,
    double* restrict c, inc_t rs_c, inc_t cs_c,
    auxinfo_t* data
)
{
    int row, col, p;

    for (row = 0; row < MR; row++) {
        for (col = 0; col < NR; col++) {
            /* Address of c(row,col); read once, written once. */
            double *cij = &c[BLIS_INDEX(row,col,rs_c,cs_c)];
            double acc = *cij * *beta;

            /* Accumulate the dot product of row `row` of a with column `col` of b. */
            p = 0;
            while (p < k) {
                acc += *alpha * (a[COLMAJ_INDEX(row,p,LDA)] * b[ROWMAJ_INDEX(p,col,LDB)]);
                p++;
            }

            *cij = acc;
        }
    }
}
/*
 * Unit-test driver for the optimized dgemm micro-kernel bli_dgemm_opt_8x4().
 *
 * Usage: <prog> [k]
 *   k  inner dimension of the product; defaults to 128 and must be a
 *      positive integer.
 *
 * The driver fills A (MR x k), B (k x NR) and two identical copies of
 * C (MR x NR) with drand48() values, runs the optimized kernel on one copy
 * and bli_dgemm_check() on the other, and compares them element by element
 * against EPSILON.  If they agree, it times `iters` calls of the optimized
 * kernel and prints the resulting GFLOPS figure.
 *
 * Returns 0 on success, 1 on a usage or allocation error, and -1 when the
 * optimized kernel disagrees with the reference.
 */
int main(int argc, char *argv[])
{
    double *A = NULL, *B = NULL, *C = NULL, *C2 = NULL;
    double alpha = 1.0, beta = 1.0;
    double secs, gflops;
    long i, j;
    long k = 128;
    /* C is stored row-major; the column-major alternative is rs_c = 1, cs_c = MR. */
    long rs_c = NR, cs_c = 1;
    int iters = 10;
    int errors;
    int rc = 0;
    struct timeval tv_start, tv_end;

    switch (argc) {
    case 2: {
        /* strtol (unlike atoi) lets us reject garbage and non-positive sizes. */
        char *end = NULL;
        k = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || k <= 0) {
            fprintf(stderr, "Invalid k '%s': expected a positive integer\n", argv[1]);
            printf("Usage: %s [k]\n", argv[0]);
            return 1;
        }
        break;
    }
    case 1:
        break;
    default:
        printf("Usage: %s [k]\n", argv[0]);
        return 1;
    }

    /*
     * calloc() rejects element-count * size overflow (e.g. an absurdly large
     * k) by returning NULL, which is handled below.
     */
    A = calloc((size_t)k, LDA * sizeof *A);
    B = calloc((size_t)k, LDB * sizeof *B);
    C = calloc((size_t)MR * NR, sizeof *C);
    C2 = calloc((size_t)MR * NR, sizeof *C2);
    if (A == NULL || B == NULL || C == NULL || C2 == NULL) {
        fprintf(stderr, "Out of memory allocating test matrices (k=%ld)\n", k);
        rc = 1;
        goto cleanup;
    }

    /* Initialize C matrix in blis format */
    for (j=0; j<NR; j++)
        for (i=0; i<MR; i++)
            C2[BLIS_INDEX(i,j,rs_c,cs_c)] = C[BLIS_INDEX(i,j,rs_c,cs_c)] = drand48();

    /* Initialize A matrix in column major format */
    for (j=0; j<k; j++)
        for (i=0; i<MR; i++)
            A[COLMAJ_INDEX(i,j,LDA)] = drand48();

    /* Initialize B matrix in row major format */
    for (j=0; j<NR; j++)
        for (i=0; i<k; i++)
            B[ROWMAJ_INDEX(i,j,LDB)] = drand48();

    /* First check the results */
    bli_dgemm_opt_8x4(k, &alpha, A, B, &beta, C, rs_c, cs_c, NULL);
    bli_dgemm_check(k, &alpha, A, B, &beta, C2, rs_c, cs_c, NULL);

    /*
     * C and C2 are dense MR*NR buffers with the same strides, so a flat scan
     * over all MR*NR elements (including the last one) compares every entry.
     */
    for (i=0, errors=0; i<MR*NR; i++) {
        /* Negated "<=" so that a NaN difference is counted as an error. */
        if (!(fabs(C[i] - C2[i]) <= EPSILON)) {
            if (errors<20) printf(" %ld expected=%f got=%f\n", i, C2[i], C[i]);
            errors++;
        }
    }
    printf("Errors = %d\n", errors);
    if (errors) {
        rc = -1;
        goto cleanup;
    }

    /* Now get the performance */
    gettimeofday(&tv_start, NULL);
    for (i=0; i<iters; i++) {
        bli_dgemm_opt_8x4(k, &alpha, A, B, &beta, C, rs_c, cs_c, NULL);
    }
    gettimeofday(&tv_end, NULL);

    secs = (double)(tv_end.tv_sec - tv_start.tv_sec)
         + (double)(tv_end.tv_usec - tv_start.tv_usec)/1E6;
    if (secs > 0.0) {
        /* 2*MR*NR*k floating-point operations per kernel call. */
        gflops = ((2.0*MR*NR*k*iters)/1E9)/secs;
    } else {
        /* Timer resolution too coarse for this run; avoid dividing by zero. */
        fprintf(stderr, "Elapsed time too small to measure; reporting 0 GFLOPS\n");
        gflops = 0.0;
    }
    printf("%d %d %ld : GFLOPS = %6.3f\n", MR, NR, k, gflops);

cleanup:
    free(C2);
    free(C);
    free(B);
    free(A);
    return rc;
}
#endif
|