1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
|
/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */
#ifndef HPCC_H
#define HPCC_H 1
/* HPL includes:
stdio.h
stdlib.h
string.h
stdarg.h
vararg.h (if necessary)
mpi.h
*/
#include <hpl.h>
#include <hpccver.h>
#include <math.h>
#include <time.h>
/* Number of timing slots recorded per direction for the MPI FFT test; it is
   the length of the MPIFFTtimingsForward[] and MPIFFTtimingsBackward[] arrays
   in HPCC_Params. */
#define MPIFFT_TIMING_COUNT 8
/*
 * 64-bit integer support.
 *
 *   s64Int / u64Int   signed and unsigned integer typedefs
 *   FSTR64 / FSTRU64  printf() conversion strings matching those typedefs
 *   ZERO64B           a zero constant carrying the matching literal suffix
 *
 * When the build defines LONG_IS_64BITS the types are based on `long`,
 * otherwise on `long long`.
 */
#if defined(LONG_IS_64BITS)
typedef long s64Int;
typedef unsigned long u64Int;
#define ZERO64B 0L
#define FSTR64 "%ld"
#define FSTRU64 "%lu"
#else
typedef long long s64Int;
typedef unsigned long long u64Int;
#define ZERO64B 0LL
#define FSTR64 "%lld"
#define FSTRU64 "%llu"
#endif
/*
 * Runtime data record for the PTRANS test; HPCC_Params embeds one instance as
 * its PTRANSrdata member.
 * NOTE(review): field meanings below are inferred from the names -- confirm
 * against the PTRANS implementation.
 */
typedef struct {
  double GBs;      /* presumably achieved bandwidth in GB/s */
  double time;     /* presumably elapsed time */
  double residual; /* presumably residual from the correctness check */
  int n;           /* presumably matrix order */
  int nb;          /* presumably block size */
  int nprow;       /* presumably process-grid rows */
  int npcol;       /* presumably process-grid columns */
} PTRANS_RuntimeData;
/* parameters of execution */
/*
 * HPCC_Params gathers, in one structure, the HPL configuration, the HPCC
 * configuration and the per-benchmark result fields. A pointer to it is the
 * argument of HPCC_Init(), HPCC_Finalize() and of every benchmark entry point
 * declared later in this header.
 *
 * Member order defines the structure layout; append new members rather than
 * reordering existing ones unless every translation unit is rebuilt.
 */
typedef struct {
/* HPL section */
HPL_T_test test;
/* Per-parameter value tables, each with room for HPL_MAX_PARAM entries. */
int nval [HPL_MAX_PARAM],
nbval [HPL_MAX_PARAM],
pval [HPL_MAX_PARAM],
qval [HPL_MAX_PARAM],
nbmval[HPL_MAX_PARAM],
ndvval[HPL_MAX_PARAM],
ndhval[HPL_MAX_PARAM];
HPL_T_ORDER porder;
HPL_T_FACT pfaval[HPL_MAX_PARAM],
rfaval[HPL_MAX_PARAM];
HPL_T_TOP topval[HPL_MAX_PARAM];
HPL_T_FACT rpfa;
HPL_T_SWAP fswap;
/* NOTE(review): the *s members look like the number of valid entries in the
   corresponding tables above, mirroring the HPCC_Defaults() arguments --
   confirm against HPCC_Init(). */
int ns, nbs, npqs, npfs, nbms, ndvs, nrfs, ntps, ndhs, tswap, L1notran, Unotran, equil, align;
/* HPCC section */
/* File-name buffers: 256 characters plus one byte for the terminator. */
char inFname[256 + 1], outFname[256 + 1];
/* PTRANS tables have room for twice as many entries as the HPL tables. */
int PTRANSns, PTRANSnval[2 * HPL_MAX_PARAM];
int PTRANSnbs, PTRANSnbval[2 * HPL_MAX_PARAM];
int PTRANSnpqs, PTRANSpval[2 * HPL_MAX_PARAM], PTRANSqval[2 * HPL_MAX_PARAM];
/* Floating-point fields for RandomAccess, STREAM and DGEMM.
   NOTE(review): presumably filled in by the respective benchmarks and
   printed by HPCC_Finalize() -- confirm. */
double MPIRandomAccess_LCG_GUPs, MPIRandomAccess_GUPs, Star_LCG_GUPs, Single_LCG_GUPs, StarGUPs, SingleGUPs,
MPIRandomAccess_ErrorsFraction, MPIRandomAccess_time, MPIRandomAccess_CheckTime,
MPIRandomAccess_TimeBound,
MPIRandomAccess_LCG_ErrorsFraction, MPIRandomAccess_LCG_time, MPIRandomAccess_LCG_CheckTime,
MPIRandomAccess_LCG_TimeBound,
StarStreamCopyGBs, StarStreamScaleGBs,
StarStreamAddGBs, StarStreamTriadGBs, SingleStreamCopyGBs, SingleStreamScaleGBs,
SingleStreamAddGBs, SingleStreamTriadGBs, StarDGEMMGflops, SingleDGEMMGflops;
/* FFT fields. */
double StarFFTGflops, SingleFFTGflops, MPIFFTGflops, MPIFFT_maxErr;
/* Latency/bandwidth fields. */
double MaxPingPongLatency, RandomlyOrderedRingLatency, MinPingPongBandwidth,
NaturallyOrderedRingBandwidth, RandomlyOrderedRingBandwidth,
MinPingPongLatency, AvgPingPongLatency, MaxPingPongBandwidth, AvgPingPongBandwidth,
NaturallyOrderedRingLatency;
int DGEMM_N;
int StreamThreads, StreamVectorSize;
int FFT_N;
int MPIFFT_Procs;
int MPIRandomAccess_LCG_Algorithm, MPIRandomAccess_Algorithm;
HPL_RuntimeData HPLrdata;
PTRANS_RuntimeData PTRANSrdata;
int Failure; /* overall failure of the benchmark */
/* One array per FFT direction, MPIFFT_TIMING_COUNT slots each. */
double MPIFFTtimingsForward[MPIFFT_TIMING_COUNT], MPIFFTtimingsBackward[MPIFFT_TIMING_COUNT];
size_t HPLMaxProcMem;
int HPLMaxProc, HPLMinProc;
/* NOTE(review): the Run* members look like per-benchmark enable flags --
   confirm where they are set and tested. */
int RunHPL, RunStarDGEMM, RunSingleDGEMM,
RunPTRANS, RunStarStream, RunSingleStream,
RunMPIRandomAccess_LCG, RunStarRandomAccess_LCG, RunSingleRandomAccess_LCG,
RunMPIRandomAccess, RunStarRandomAccess, RunSingleRandomAccess,
RunStarFFT, RunSingleFFT, RunMPIFFT,
RunLatencyBandwidth;
int FFTEnblk, FFTEnp, FFTEl2size;
/* s64Int fields for RandomAccess and MPI FFT (see the s64Int typedef above). */
s64Int RandomAccess_LCG_N, RandomAccess_N, MPIRandomAccess_LCG_ExeUpdates, MPIRandomAccess_ExeUpdates,
MPIRandomAccess_LCG_N, MPIRandomAccess_N, MPIRandomAccess_LCG_Errors, MPIRandomAccess_Errors, MPIFFT_N;
} HPCC_Params;
/*
This is what needs to be done to add a new benchmark:
- Add the benchmark code to the directory structure (headers, makefiles)
- Add benchmark output data to the HPCC_Params structure.
- Initialize the HPCC_Params structure data in HPCC_Init().
- Add a call to the benchmark function in main().
- Make sure that all the processes fill out the structure with the same data.
- Print the output of the benchmark in HPCC_Finalize().
- For tests that have "Star" and "Single" variants (DGEMM, RandomAccess, STREAM) the function
that performs the test returns a value (0 or 1) that indicates runtime failure and also reports
benchmark failure (due to wrong optimization that causes numerical error) by setting
params->Failure.
*/
/*
 * External hooks. Both take an argc/argv pair plus an opaque extdata pointer
 * and return int.
 * NOTE(review): presumably called around the benchmark run for site-specific
 * setup and teardown -- confirm against the caller.
 */
extern int HPCC_external_init(int argc, char *argv[], void *extdata);
extern int HPCC_external_finalize(int argc, char *argv[], void *extdata);
/*
 * Benchmark-wide setup and teardown. Per the checklist comment above,
 * HPCC_Init() initializes the HPCC_Params data and HPCC_Finalize() prints the
 * benchmark output.
 */
extern int HPCC_Init(HPCC_Params *params);
extern int HPCC_Finalize(HPCC_Params *params);

/*
 * NOTE(review): presumably returns a per-process vector length for vecCnt
 * vectors of `size`-byte elements, optionally restricted to a power of two
 * when pow2 is non-zero -- confirm against the implementation.
 */
extern int HPCC_LocalVectorSize(HPCC_Params *params,
                                int vecCnt,
                                size_t size,
                                int pow2);
/*
 * HPCC_Defaults: all HPL parameters are passed by pointer, followed by the
 * communicator. The arguments pair up with the HPL-section members of
 * HPCC_Params (a count followed by its value table).
 * NOTE(review): presumably fills the outputs with default HPL settings when
 * no input file is usable -- confirm against the implementation.
 */
extern int HPCC_Defaults(
    HPL_T_test *TEST,
    int *NS, int *N,
    int *NBS, int *NB,
    HPL_T_ORDER *PMAPPIN,
    int *NPQS, int *P, int *Q,
    int *NPFS, HPL_T_FACT *PF,
    int *NBMS, int *NBM,
    int *NDVS, int *NDV,
    int *NRFS, HPL_T_FACT *RF,
    int *NTPS, HPL_T_TOP *TP,
    int *NDHS, int *DH,
    HPL_T_SWAP *FSWAP,
    int *TSWAP,
    int *L1NOTRAN,
    int *UNOTRAN,
    int *EQUIL,
    int *ALIGN,
    MPI_Comm comm);
/*
 * HPL entry point: takes the command-line arguments plus two output pointers,
 * the HPL runtime data record and a failure indicator.
 */
extern int HPL_main(int ARGC,
                    char **ARGV,
                    HPL_RuntimeData *rdata,
                    int *failure);

/* Single-precision counterpart of an HPL machine-parameter query
   (NOTE(review): inferred from the name and the HPL_T_MACH argument). */
extern float HPL_slamch(const HPL_T_MACH);
/*
 * Parameterless helpers returning double and float respectively.
 * NOTE(review): the names suggest double- and single-precision machine
 * epsilon -- confirm against the definitions.
 *
 * Declared with (void): before C23 an empty parameter list declares a function
 * with unspecified arguments, so the compiler would not diagnose a call that
 * passes arguments by mistake.
 */
extern double HPCC_dweps(void);
extern float HPCC_sweps(void);
/*
 * Benchmark entry points. Each receives the shared HPCC_Params. Per the
 * checklist comment above, the "Star"/"Single" functions return 0 or 1 to
 * indicate runtime failure and report benchmark failure via params->Failure.
 */

/* DGEMM */
extern int HPCC_StarDGEMM(HPCC_Params *params);
extern int HPCC_SingleDGEMM(HPCC_Params *params);
extern int HPCC_TestDGEMM(HPCC_Params *params, int doIO, double *UGflops, int *Un, int *Ufailure);

/* PTRANS */
extern int PTRANS(HPCC_Params *params);

/* RandomAccess, LCG variant */
extern int HPCC_MPIRandomAccess_LCG(HPCC_Params *params);
extern int HPCC_StarRandomAccess_LCG(HPCC_Params *params);
extern int HPCC_SingleRandomAccess_LCG(HPCC_Params *params);

/* RandomAccess */
extern int HPCC_MPIRandomAccess(HPCC_Params *params);
extern int HPCC_StarRandomAccess(HPCC_Params *params);
extern int HPCC_SingleRandomAccess(HPCC_Params *params);

/* STREAM */
extern int HPCC_StarStream(HPCC_Params *params);
extern int HPCC_SingleStream(HPCC_Params *params);

/* FFT */
extern int HPCC_StarFFT(HPCC_Params *params);
extern int HPCC_SingleFFT(HPCC_Params *params);
extern int HPCC_MPIFFT(HPCC_Params *params);
extern int HPCC_TestFFT(HPCC_Params *params, int doIO, double *UGflops, int *Un, int *Ufailure);
/*
 * MaxMem: takes the process count, the imrow/imcol pair, three (count, value
 * tables) groups for matrix sizes, block sizes and process grids, and writes
 * through the long pointer maxMem.
 * NOTE(review): presumably computes the largest memory requirement over the
 * listed PTRANS configurations -- confirm against the implementation.
 */
extern int MaxMem(int nprocs, int imrow, int imcol,
                  int nmat, int *mval, int *nval,
                  int nbmat, int *mbval, int *nbval,
                  int ngrids, int *npval, int *nqval,
                  long *maxMem);
/*
 * HPCC_Stream: STREAM driver. Writes the four kernel bandwidths (copy, scale,
 * add, triad) and a failure indicator through the output pointers.
 * NOTE(review): doIO presumably enables output and world_rank is presumably
 * the caller's rank in MPI_COMM_WORLD -- confirm.
 */
extern int HPCC_Stream(HPCC_Params *params,
                       int doIO,
                       MPI_Comm comm,
                       int world_rank,
                       double *copyGBs,
                       double *scaleGBs,
                       double *addGBs,
                       double *triadGBs,
                       int *failure);

/* Latency/bandwidth benchmark driver; returns nothing, so any results are
   communicated through params. */
extern void main_bench_lat_bw(HPCC_Params *params);
/*
 * PTRANS support routines. Nearly every argument is passed by pointer and
 * several names carry a trailing underscore.
 * NOTE(review): this looks like a Fortran-style calling convention from
 * translated ScaLAPACK-type code -- confirm before changing any signature.
 */

/* Parallel matrix transpose kernel. */
extern int pdtrans(char *trans, int *m, int *n, int *mb, int *nb,
                   double *a, int *lda, double *beta,
                   double *c__, int *ldc,
                   int *imrow, int *imcol,
                   double *work, int *iwork);

/* Collects the PTRANS run configuration; returns a FILE pointer
   (NOTE(review): presumably the opened output stream -- confirm). */
extern FILE *pdtransinfo(int *nmat, int *mval, int *nval, int *ldval,
                         int *nbmat, int *mbval, int *nbval, int *ldnbval,
                         int *ngrids, int *npval, int *nqval, int *ldpqval,
                         int *iaseed, int *imrow, int *imcol,
                         float *thresh, int *iam, int *nprocs,
                         double *eps, char *infname, int *fcl, char *outfname);

/* Distributed matrix generator; note that alpha is the only by-value
   argument. */
extern int pdmatgen(int *ictxt, char *aform, char *diag,
                    int *m, int *n, int *mb, int *nb,
                    double *a, int *lda,
                    int *iarow, int *iacol, int *iseed,
                    int *iroff, int *irnum, int *icoff, int *icnum,
                    int *myrow, int *mycol, int *nprow, int *npcol,
                    double alpha);

/* Compares matrix a against aCopy and writes the result through error. */
extern int pdmatcmp(int *ictxt, int *m_, int *n_,
                    double *a, int *lda_,
                    double *aCopy, int *ldc_,
                    double *error);

/* Error reporting. */
extern int pxerbla(int *ictxt, char *srname, int *info);

/* Timer utilities. */
extern int slcombine_(int *ictxt, char *scope, char *op, char *timetype,
                      int *n, int *ibeg, double *times);
extern int slboot_(void);
extern int sltimer_(int *i__);

/* Integer helpers. */
extern int icopy_(int *n, int *sx, int *incx, int *sy, int *incy);
extern int numroc_(int *, int *, int *, int *, int *);
extern int ilcm_(int *, int *);
extern int iceil_(int *, int *);

/* Random-number helpers. pdrand is declared with (void): before C23 an empty
   parameter list leaves the arguments unspecified, so a call passing
   arguments by mistake would not be diagnosed. */
extern double pdrand(void);
extern int setran_(int *, int *, int *);
extern int jumpit_(int *, int *, int *, int *);
extern int xjumpm_(int *, int *, int *, int *, int *, int *, int *);
/* ---------------------------------------------------------------------- */
/*
 * DPRN(i, v): debug print. Writes "<file>(<line>)@<i>:<v-as-written>=<value>"
 * to stdout and flushes. i must be an int (printed with %d); v is converted
 * to double and printed with %g.
 *
 * The file name and the stringified expression are passed as %s arguments
 * rather than pasted into the format string, so a '%' in either (for example
 * DPRN(0, a%b)) is printed literally and not read as a conversion specifier.
 */
#define DPRN(i,v) do{printf("%s(%d)@%d:%s=%g\n",__FILE__,__LINE__,(i),#v,(double)(v));fflush(stdout);}while(0)
/*
 * BEGIN_IO(r, fn, f) ... END_IO(r, f): bracket a section of output that only
 * the process with r == 0 performs.
 *
 *   r   rank expression; the bracketed statements run only when it equals 0
 *   fn  name of the file, opened in append mode
 *   f   FILE* lvalue; receives the opened stream, or stderr (after printing a
 *       diagnostic) when fopen() fails; reset to NULL by END_IO on every rank
 *
 * The two macros must always be used as a pair: BEGIN_IO opens an `if` block
 * that END_IO closes. END_IO flushes f and closes it unless it is stdout or
 * stderr. fn is evaluated twice when fopen() fails.
 *
 * Every argument is parenthesized so that expressions such as `rank & 1`
 * keep their meaning inside the macro.
 */
#define BEGIN_IO(r,fn,f) if(0==(r)){(f)=fopen((fn),"a");if(!(f))fprintf((f)=stderr,"Problem with appending to file '%s'\n",(fn))
#define END_IO(r,f) fflush(f); if ((f)!=stdout && (f)!=stderr) fclose(f);} (f)=(FILE*)(NULL)
#include <hpccmema.h>
/*
 * Typed allocation helpers: allocate room for `count` objects of `type` and
 * yield a `type *` (NULL when the underlying allocator fails).
 * XMALLOC leaves the memory uninitialized; XCALLOC zero-fills it.
 * The element-count product in XMALLOC is not checked for overflow.
 */
#define XMALLOC(type,count) ((type*)malloc((count)*sizeof(type)))
#define XCALLOC(type,count) ((type*)calloc((count),sizeof(type)))
#endif
|