// TinyVector<T,N> DAXPY benchmark: times repeated "y += a * x" updates on
// blitz::TinyVector<double,N> for N = 1 through 10 and reports a rate per N.
//#define BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE
#include <blitz/array.h>
#include <blitz/timer.h>
#include <random/uniform.h>
// Library-provided macro; presumably expands to a using-directive for
// namespace blitz (TODO confirm against the Blitz++ headers).
BZ_USING_NAMESPACE(blitz)
// File-wide random number generator; tinyDAXPYBenchmark draws from it to
// initialize its operand vectors.
ranlib::Uniform<double> rnd;
// Forward declaration only: the definition appears later in this file, after
// tinyDAXPYBenchmark, which calls it on each of its vectors.
template<class T>
void optimizationSink(T&);
/**
 * Times repeated DAXPY-style updates (y += a * x) on
 * TinyVector<double,N_rank> operands and prints the measured rate.
 *
 * The unnamed first parameter carries no data; it exists so that N_rank can
 * be deduced from the argument type at the call site.
 *
 * @param iters  number of passes through the timed loop. The result line is
 *               only printed when iters > 1.
 * @param a      scale factor for the forward updates; the reverse updates
 *               use -a.
 */
template<int N_rank>
void tinyDAXPYBenchmark(TinyVector<double,N_rank>, int iters, double a)
{
    Timer timer;
    TinyVector<double,N_rank> ta, tb, tc, td, te, tf, tg, th, ti, tj;

    // Give every operand a random starting value, offset by 1.
    for (int i = 0; i < N_rank; ++i)
    {
        ta[i] = rnd.random()+1;
        tb[i] = rnd.random()+1;
        tc[i] = rnd.random()+1;
        td[i] = rnd.random()+1;
        te[i] = rnd.random()+1;
        tf[i] = rnd.random()+1;
        tg[i] = rnd.random()+1;
        th[i] = rnd.random()+1;
        ti[i] = rnd.random()+1;
        tj[i] = rnd.random()+1;
    }

    // Reverse updates use the negated factor (presumably so the values stay
    // bounded over many iterations -- not verified here).
    const double b = -a;
    double numFlops = 0;

    if (N_rank < 20)
    {
        // Short vectors: five independent vector pairs, each updated twice
        // in both directions per pass, i.e. 20 vector updates per pass.
        timer.start();
        for (int i = 0; i < iters; ++i)
        {
            ta += a * tb;
            tc += a * td;
            te += a * tf;
            tg += a * th;
            ti += a * tj;
            tb += b * ta;
            td += b * tc;
            tf += b * te;
            th += b * tg;
            tj += b * ti;
            ta += a * tb;
            tc += a * td;
            te += a * tf;
            tg += a * th;
            ti += a * tj;
            tb += b * ta;
            td += b * tc;
            tf += b * te;
            th += b * tg;
            tj += b * ti;
        }
        timer.stop();
        // 20 updates * (1 multiply + 1 add) per element.
        numFlops = 40.0 * N_rank * double(iters);
    }
    else
    {
        // Longer vectors: a single pair, one update in each direction.
        timer.start();
        for (int i = 0; i < iters; ++i)
        {
            ta += a * tb;
            tb += b * ta;
        }
        timer.stop();
        // 2 updates * (1 multiply + 1 add) per element.
        numFlops = 4.0 * N_rank * double(iters);
    }

    // Hand every vector to the sink after the timed region. The timer has
    // already been stopped exactly once, right after the loop, so these
    // calls are not part of the measurement and stop() is not called a
    // second time on an already-stopped timer.
    optimizationSink(ta);
    optimizationSink(tb);
    optimizationSink(tc);
    optimizationSink(td);
    optimizationSink(te);
    optimizationSink(tf);
    optimizationSink(tg);
    optimizationSink(th);
    optimizationSink(ti);
    optimizationSink(tj);

    // Kept as float so the printed value is formatted as before.
    const float Gflops = static_cast<float>(numFlops / (1e9*timer.elapsed()));

    if (iters > 1)
    {
        cout << setw(5) << N_rank << '\t' << Gflops << endl;
    }
}
// Scale factor passed to every tinyDAXPYBenchmark call in main().
double a = 0.3429843;
// No-op sink: takes any object by reference and leaves it untouched.
// tinyDAXPYBenchmark passes its vectors here after the timed loop
// (presumably so the results count as "used" -- TODO confirm intent).
template<typename T>
void optimizationSink(T& /*unused*/)
{}
// Entry point: prints a title line and a column-heading line, then runs the
// DAXPY benchmark for vector lengths 1 through 10 using the global factor a.
int main()
{
    cout << "TinyVector<double,N> DAXPY benchmark" << endl;
    cout << setw(5) << "N" << '\t' << "Gflops/" << Timer::indep_var() << endl;

    // Iteration counts, stepping down as the vector length grows.
    const int itersLen1To3  = 800000;
    const int itersLen4     = 700000;
    const int itersLen5     = 600000;
    const int itersLen6To10 = 500000;

    tinyDAXPYBenchmark(TinyVector<double,1>(),  itersLen1To3,  a);
    tinyDAXPYBenchmark(TinyVector<double,2>(),  itersLen1To3,  a);
    tinyDAXPYBenchmark(TinyVector<double,3>(),  itersLen1To3,  a);
    tinyDAXPYBenchmark(TinyVector<double,4>(),  itersLen4,     a);
    tinyDAXPYBenchmark(TinyVector<double,5>(),  itersLen5,     a);
    tinyDAXPYBenchmark(TinyVector<double,6>(),  itersLen6To10, a);
    tinyDAXPYBenchmark(TinyVector<double,7>(),  itersLen6To10, a);
    tinyDAXPYBenchmark(TinyVector<double,8>(),  itersLen6To10, a);
    tinyDAXPYBenchmark(TinyVector<double,9>(),  itersLen6To10, a);
    tinyDAXPYBenchmark(TinyVector<double,10>(), itersLen6To10, a);

    return 0;
}
|