File: tinydaxpy.cpp

package info (click to toggle)
blitz++ 1:0.10-3.2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 13,276 kB
  • ctags: 12,037
  • sloc: cpp: 70,465; sh: 11,116; fortran: 1,510; python: 1,246; f90: 852; makefile: 701
file content (124 lines) | stat: -rw-r--r-- 2,942 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// TinyVector<T,N> DAXPY benchmark

//#define BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE

#include <blitz/array.h>
#include <blitz/timer.h>
#include <random/uniform.h>

BZ_USING_NAMESPACE(blitz)

// Random-number generator shared by the benchmark: tinyDAXPYBenchmark fills
// its working vectors with values obtained from rnd.random().
ranlib::Uniform<double> rnd;

// Forward declaration of the sink that tinyDAXPYBenchmark calls on each of
// its working vectors after the timed loop.  The definition appears later in
// this file, after the benchmark template.
template<class T>
void optimizationSink(T&);

// Times a chain of TinyVector DAXPY updates (x += c * y) on vectors of
// compile-time length N_rank and prints the resulting rate.
//
// Parameters:
//   (unnamed)  A TinyVector<double,N_rank> whose only role is to let the
//              caller select N_rank by deduction; its value is never read.
//   iters      Number of passes through the timed loop.  A result row is
//              printed only when iters > 1.
//   a          DAXPY coefficient.  The loop alternates updates that use a
//              with updates that use -a.
//
// Output: one row on cout consisting of N_rank in a 5-wide field, a tab, and
// numFlops / (1e9 * timer.elapsed()).  The unit of elapsed() is whatever the
// Blitz Timer is configured to report (main() labels the column with
// Timer::indep_var()).
template<int N_rank>
void tinyDAXPYBenchmark(TinyVector<double,N_rank>, int iters, double a)
{
    Timer timer;

    // Ten working vectors; every element starts as rnd.random() + 1.
    TinyVector<double,N_rank> ta, tb, tc, td, te, tf, tg, th, ti, tj;
    for (int i=0; i < N_rank; ++i)
    {
        ta[i] = rnd.random()+1;
        tb[i] = rnd.random()+1;
        tc[i] = rnd.random()+1;
        td[i] = rnd.random()+1;
        te[i] = rnd.random()+1;
        tf[i] = rnd.random()+1;
        tg[i] = rnd.random()+1;
        th[i] = rnd.random()+1;
        ti[i] = rnd.random()+1;
        tj[i] = rnd.random()+1;
    }

    // Negated coefficient used by the "reverse" half of each update pair.
    double b = -a;

    double numFlops = 0;

    if (N_rank < 20)
    {
      // Short vectors: 20 DAXPY statements per pass over five independent
      // vector pairs.
      timer.start();
      for (int i=0; i < iters; ++i)
      {
        ta += a * tb;
        tc += a * td;
        te += a * tf;
        tg += a * th;
        ti += a * tj;
        tb += b * ta;
        td += b * tc;
        tf += b * te;
        th += b * tg;
        tj += b * ti;
        ta += a * tb;
        tc += a * td;
        te += a * tf;
        tg += a * th;
        ti += a * tj;
        tb += b * ta;
        td += b * tc;
        tf += b * te;
        th += b * tg;
        tj += b * ti;
      }
      timer.stop();
      // 20 statements, each counted as 2 flops per element.
      numFlops = 40.0 * N_rank * double(iters);
    }
    else {
      // Longer vectors: a single pair, 2 DAXPY statements per pass.
      timer.start();
      for (int i=0; i < iters; ++i)
      {
        ta += a * tb;
        tb += b * ta;
      }
      timer.stop();
      // 2 statements, each counted as 2 flops per element.
      numFlops = 4.0 * N_rank * double(iters);
    }

    // The timer is already stopped at this point (each branch calls stop()
    // right after its loop), so the sink calls below stay outside the
    // measured interval and the timer is never stopped twice.
    optimizationSink(ta);
    optimizationSink(tb);
    optimizationSink(tc);
    optimizationSink(td);
    optimizationSink(te);
    optimizationSink(tf);
    optimizationSink(tg);
    optimizationSink(th);
    optimizationSink(ti);
    optimizationSink(tj);

    if (iters > 1)
    {
        // Kept in double so the rate is not silently narrowed before printing.
        const double Gflops = numFlops / (1e9*timer.elapsed());
        cout << setw(5) << N_rank << '\t' << Gflops << endl;
    }
}

// Coefficient passed as the `a` argument to every tinyDAXPYBenchmark call
// made from main().
double a = 0.3429843;

// Sink for benchmark results: takes any object by reference and does nothing
// with it.  The benchmark passes each working vector here after timing.
// NOTE(review): the body is empty and visible in this translation unit, so
// an optimizing compiler may inline it away -- confirm the timed loops
// survive at the optimization level used for the benchmark.
template<typename Value>
void optimizationSink(Value& /*unused*/) {}

int main()
{
    cout << "TinyVector<double,N> DAXPY benchmark" << endl
         << setw(5) << "N" << '\t' << "Gflops/" << Timer::indep_var() << endl;
    tinyDAXPYBenchmark(TinyVector<double,1>(), 800000, a);
    tinyDAXPYBenchmark(TinyVector<double,2>(), 800000, a);
    tinyDAXPYBenchmark(TinyVector<double,3>(), 800000, a);
    tinyDAXPYBenchmark(TinyVector<double,4>(), 700000, a);
    tinyDAXPYBenchmark(TinyVector<double,5>(), 600000, a);
    tinyDAXPYBenchmark(TinyVector<double,6>(), 500000, a);
    tinyDAXPYBenchmark(TinyVector<double,7>(), 500000, a);
    tinyDAXPYBenchmark(TinyVector<double,8>(), 500000, a);
    tinyDAXPYBenchmark(TinyVector<double,9>(), 500000, a);
    tinyDAXPYBenchmark(TinyVector<double,10>(), 500000, a);

    return 0;
}