// File: loopstruct.cpp
//
// Package info:
//   blitz++ 1:0.10-3.2
//   links: PTS, VCS
//   area: main
//   in suites: jessie, jessie-kfreebsd
//   size: 13,276 kB
//   ctags: 12,037
//   sloc: cpp: 70,465; sh: 11,116; fortran: 1,510; python: 1,246; f90: 852; makefile: 701
// File content: 172 lines, -rw-r--r--, 3,266 bytes
//
// CC -64 -LANG:std -LANG:restrict -Ofast -PHASE:clist -IPA=off -IPA:INLINE=off
//
// When compiled with the above options, these are the results on convex:
// func1: 34.6484
// func2: 24.6603
// func3: 17.2822
//
// func1 is a simple C-style loop.
// func2 has the pointers stuck inside a struct.  Prefetching no longer occurs.
// func3 has the pointers inside "iterator" structs, and a read is done
//       using an inline operator().  This somehow results in the loop
//       being unrolled only twice, instead of 4 times.

#include <iostream.h>
#include <sys/resource.h>

// Paste the Timer class in here so you don't have to have Blitz++

// Minimal CPU-time stopwatch built on getrusage().
//
// Usage: call start(), run the work, call stop(); elapsedSeconds() then
// yields the user + system CPU time (in seconds) that this process
// accumulated between the two calls.  Copying is blocked by keeping the
// copy constructor and assignment operator private.
class Timer {

public:
    // Both timestamps start at zero so that elapsedSeconds() returns 0
    // (instead of reading indeterminate values) when it is called before
    // start()/stop().  The initializer order follows the member
    // declaration order.
    Timer()
        : state_(uninitialized), t1_(0), t2_(0)
    {
    }

    // Record the current CPU time as the beginning of the interval.
    void start()
    {
        state_ = running;
        t1_ = systemTime();
    }

    // Record the current CPU time as the end of the interval.
    void stop()
    {
        t2_ = systemTime();
        state_ = stopped;
    }

    // Seconds between the most recent start() and stop() samples.
/* Compaq cxx compiler in ansi mode cannot print out long double type! */
#if defined(__DECCXX)
    double elapsedSeconds()
#else
    long double elapsedSeconds()
#endif
    {
        return t2_ - t1_;
    }

private:
    // Private on purpose: Timer objects are not meant to be copied.
    Timer(Timer&) : state_(uninitialized), t1_(0), t2_(0) { }
    void operator=(Timer&) { }

    // Current user + system CPU time of this process, in seconds.
    // The getrusage() return code is not inspected.
    long double systemTime()
    {
        getrusage(RUSAGE_SELF, &resourceUsage_);
        double seconds = resourceUsage_.ru_utime.tv_sec
            + resourceUsage_.ru_stime.tv_sec;
        double micros  = resourceUsage_.ru_utime.tv_usec
            + resourceUsage_.ru_stime.tv_usec;
        return seconds + micros/1.0e6;
    }

    // Bookkeeping only: written by the constructor/start()/stop(),
    // never read inside this class.
    enum { uninitialized, running, stopped } state_;

    struct rusage resourceUsage_;   // scratch buffer filled by getrusage()
    long double t1_, t2_;           // CPU time at start() and at stop()
};


// func1:  Simple version

// Baseline kernel: y[i] += a * x[i] for i in [0, N), written as a plain
// C-style loop over two raw pointers.
//
// x and y are declared non-aliasing.  The qualifier is spelled
// `__restrict` because plain `restrict` is a C99 keyword, not a C++ one,
// and is rejected by compilers that lack a vendor switch for it.
// The caller must guarantee that the two arrays do not overlap.
// N <= 0 leaves y untouched.
void func1(double* __restrict x, double* __restrict y, double a, int N)
{
    for (int i=0; i < N; ++i)
        y[i] += a*x[i];
}


// func2: With pointers inside a struct

// Argument bundle for func2: the same four values func1 receives as
// separate parameters, gathered into one struct.
//
// The pointers use `__restrict` rather than `restrict`: the latter is a
// C99 keyword that C++ compilers only accept through vendor switches.
struct A {
    double* __restrict x;   // input array (read in func2)
    double* __restrict y;   // array updated in place by func2
    double a;               // multiplier applied to each x element
    int N;                  // number of elements processed
};

// Performs y[k] += a * x[k] for k in [0, N), with both array pointers,
// the multiplier and the length all fetched through the struct reference
// on every use.  The member accesses are intentionally left inside the
// loop: this access pattern is what the benchmark measures.
void func2(A& z)
{
    for (int k = 0; k < z.N; ++k)
    {
        z.y[k] += z.a * z.x[k];
    }
}


// func3: with very simple "iterators" (the B struct).

// Very small "iterator" wrapper around a raw array pointer.
//
// The pointer uses `__restrict` rather than `restrict`: the latter is a
// C99 keyword that C++ compilers only accept through vendor switches.
struct B {
    int q;                      // not referenced anywhere in this file
    double* __restrict data;    // the wrapped array
    // Read element i of the wrapped array (no bounds checking).
    double operator()(int i)
    { return data[i]; }
};

// Argument bundle for func3: the two arrays wrapped in B "iterator"
// structs, together with the multiplier and the element count.
struct C {
    B x;        // read in func3 through B::operator()
    B y;        // updated in func3 directly through y.data
    double a;   // multiplier applied to each x element
    int N;      // loop trip count used by func3
};

// Performs y.data[k] += a * x(k) for k in [0, N).  The x element is read
// through B's inline operator(), while the y element is written directly
// via its data pointer.  All operands are reached through the struct
// reference inside the loop, which is the pattern this benchmark measures.
void func3(C& z)
{
    for (int k = 0; k < z.N; ++k)
    {
        z.y.data[k] += z.a * z.x(k);
    }
}


// Initialize array

// Set the first N entries of x to 1.0.  Nothing is written when N <= 0.
void init(double* x, int N)
{
    int k = 0;
    while (k < N)
    {
        x[k] = 1.0;
        ++k;
    }
}

// Benchmark driver.  Runs the three kernel variants over the same pair of
// arrays and prints, for each, the million-flop count divided by the CPU
// seconds reported by Timer.
int main()
{
    Timer timer;

    int N = 1000000;
    int iters = 20;
    // Two floating-point operations (multiply + add) per element per
    // iteration, expressed in millions.
    double Mflops = N * iters * 2 / 1000000.;

    double* x = new double[N];
    double* y = new double[N];
    double a = .14989182;
    init(x,N);
    init(y,N);

    // Variant 1: plain pointers passed as separate arguments.
    timer.start();
    for (int i=0; i < iters; ++i)
        func1(x,y,a,N);
    timer.stop();

    cout << "func1: " << Mflops/timer.elapsedSeconds() << endl;

    // Variant 2: pointers carried inside a struct.  The struct setup sits
    // inside the timed region, as it does for variant 3.
    timer.start();
    {
    A z;
    z.x = x;
    z.y = y;
    z.a = a;
    z.N = N;
    for (int i=0; i < iters; ++i)
        func2(z);
    }
    timer.stop();

    cout << "func2: " << Mflops/timer.elapsedSeconds() << endl;

    // Variant 3: pointers wrapped in "iterator" structs.
    timer.start();
    {
    C z;
    z.x.data = x;
    z.y.data = y;
    z.a = a;
    z.N = N;
    for (int i=0; i < iters; ++i)
        func3(z);
    }
    timer.stop();
    cout << "func3: " << Mflops/timer.elapsedSeconds() << endl;

    // Release the work arrays; they were previously never freed.
    delete [] y;
    delete [] x;

    return 0;
}