File: perfmod.F

package info (click to toggle)
ga 5.9.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 18,472 kB
  • sloc: ansic: 192,963; fortran: 53,761; f90: 11,218; cpp: 5,784; makefile: 2,248; sh: 1,945; python: 1,734; perl: 534; csh: 134; asm: 106
file content (311 lines) | stat: -rw-r--r-- 8,550 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
#if HAVE_CONFIG_H
#   include "config.fh"
#endif
c $Id: perfmod.F,v 1.4 2000-05-25 01:09:20 d3h325 Exp $
c------------------------------------------------------------------------
c Program perfmod.x is used to test performance of GA put,get,accumulate |
c It has to be executed on four processors.                              |
c remote operations access data on processes 1,2,3 in the round-robin way|
c------------------------------------------------------------------------
c
      program perfmod
      implicit none
#include "mafdecls.fh"
#include "global.fh"
      integer heap,stack
c
c***  Intitialize a message passing library
c
#include "mp3.fh"
c
c     Intitialize the GA package
c
      call ga_initialize()
c
      if(ga_nnodes().ne.4 .and. ga_nodeid().eq.0)
     $   call ga_error('Must be run with 4 GA processes',ga_nnodes())
c
c***  Initialize the MA package
      heap = 450000
      stack =heap
      if (.not. ma_init(MT_DBL, heap, stack))
     $    call ga_error("ma init failed",heap+stack)
c
      call testit()
c
      if(ga_nodeid().eq.0) print *, 'All tests successful'
c
      call ga_terminate()
c
      call MP_FINALIZE()
      end


      subroutine testit()
      implicit none
#include "mafdecls.fh"
#include "global.fh"
c     
c
      integer n, nn, num_chunks
      parameter (n = 710, nn = n*n/4, num_chunks=12)
      double precision buf(nn)
c
      integer g_a
      integer ilo, ihi, jlo, jhi
      integer nproc, me, loop
      integer chunk(num_chunks)
      data    chunk /1,3,4,9,16,30,64,91,128,171,256,353/
c     
      nproc = ga_nnodes()
      me = ga_nodeid()
c
c***  Create global array
      if (.not. ga_create(MT_DBL, n, n, 'a', 0, 0, g_a))
     $     call ga_error(' ga_create failed ',n)
c     
      do loop=1,nn
         buf(loop) = .01d0
      enddo
      call ga_zero(g_a) 
c
      if (me .eq. 0) then
        write(*,*)' '
        print *,'> Performance of ga_get, ga_put & ga_acc n = ', n
        print *,' '
      endif
c
c     do loop=1,2
c
c***  local ops
c
      call ga_distribution(g_a, me, ilo, ihi, jlo, jhi)
      call TestPutGetAcc
     &     (g_a, n, chunk, num_chunks, buf, ilo, ihi, jlo, jhi, .true.)
c
c***  remote ops
c
      call TestPutGetAcc
     &     (g_a, n, chunk, num_chunks, buf, ilo, ihi, jlo, jhi,.false.)

c     enddo
      end


      subroutine TestPutGetAcc
     &      (g_a, n, chunk, num_chunks, buf, ilo, ihi, jlo,jhi, local)
      implicit none
#include "global.fh"
#include "testutil.fh"
      integer num_chunks, chunk(num_chunks)
      integer n, ilo, ihi, jlo,jhi,g_a
      double precision buf(*), tg, tp, ta
      double precision time_acc, time_get, time_put
      logical local
c
      integer me
      integer loop, jump, count, bytes
c
      me = ga_nodeid()
      if (me .eq. 0) then
        write(6,*)' '
        if(local) then
          write(6,'(21X,8hACCESS [,i3,1h:,i4,1h,,i3,1h:,i4,1h])') 
     &        ilo,ihi,jlo,jhi
        else
          write(6,'(21X,6hACCESS , 2x, 18Hremote section       )') 
        endif

        write(6,*)'bytes  loop         get                    put',
     &           '                 accumulate'
        call flush(6)
      endif
      call ga_sync()
c
      do loop = 1, num_chunks
        bytes = util_mdtob(1)*chunk(loop)*chunk(loop) !how much data is accessed
        jump  =  n/(60*loop) ! jump distance between consecutive patches
c
c       everybody touches own data 
        call ga_fill_patch(g_a, 1, n, 1, n , 1d0*me*loop)
        if (me .eq. 0) then
        tg=time_get(g_a,ilo,ihi,jlo,jhi,buf,chunk(loop),jump,count,
     $               local)
        endif
        call ga_sync()
c
c       everybody touches own data
        call ga_fill_patch(g_a, 1, n, 1, n , 1d0*me*loop)
        if (me .eq. 0) then
        tp=time_put(g_a,ilo,ihi,jlo,jhi,buf,chunk(loop),jump,count, 
     $               local)
        endif
        call ga_sync()
c
c       everybody touches own data
        call ga_fill_patch(g_a, 1, n, 1, n , 1d0*me*loop)
        if (me .eq. 0) then
        ta=time_acc(g_a,ilo,ihi,jlo,jhi,buf,chunk(loop),jump,count,
     $               local)
        endif
        call ga_sync()
c
        if (me .eq. 0) then
          write(6,77)bytes, count, tg, 1d-6*bytes/tg,
     &               tp, 1d-6*bytes/tp, ta, 1d-6*bytes/ta
          call flush(6)
        endif
      enddo
c
77    format(i6, i5, 3(d10.3, d9.2,'MB/s'))
      end



      double precision function
     &   time_acc(g_a, is, ie, js, je, buf, chunk, jump, count, local)
c
      implicit none
#include "global.fh"
#include "testutil.fh"
c
      integer g_a, chunk, jump, count, is, js, ie, je
      logical local 
      integer rows, cols, indx, shifti(3), shiftj(3)
c
      integer ilo, ihi, jlo, jhi
      double precision  seconds, buf 
c
      count = 0
      rows = ie - is + 1
      cols = je - js + 1
      shifti(1) = rows
      shifti(2) = 0
      shifti(3) = rows
      shiftj(1) = 0
      shiftj(2) = cols
      shiftj(3) = cols

      seconds = util_timer()
c
c       distance between consecutive patches increased by jump
c       to destroy locality of reference
        do ilo = is, ie -chunk-jump +1, chunk+jump
           ihi = ilo + chunk -1
           do jlo = js, je -chunk-jump +1, chunk+jump
              jhi = jlo + chunk -1
              count = count + 1
              if (local) then
                 call ga_acc(g_a, ilo, ihi, jlo, jhi, buf, chunk, 1d0)
              else
                 indx = Mod(count,3) + 1 
                 call ga_acc(g_a, ilo+shifti(indx), ihi+shifti(indx),
     $                       jlo+shiftj(indx), jhi+shiftj(indx), 
     $                       buf, chunk, 1d0)
              endif
           enddo
        enddo
      seconds = util_timer() - seconds
c
      time_acc = seconds/count
      end


      double precision function
     &    time_get(g_a, is, ie, js, je, buf, chunk, jump, count, local)
c
      implicit none
#include "global.fh"
#include "testutil.fh"
c
      integer g_a, chunk, jump, count, is, js, ie, je
      integer rows, cols, indx, shifti(3), shiftj(3)
      logical local
c
      integer ilo, ihi, jlo, jhi
      double precision  seconds, buf
c
      count = 0
      rows = ie - is + 1
      cols = je - js + 1
      shifti(1) = rows
      shifti(2) = 0
      shifti(3) = rows
      shiftj(1) = 0
      shiftj(2) = cols
      shiftj(3) = cols

      seconds = util_timer()
c
c       distance between consecutive patches increased by jump
c       to destroy locality of reference
        do ilo = is, ie -chunk-jump +1, chunk+jump
           ihi = ilo + chunk -1
           do jlo = js, je -chunk-jump +1, chunk+jump
              jhi = jlo + chunk -1
              count = count + 1
              if (local) then
                 call ga_get(g_a, ilo, ihi, jlo, jhi, buf, chunk)
              else
                 indx = Mod(count,3) + 1
                 call ga_get(g_a, ilo+shifti(indx), ihi+shifti(indx),
     $                       jlo+shiftj(indx), jhi+shiftj(indx),
     $                       buf, chunk)
              endif
           enddo
        enddo
      seconds = util_timer() - seconds
c
      time_get = seconds/count
      end



      double precision function
     &   time_put(g_a, is, ie, js, je, buf, chunk, jump, count, local)
c
      implicit none
#include "global.fh"
#include "testutil.fh"
c
      integer g_a, chunk, jump, count, is, js, ie, je
      integer rows, cols, indx, shifti(3), shiftj(3)
      logical local
c
      integer ilo, ihi, jlo, jhi
      double precision  seconds, buf
c
      count = 0
      rows = ie - is + 1
      cols = je - js + 1
      shifti(1) = rows
      shifti(2) = 0
      shifti(3) = rows
      shiftj(1) = 0
      shiftj(2) = cols
      shiftj(3) = cols

      seconds = util_timer()
c
c       distance between consecutive patches increased by jump
c       to destroy locality of reference
        do ilo = is, ie -chunk-jump +1, chunk+jump
           ihi = ilo + chunk -1
           do jlo = js, je -chunk-jump +1, chunk+jump
              jhi = jlo + chunk -1
              count = count + 1
              if (local) then
                 call ga_put(g_a, ilo, ihi, jlo, jhi, buf, chunk)
              else
                 indx = Mod(count,3) + 1
                 call ga_put(g_a, ilo+shifti(indx), ihi+shifti(indx),
     $                       jlo+shiftj(indx), jhi+shiftj(indx),
     $                       buf, chunk)
              endif
           enddo
        enddo
      seconds = util_timer() - seconds
c
      time_put = seconds/count
      end