File: ga_shift.F

package info (click to toggle)
ga 5.9.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 18,472 kB
  • sloc: ansic: 192,963; fortran: 53,761; f90: 11,218; cpp: 5,784; makefile: 2,248; sh: 1,945; python: 1,734; perl: 534; csh: 134; asm: 106
file content (227 lines) | stat: -rw-r--r-- 6,793 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#if HAVE_CONFIG_H
#   include "config.fh"
#endif

# define THRESH 1d-13
# define THRESHF 1e-5
#define MISMATCH(x,y) abs(x-y)/max(1d0,abs(x)).gt.THRESH
#define MISMATCHF(x,y) abs(x-y)/max(1.0,abs(x)).gt.THRESHF


      program main
      implicit none
#include "mafdecls.fh"
#include "global.fh"
      integer DIM, MAXPROC, NITER
      parameter (DIM = 2, MAXPROC = 1000000, NITER=10)
      integer MAXSIZE
      parameter (MAXSIZE = 2**NITER)
      integer heap, stack, fudge, ma_heap
      integer me, nproc, nshift
      integer iter, lsize, ndim, dims(2), lo(2), hi(2), chunk(2), ld
      integer g_a
      integer i, j, ioff
      double precision tbeg, t_init, t_term, t_ma_init, t_msg_init
      double precision t_create, t_destroy, t_put, t_get, t_sync, bw
      double precision a(MAXSIZE,MAXSIZE)
      logical status
c     
c***  Intitialize a message passing library
c
#include "mp3.fh"
c     t_msg_init = ga_wtime() - tbeg
c
c***  Initialize GA
c
c     There are 2 choices: ga_initialize or ga_initialize_ltd.
c     In the first case, there is no explicit limit on memory usage.
c     In the second, user can set limit (per processor) in bytes.
c
      tbeg = ga_wtime()
      call ga_initialize()
      t_init = ga_wtime() - tbeg

      nproc = ga_nnodes()
      me = ga_nodeid()
      nshift = nproc/2
c
c     call ga_dgop(1,t_msg_init,1,'+')
c     t_msg_init = t_msg_init/dble(nproc)
c     if (me.eq.0) then
c       write(6,'(a,f16.6)') 'Time spent in runtime initialization: ',
c    +                       t_msg_init
c     endif
      call ga_dgop(2,t_init,1,'+')
      t_init = t_init/dble(nproc)
      if (me.eq.0) then
        write(6,'(a,f16.6)') 'Time spent in GA initialization:      ',
     +                       t_init
      endif
c     we can also use GA_set_memory_limit BEFORE first ga_create call
c
      heap = 2000*2000*4
      fudge = 100
      stack = 2000*2000
      ma_heap = heap + fudge 
      call GA_set_memory_limit(8*ma_heap)
c
      if(ga_nodeid().eq.0)then
         write(6,'(a)') ' '
         write(6,'(a)') ' GA initialized '
         write(6,'(a)') ' '
         call ffflush(6)
      endif
c
c***  Initialize the MA package
c     MA must be initialized before any global array is allocated
c
      tbeg = ga_wtime()
      status = ma_init(MT_DCPL, stack, ma_heap)
      t_ma_init = ga_wtime() - tbeg
      if (.not. status) call ga_error('ma_init failed',-1) 
      call ga_dgop(3,t_ma_init,1,'+')
      t_ma_init = t_ma_init/dble(nproc)
      if (me.eq.0) then
        write(6,'(a,f16.6)') 'Time spent in MA initialization:      ',
     +                       t_ma_init
      endif
c
      if(me.eq.0)then
        write(6,'(a,i8,a)') 'Using ', nproc, ' process(es)'
        call ffflush(6)
      endif
c
c*** begin looping over tests
c
      lsize = 1
      do iter = 1, NITER
        lsize = 2*lsize
c
c*** create GA that is lsize X lsize*nproc in dimension
c
        if (me.eq.0) then
          write(6,'(a)') ' '
          write(6,'(a,i8)') ' Testing block size of ',lsize*lsize
          write(6,'(a)') ' '
        endif
        ndim = 2
        dims(1) = lsize
        dims(2) = nproc*lsize
        chunk(1) = lsize
        chunk(2) = -1
        call ga_sync
        tbeg = ga_wtime()
        g_a = ga_create_handle()
        call ga_set_data(g_a,ndim,dims,MT_DBL)
        call ga_set_chunk(g_a,chunk)
        status = ga_allocate(g_a)
        if (.not.status) then
          call ga_error('ga_allocate failed for size: ',lsize)
        endif
        t_create = ga_wtime() - tbeg
        call ga_dgop(iter,t_create,1,'+')
        t_create = t_create/dble(nproc)
        if (me.eq.0) then
          write(6,'(a,f16.6)') 'Time spent creating GA:               ',
     +                       t_create
        endif
c
c*** Fill local buffer with values
c
        ioff = mod(me+nshift,nproc)
        ioff = ioff*lsize**2
        do j = 1, lsize
          do i = 1, lsize
            a(i,j) = dble((j-1)*lsize+i+ioff)
          end do
        end do
        ld = MAXSIZE
        lo(1) = 1
        hi(1) = lsize
        lo(2) = mod(me+nshift,nproc)
        lo(2) = lo(2)*lsize + 1
        hi(2) = lo(2) - 1 + lsize
        call ga_zero(g_a)
        tbeg = ga_wtime()
        call nga_put(g_a,lo,hi,a,ld)
        t_put = ga_wtime() - tbeg
        call ga_dgop(iter+1,t_put,1,'+')
        t_put = t_put/dble(nproc)
        if (me.eq.0) then
          write(6,'(a,f16.6)') 'Time spent in put:                    ',
     +                       t_put
        endif
        bw = dble(8*lsize*lsize)/t_put
        bw = bw/1000000.0d00
        if (me.eq.0) then
          write(6,'(a,f16.6)') 'Bandwidth for put (MB/s):             ',
     +                       bw
        endif
        tbeg = ga_wtime()
        call ga_sync
        t_sync = ga_wtime() - tbeg
        call ga_dgop(iter+2,t_sync,1,'+')
        t_sync = t_sync/dble(nproc)
        if (me.eq.0) then
          write(6,'(a,f16.6)') 'Time spent in sync:                   ',
     +                       t_sync
        endif
        bw = dble(8*lsize*lsize)/(t_put+t_sync)
        bw = bw/1000000.0d00
        if (me.eq.0) then
          write(6,'(a,f16.6)') 'Bandwidth for put with sync (MB/s):   ',
     +                       bw
        endif
        do j = 1, lsize
          do i = 1, lsize
            a(i,j) = 0.0d00
          end do
        end do
        call ga_sync
        tbeg = ga_wtime()
        call nga_get(g_a,lo,hi,a,ld)
        t_get = ga_wtime() - tbeg
        ioff = mod(me+nshift,nproc)
        ioff = ioff*lsize**2
        do j = 1, lsize
          do i = 1, lsize
            if (a(i,j).ne.dble((j-1)*lsize+i+ioff)) then
              write(6,'(i4,a,i8,a,i8,a)') me,' mismatch for element (',
     +                                    i,',',j,')' 
            endif
          end do
        end do
        call ga_dgop(iter+3,t_get,1,'+')
        t_get = t_get/dble(nproc)
        if (me.eq.0) then
          write(6,'(a,f16.6)') 'Time spent in get:                    ',
     +                       t_get
        endif
        bw = dble(8*lsize*lsize)/t_get
        bw = bw/1000000.0d00
        if (me.eq.0) then
          write(6,'(a,f16.6)') 'Bandwidth for get (MB/s):             ',
     +                       bw
        endif
        call ga_sync
        tbeg = ga_wtime()
        status = ga_destroy(g_a)
        t_destroy = ga_wtime() - tbeg
        call ga_dgop(iter+4,t_destroy,1,'+')
        t_destroy = t_destroy/dble(nproc)
        if (me.eq.0) then
          write(6,'(a,f16.6)') 'Time spent destroying GA:             ',
     +                       t_destroy
        endif
      end do
c
c***  Tidy up the GA package
c
      call ga_terminate()
c
c***  Tidy up after message-passing library
c
      call MP_FINALIZE()
c
      stop
      end