File: pscol2row.f

package info (click to toggle)
scalapack 2.2.2-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 37,012 kB
  • sloc: fortran: 339,113; ansic: 74,517; makefile: 1,494; sh: 34
file content (272 lines) | stat: -rw-r--r-- 9,125 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
      SUBROUTINE PSCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC,
     $                      CSRC, RDEST, CDEST, WORK)
*
*  -- ScaLAPACK tools routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
     $                   RDEST, RSRC
*     ..
*     .. Array Arguments ..
      REAL               VD( LDVD, * ), VS( LDVS, * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  Take a block of vectors with M total rows which are distributed over
*  a column of processes, and distribute those rows over a row of
*  processes. This routine minimizes communication by sending all
*  information it has that a given process in the RDEST needs at once.
*  To do this it uses the least common multiple (LCM) concept.  This is
*  simply the realization that if I have part of a vector split over a
*  process column consisting of P processes, and I want to send all of
*  that vector that I own to a new vector distributed over Q processes
*  within a process row, that after I find the process in RDEST that
*  owns the row of the vector I'm currently looking at, he will want
*  every ( (LCM(P,Q) / P ) block of my vector (the block being of size
*  NB x N).
*
*  Arguments
*  =========
*
*  Rem:  MP, resp. NQ, denotes the number of local rows, resp. local
*  ====  columns, necessary to store a global vector of dimension M
*        across P processes, resp. N over Q processes.
*
*  ICTXT   (global input) INTEGER
*          The BLACS context handle, indicating the global context of
*          the operation. The context itself is global.
*
*  M       (global input) INTEGER
*          The number of global rows each vector has.
*
*  N       (global input) INTEGER
*          The number of vectors in the vector block.
*
*  NB      (global input) INTEGER
*          The blocking factor used to divide the rows of the vector
*          amongst the processes of a column.
*
*  VS      (local input) REAL
*          Array of dimension (LDVS,N), the block of vectors stored on
*          process column CSRC to be put into memory VD, and stored
*          on process row RDEST.
*
*  LDVS    (local input) INTEGER
*          The leading dimension of VS, LDVS >= MAX( 1, MP ).
*
*  VD      (local output) REAL
*          Array of dimension (LDVD,N), on output, the contents of VS
*          stored on process row RDEST will be here.
*
*  LDVD    (local input) INTEGER
*          The leading dimension of VD, LDVD >= MAX( 1, MQ ).
*
*  RSRC    (global input) INTEGER
*          The process row the distributed block of vectors VS begins
*          on.
*
*  CSRC    (global input) INTEGER
*          The process column VS is distributed over.
*
*  RDEST   (global input) INTEGER
*          The process row to distribute VD over.
*
*  CDEST   (global input) INTEGER
*          The process column that VD begins on.
*
*  WORK    (local workspace) REAL
*          Array of dimension (LDW), the required size of work varies:
*          if( nprow.eq.npcol ) then
*             LDW = 0; WORK not accessed.
*          else
*             lcm = least common multiple of process rows and columns.
*             Mp  = number of rows of VS on my process.
*             nprow = number of process rows
*             CEIL = the ceiling of given operation
*             LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) )
*          end if
*
*  =====================================================================
*
*     .. Local Scalars ..
      INTEGER            CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB,
     $                   JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW,
     $                   NBLOCKS, NPCOL, NPROW, RBLKSKIP
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, SGESD2D, SGERV2D, SLACPY
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      EXTERNAL           ILCM, NUMROC
*     ..
*     .. Executable Statements ..
*
*
*     .. Initialize Variables ..
*
      ICPY = 0
*      
*     Get grid parameters.
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     If we are not in special case for NPROW = NPCOL where there
*     is no copying required
*
      IF( NPROW.NE.NPCOL ) THEN
         LCM = ILCM( NPROW, NPCOL )
         RBLKSKIP = LCM / NPCOL
         CBLKSKIP = LCM / NPROW
*
*        If I have part of VS, the source vector(s)
*
         IF( MYCOL.EQ.CSRC ) THEN
*
            ISTART = 1
*
*           Figure my distance from RSRC: the process in RDEST the same
*           distance from CDEST will want my first block
*
            MYDIST = MOD( NPROW+MYROW-RSRC, NPROW )
            MP = NUMROC( M, NB, MYROW, RSRC, NPROW )
            ICDEST = MOD( CDEST+MYDIST, NPCOL )
*
*           Loop over all possible destination processes
*
            DO 20 K = 1, CBLKSKIP
               JJ = 1
*
*              If I am not destination process
*
               IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN
*
*                 Pack all data I own that destination needs
*
                  DO 10 II = ISTART, MP, NB*CBLKSKIP
                     JB = MIN(NB, MP-II+1)
                     CALL SLACPY( 'G', JB, N, VS(II,1), LDVS,
     $                            WORK(JJ), JB )
                     JJ = JJ + NB*N
   10             CONTINUE
*
*                 Figure how many rows are to be sent and send them if
*                 necessary (NOTE: will send extra if NB > JB)
*
                  JJ = JJ - 1
                  IF( JJ.GT.0 )
     $               CALL SGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST,
     $                             ICDEST )
*
               ELSE
*
*                 I am both source and destination, save where to start
*                 copying from for later use.
*
                  ICPY = ISTART
               END IF
*
               ISTART = ISTART + NB
               ICDEST = MOD(ICDEST+NPROW, NPCOL)
   20       CONTINUE
         END IF
*
*        If I should receive info into VD
*
         IF( MYROW.EQ.RDEST ) THEN
*
            ISTART = 1
*
*           Figure my distance from CDEST: the process in CSRC the same
*           distance from RSRC will have my first block.
*
            MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL )
            IRSRC = MOD( RSRC+MYDIST, NPROW )
            DO 50 K = 1, RBLKSKIP
*
*              If I don't already possess the required data
*
               IF( (MYCOL.NE.CSRC).OR.(MYROW.NE.IRSRC) ) THEN
*
*                 Figure how many rows to receive, and receive them
*                 NOTE: may receive to much -- NB instead of JB
*
                  NBLOCKS = (MQ - ISTART + NB) / NB
                  JJ = ((NBLOCKS+RBLKSKIP-1) / RBLKSKIP)*NB
                  IF( JJ.GT.0 )
     $               CALL SGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC )
*
*                 Copy data to destination vector
*
                  JJ = 1
                  DO 30 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL SLACPY( 'G', JB, N, WORK(JJ), JB,
     $                            VD(II,1), LDVD )
                     JJ = JJ + NB*N
   30             CONTINUE
*
*                 If I am both source and destination
*
               ELSE
                  JJ = ICPY
                  DO 40 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL SLACPY( 'G', JB, N, VS(JJ,1), LDVS,
     $                            VD(II,1), LDVD )
                     JJ = JJ + NB*CBLKSKIP
   40             CONTINUE
               END IF
               ISTART = ISTART + NB
               IRSRC = MOD( IRSRC+NPCOL, NPROW )
   50       CONTINUE
         END IF
*
*     If NPROW = NPCOL, there is a one-to-one correspondance between
*     process rows and columns, so no work space or copying required
*
      ELSE
*
         IF( MYCOL.EQ.CSRC ) THEN
*
*           Figure my distance from RSRC: the process in RDEST the same
*           distance from CDEST will want my piece of the vector.
*
            MYDIST = MOD( NPROW+MYROW-RSRC, NPROW )
            MP = NUMROC( M, NB, MYROW, RSRC, NPROW )
            ICDEST = MOD( CDEST+MYDIST, NPCOL )
*
            IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN
               CALL SGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST )
            ELSE
               CALL SLACPY( 'G', MP, N, VS, LDVS, VD, LDVD )
            END IF
         END IF
*
         IF( MYROW.EQ.RDEST ) THEN
*
*           Figure my distance from CDEST: the process in CSRC the same
*           distance from RSRC will have my piece of the vector.
*
            MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL )
            IRSRC = MOD( RSRC+MYDIST, NPROW )
*
            IF( (MYROW.NE.IRSRC).OR.(MYCOL.NE.CSRC) )
     $         CALL SGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC )
*
          END IF
*
      END IF
*
      RETURN
*
*     End of PSCOL2ROW
*
      END