| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 
 |       SUBROUTINE PIROW2COL( ICTXT, M, N, NB, VS, LDVS, VD, LDVD,
     $                     RSRC, CSRC, RDEST, CDEST, WORK)
*
*  -- ScaLAPACK tools routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
     $                   RDEST, RSRC
*     ..
*     .. Array Arguments ..
      INTEGER            VD( LDVD, * ), VS( LDVS, * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  Take a block of vectors with M total rows which are distributed over
*  a row of processes, and distribute those rows over a column of
*  processes. This routine minimizes communication by sending all
*  information it has that a given process in the CDEST needs at once.
*  To do this it uses the least common multiple (LCM) concept.  This is
*  simply the realization that if I have part of a vector split over a
*  process row consisting of Q processes, and I want to send all of that
*  vector that I own to a new vector distributed over P processes within
*  a process column, that after I find the process in RDEST that owns
*  the row of the vector I'm currently looking at, he will want every
*  ( (LCM(P,Q)/Q ) block of my vector (the block being of size NB x N).
*
*  Arguments
*  =========
*
*  Rem:  MP, resp. NQ, denotes the number of local rows, resp. local
*  ====  columns, necessary to store a global vector of dimension M
*        across P processes, resp. N over Q processes.
*
*  ICTXT   (global input) INTEGER
*          The BLACS context handle, indicating the global context of
*          the operation. The context itself is global.
*
*  M       (global input) INTEGER
*          The number of global rows each vector has.
*
*  N       (global input) INTEGER
*          The number of vectors in the vector block
*
*  NB      (global input) INTEGER
*          The blocking factor used to divide the rows of the vector
*          amongst the processes of a row.
*
*  VS      (local input) @(typec)
*          Array of dimension (LDVS,N), the block of vectors stored on
*          process row RSRC to be put into memory VD, and stored on
*          process column CDEST.
*
*  LDVS    (local input) INTEGER
*          The leading dimension of VS.
*
*  VD      (local output) @(typec)
*          Array of dimension (LDVD,N), on output, the contents of VD
*          stored on process column CDEST will be here.
*
*  LDVD    (local input) INTEGER
*          The leading dimension of VD.
*
*  RSRC    (global input) INTEGER
*          The process row VS is distributed over.
*
*  CSRC    (global input) INTEGER
*          The process column the distributed block of vectors VS
*          begins on.
*
*  RDEST   (global input) INTEGER
*          The process row that VD begins on.
*
*  CDEST   (global input) INTEGER
*          The process column to distribute VD over.
*
*  WORK    (local workspace) @(typec)
*          Array, dimension (LDW). The required size of work varies:
*          if( nprow.eq.npcol ) then
*             LDW = 0; WORK not accessed.
*          else
*            lcm = least common multiple of process rows and columns.
*             Mq  = number of rows of VS on my process.
*             npcol = number of process columns
*             CEIL = the ceiling of given operation
*             LDW = NB*N*CEIL( CEIL( Mq/NB )/(LCM/npcol) )
*          end if
*
*  =====================================================================
*
*     .. Local Scalars ..
      INTEGER            CBLKSKIP, ICPY, II, ISTART, ICSRC, IRDEST, JB,
     $                   JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW,
     $                   NBLOCKS, NPCOL, NPROW, RBLKSKIP
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, IGESD2D, IGERV2D, ILACPY
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      EXTERNAL           ILCM, NUMROC
*     ..
*     .. Executable Statements ..
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     If we are not in special case for NPROW = NPCOL where there is no
*     copying required
*
      IF( NPROW .NE. NPCOL ) THEN
         LCM = ILCM( NPROW, NPCOL )
         RBLKSKIP = LCM / NPCOL
         CBLKSKIP = LCM / NPROW
*
*        If I have part of VS, the source vector(s)
*
         IF( MYROW.EQ.RSRC ) THEN
*
            ISTART = 1
*
*           Figure my distance from CSRC: the process in CDEST the same
*           distance from RDEST will want my first block
*
            MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL )
            IRDEST = MOD( RDEST+MYDIST, NPROW )
*
*           Loop over all possible destination processes
*
            DO 20 K = 1, RBLKSKIP
               JJ = 1
*
*              If I am not destination process
*
               IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN
*
*                 Pack all data I own that destination needs
*
                  DO 10 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL ILACPY( 'G', JB, N, VS(II,1), LDVS,
     $                            WORK(JJ), JB )
                     JJ = JJ + NB*N
   10             CONTINUE
*
*                 Figure how many rows are to be sent and send them if
*                 necessary, NOTE: will send extra if NB > JB
*
                  JJ = JJ - 1
                  IF( JJ.GT.0 )
     $               CALL IGESD2D( ICTXT, JJ, 1, WORK, JJ, IRDEST,
     $                             CDEST )
*
*              I am both source and destination, save where to start
*              copying from for later use
*
               ELSE
                  ICPY = ISTART
               END IF
*
               ISTART = ISTART + NB
               IRDEST = MOD( IRDEST+NPCOL, NPROW )
   20       CONTINUE
         END IF
*
*        If I should receive info into VD
*
         IF( MYCOL.EQ.CDEST ) THEN
*
            ISTART = 1
*
*           Figure my distance from CDEST: the process in CSRC the same
*           distance from RSRC will have my first block
*
            MYDIST = MOD( NPROW+MYROW-RDEST, NPROW )
            MP = NUMROC( M, NB, MYROW, RDEST, NPROW )
            ICSRC = MOD( CSRC+MYDIST, NPCOL )
*
*           Loop over all sending processes
*
            DO 50 K = 1, CBLKSKIP
*
*              If I don't already possess the required data
*
               IF( (MYROW.NE.RSRC).OR.(MYCOL.NE.ICSRC) ) THEN
*
*                 Figure how many rows to receive, and receive them
*                 NOTE: may receive to much -- NB instead of JB
*
                  NBLOCKS = (MP - ISTART + NB) / NB
                  JJ = ((NBLOCKS+CBLKSKIP-1) / CBLKSKIP)*NB
                  IF( JJ.GT.0 )
     $               CALL IGERV2D( ICTXT, JJ, N, WORK, JJ, RSRC, ICSRC )
*
*                 Copy data to destination vector
*
                  JJ = 1
                  DO 30 II = ISTART, MP, NB*CBLKSKIP
                     JB = MIN( NB, MP-II+1 )
                     CALL ILACPY( 'G', JB, N, WORK(JJ), JB, VD(II,1),
     $                            LDVD )
                     JJ = JJ + NB*N
   30             CONTINUE
*
*              If I am both source and destination
*
               ELSE
                  JJ = ICPY
                  DO 40 II = ISTART, MP, NB*CBLKSKIP
                     JB = MIN( NB, MP-II+1 )
                     CALL ILACPY( 'G', JB, N, VS(JJ,1), LDVS, VD(II,1),
     $                            LDVD )
                     JJ = JJ + NB*RBLKSKIP
   40             CONTINUE
               END IF
               ISTART = ISTART + NB
               ICSRC = MOD( ICSRC+NPROW, NPCOL )
   50       CONTINUE
         END IF
*
*     if NPROW = NPCOL, there is a one-to-one correspondance between
*     process rows and columns, so no work space or copying required
*
      ELSE
*
         IF( MYROW.EQ.RSRC ) THEN
*
*           Figure my distance from CSRC: the process in CDEST the same
*           distance from RDEST will want my piece of the vector
*
            MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL )
            IRDEST = MOD( RDEST+MYDIST, NPROW )
            IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN
               CALL IGESD2D( ICTXT, MQ, N, VS, LDVS, IRDEST, CDEST )
            ELSE
               CALL ILACPY( 'G', MQ, N, VS, LDVS, VD, LDVD )
            END IF
         END IF
         IF( MYCOL.EQ.CDEST ) THEN
*
*           Figure my distance from RDEST: the process in RSRC the same
*           distance from CSRC will have my piece of the vector
*
            MYDIST = MOD( NPROW+MYROW-RDEST, NPROW )
            MP = NUMROC( M, NB, MYROW, RDEST, NPROW )
            ICSRC = MOD( CSRC+MYDIST, NPCOL )
            IF( (MYCOL.NE.ICSRC).OR.(MYROW.NE. RSRC) )
     $         CALL IGERV2D( ICTXT, MP, N, VD, LDVD, RSRC, ICSRC )
         END IF
      END IF
*
      RETURN
*
*     End of PIROW2COL
*
      END
 |