File: pgemraux.c

package info (click to toggle)
scalapack 1.7-5
  • links: PTS
  • area: main
  • in suites: woody
  • size: 33,956 kB
  • ctags: 30,434
  • sloc: fortran: 309,685; ansic: 64,027; makefile: 1,836; sh: 4
file content (311 lines) | stat: -rw-r--r-- 8,926 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
#include "redist.h"
/* $Id: pgemraux.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $
 * 
 * some functions used by the pigemr2d routine see file pigemr.c for more
 * documentation.
 * 
 * Created March 1993 by B. Tourancheau (See sccs for modifications). */
#define static2 static
#if defined(Add_) || defined(f77IsF2C)
#define fortran_mr2d pigemr2do_
#define fortran_mr2dnew pigemr2d_
#elif defined(UpCase)
#define fortran_mr2dnew PIGEMR2D
#define fortran_mr2d PIGEMR2DO
#define icopy_ ICOPY
#define ilacpy_ ILACPY
#else
#define fortran_mr2d pigemr2do
#define fortran_mr2dnew pigemr2d
#define icopy_ icopy
#define ilacpy_ ilacpy
#endif
#define Clacpy Cigelacpy
void  Clacpy();
typedef struct {
  int   desctype;
  int   ctxt;
  int   m;
  int   n;
  int   nbrow;
  int   nbcol;
  int   sprow;
  int   spcol;
  int   lda;
}     MDESC;
#define BLOCK_CYCLIC_2D 1
typedef struct {
  int   lstart;
  int   len;
}     IDESC;
#define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
#define max(A,B) ((A)>(B)?(A):(B))
#define min(A,B) ((A)>(B)?(B):(A))
#define DIVUP(a,b) ( ((a)-1) /(b)+1)
#define ROUNDUP(a,b) (DIVUP(a,b)*(b))
#ifdef MALLOCDEBUG
#define malloc mymalloc
#define free myfree
#define realloc myrealloc
#endif
/* Cblacs */
extern void Cblacs_pcoord();
extern int Cblacs_pnum();
extern void Csetpvmtids();
extern void Cblacs_get();
extern void Cblacs_pinfo();
extern void Cblacs_gridinfo();
extern void Cblacs_gridinit();
extern void Cblacs_exit();
extern void Cblacs_gridexit();
extern void Cblacs_setup();
extern void Cigebs2d();
extern void Cigebr2d();
extern void Cigesd2d();
extern void Cigerv2d();
extern void Cigsum2d();
extern void Cigamn2d();
extern void Cigamx2d();
extern void Cigesd2d();
extern void Cigerv2d();
/* lapack */
void  ilacpy_();
/* aux fonctions */
extern int localindice();
extern void *mr2d_malloc();
extern int ppcm();
extern int localsize();
extern int memoryblocksize();
extern int changeorigin();
extern void paramcheck();
/* tools and others function */
#define scanD0 igescanD0
#define dispmat igedispmat
#define setmemory igesetmemory
#define freememory igefreememory
#define scan_intervals igescan_intervals
extern void scanD0();
extern void dispmat();
extern void setmemory();
extern void freememory();
extern int scan_intervals();
extern void Cpigemr2do();
extern void Cpigemr2d();
/* some defines for Cpigemr2do */
#define SENDBUFF 0
#define RECVBUFF 1
#define SIZEBUFF 2
#if 0
#define DEBUG
#endif
#ifndef DEBUG
#define NDEBUG
#endif
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
void *
mr2d_malloc(n)
  int   n;
{
  void *ptr;
  assert(n > 0);
  ptr = (void *) malloc(n);
  if (ptr == NULL) {
    fprintf(stderr, "xxmr2d:out of memory\n");
    exit(2);
  }
  return ptr;
}
int 
pgcd(a, b)
  int   a, b;
{
  int   aux;
  if (a < b)
    return pgcd(b, a);
  else {
    aux = a % b;
    if (aux == 0)
      return b;
    else
      return pgcd(b, aux);
  }
}
int 
ppcm(a, b)
  int   a, b;
{
  int   pg;
  pg = pgcd(a, b);
  return a * (b / pg);
}
/* localsize:return the number of rows on the local processor given by its
 * row number myprow, of a distributed matrix with m rows distributed of on a
 * grid of processors with p rows with blocksize nbrow : this procedure can
 * also be used to compute the number of cols by replacing rows by cols */
int 
localsize(myprow, p, nbrow, m)
  int   myprow, p, nbrow, m;
{
  int   templateheight, blockheight;
  templateheight = p * nbrow;
  if (m % templateheight != 0) {	/* not an exact boundary */
    if ((m % templateheight) > (nbrow * myprow)) {	/* processor
							 * (myprow,mypcol) has
							 * some elements in that
							 * incomplete template */
      if ((m % templateheight) >= (nbrow * (myprow + 1))) {	/* processor
								 * (myprow,mypcol)'s
								 * part is complete */
	blockheight = (m / templateheight) * nbrow + nbrow;
      } else {	/* processor (myprow,mypcol)'s part is not complete */
	blockheight = (m / templateheight) * nbrow + (m % nbrow);
      };	/* if ((m%templateheight) > (nbrow*(myprow+1))) */
    } else {	/* processor (myprow,mypcol) has no element in that
		 * incomplete template */
      blockheight = (m / templateheight) * nbrow;
    };	/* if ((m%templateheight) > (nbrow*myprow)) */
  } else {	/* exact boundary */
    blockheight = m / p;	/* (m/templateheight) * nbrow */
  };	/* if (m%templateheight !=0) */
  return blockheight;
}
/****************************************************************/
/* Returns the exact memory block size corresponding to the parameters */
int
memoryblocksize(a)
  MDESC *a;
{
  int   myprow, mypcol, p, q;
  /* Compute the (myprow,mypcol) indices of processor mypnum in P0xQ0 We
   * assume the row-major ordering of the BLACS */
  Cblacs_gridinfo(a->ctxt, &p, &q, &myprow, &mypcol);
  myprow = SHIFT(myprow, a->sprow, p);
  mypcol = SHIFT(mypcol, a->spcol, q);
  assert(myprow >= 0 && mypcol >= 0);
  return localsize(myprow, p, a->nbrow, a->m) *
	localsize(mypcol, q, a->nbcol, a->n);
}
void 
checkequal(ctxt, a)
  int   a, ctxt;
{
  int   np, dummy, nbrow, myp, b;
  Cblacs_gridinfo(ctxt, &nbrow, &np, &dummy, &myp);
  assert(nbrow == 1);
  if (np == 1)
    return;
  if (myp == 0) {
    Cigesd2d(ctxt, 1, 1, &a, 1, 0, 1);
    Cigerv2d(ctxt, 1, 1, &b, 1, 0, np - 1);
    assert(a == b);
  } else {
    Cigerv2d(ctxt, 1, 1, &b, 1, 0, myp - 1);
    assert(a == b);
    Cigesd2d(ctxt, 1, 1, &a, 1, 0, (myp + 1) % np);
  }
}
void 
paramcheck(a, i, j, m, n, p, q, gcontext)
  MDESC *a;
  int   i, j, m, n, p, q;
{
  int   p2, q2, myprow, mypcol;
#ifndef NDEBUG
  checkequal(gcontext, p);
  checkequal(gcontext, q);
  checkequal(gcontext, a->sprow);
  checkequal(gcontext, a->spcol);
  checkequal(gcontext, a->m);
  checkequal(gcontext, a->n);
  checkequal(gcontext, i);
  checkequal(gcontext, j);
  checkequal(gcontext, a->nbrow);
  checkequal(gcontext, a->nbcol);
#endif
  Cblacs_gridinfo(a->ctxt, &p2, &q2, &myprow, &mypcol);
  /* compatibility T3D, must check myprow  and mypcol are within bounds */
  if (myprow >= p2 || mypcol >= q2)
    myprow = mypcol = -1;
  if ((myprow >= 0 || mypcol >= 0) && (p2 != p && q2 != q)) {
    fprintf(stderr, "??MR2D:incoherent p,q parameters\n");
    exit(1);
  }
  assert(myprow < p && mypcol < q);
  if (a->sprow < 0 || a->sprow >= p || a->spcol < 0 || a->spcol >= q) {
    fprintf(stderr, "??MR2D:Bad first processor coordinates\n");
    exit(1);
  }
  if (i < 0 || j < 0 || i + m > a->m || j + n > a->n) {
    fprintf(stderr, "??MR2D:Bad submatrix:i=%d,j=%d,\
m=%d,n=%d,M=%d,N=%d\n",
	    i, j, m, n, a->m, a->n);
    exit(1);
  }
  if ((myprow >= 0 || mypcol >= 0) &&
      localsize(SHIFT(myprow, a->sprow, p), p, a->nbrow, a->m) > a->lda) {
    fprintf(stderr, "??MR2D:bad lda arg:row=%d,m=%d,p=%d,\
nbrow=%d,lda=%d,sprow=%d\n",
	    myprow, a->m, p, a->nbrow, a->lda, a->sprow);
    exit(1);
  }
}
/* to change from the submatrix beginning at line i to one beginning at line
 * i' with i'< blocksize return the line number on the local process where
 * the new matrix begin, the new process number, and i' */
int 
changeorigin(myp, sp, p, bs, i, decal, newsp)
  int   myp, sp, p, bs, i;
  int  *decal, *newsp;
{
  int   tempheight, firstblock, firsttemp;
  /* we begin by changing the parameters so that ia < templatewidth,... */
  tempheight = bs * p;
  firsttemp = i / tempheight;
  firstblock = (i / bs) % p;
  *newsp = (sp + firstblock) % p;
  if (myp >= 0)
    *decal = firsttemp * bs + (SHIFT(myp, sp, p) < firstblock ? bs : 0);
  else
    *decal = 0;
  return i % bs;
}
/******************************************************************/
/* Return the indice in local memory of element of indice a in the matrix */
int
localindice(ig, jg, templateheight, templatewidth, a)
  int   templateheight, templatewidth, ig, jg;
  MDESC *a;
/* Return the indice in local memory (scattered distribution) of the element
 * of indice a in global matrix */
{
  int   vtemp, htemp, vsubtemp, hsubtemp, il, jl;
  assert(ig >= 0 && ig < a->m && jg >= 0 && jg < a->n);
  /* coordinates in global matrix with the tests in intersect, ig MUST BE in
   * [0..m] and jg in [0..n] */
  /* coordinates of the template that "owns" the element */
  vtemp = ig / templateheight;
  htemp = jg / templatewidth;
  /* coordinates of the element in the subblock of the (vtemp, htemp)
   * template */
  vsubtemp = ig % a->nbrow;
  hsubtemp = jg % a->nbcol;
  /* coordinates of the element in the local block of the processor */
  il = a->nbrow * vtemp + vsubtemp;
  jl = a->nbcol * htemp + hsubtemp;
  assert(il < a->lda);
#ifndef NDEBUG
  {
    int   pr, pc, p, q, lp, lq;
    Cblacs_gridinfo(a->ctxt, &p, &q, &pr, &pc);
    p = templateheight / a->nbrow;
    q = templatewidth / a->nbcol;
    lp = ig % templateheight / a->nbrow;
    lq = jg % templatewidth / a->nbcol;
    assert(lp == SHIFT(pr, a->sprow, p));
    assert(lq == SHIFT(pc, a->spcol, q));
  }
#endif
  return (jl * a->lda + il);
}