File: CpdagemvAt.c

package info (click to toggle)
scalapack 1.6-13
links: PTS
area: main
in suites: potato
size: 30,476 kB
ctags: 25,789
sloc: fortran: 296,718; ansic: 51,265; makefile: 1,541; sh: 4
file content (185 lines) | stat: -rw-r--r-- 4,867 bytes
#include "tools.h"
void CpdagemvAt(trans, M, N, alpha, A, IA, JA, descA, X0, IX0, JX0, descX0, 
                incX0, beta0, Y0, IY0, JY0, descY0, incY0)
F_CHAR trans;
int M;
int N;
double alpha;
double *A;
int IA;
int JA;
int *descA;
double *X0;
int IX0;
int JX0;
int *descX0;
int incX0;
double beta0;
double *Y0;
int IY0;
int JY0;
int *descY0;
int  incY0;
/*
 *            ======(Y)======
 *                   N
 *            ---------------
 *      ||   |               |          A - M x N
 *      ||   |               |          Y - 1 x N
 *      ||   |               |          X - M x 1
 *     (X)   |      (A)      |  M
 *      ||   |               |          X will be replicated across all columns
 *      ||   |               |          Y will have space on all rows
 *      ||   |               |
 *      ||   |               |
 *            ---------------
 */
{
/*
 * .. External routines ..
 */
   char *ptop();
   void pchkmat();
   void pchkvec();
   void pberror_();
   void Cinfog2l();
   int Cnumroc2();
   void Cblacs_gridinfo();
   void Cdgebs2d();
   void Cdgebr2d();
   void Cdgsum2d();
   F_INTG_FCT dgemv_();
   void Cpdscal1();
   void Cpdcopy1();
   void Cpdaxpy1();

   char *top;
   int ctxt, nprow, npcol, myrow, mycol;
   int IX, JX, descX[DLEN_], incX, IY, JY, descY[DLEN_], incY;
   int i, j, k, h, arow, acol, LOCp, LOCq, nb, info=0, one=1;
   double *absX, *Y, *absA, *x, *y, *a, *aa, beta=0.0;

   ctxt = descA[CTXT_];
   Cblacs_gridinfo(ctxt, &nprow, &npcol, &myrow, &mycol);
   pchkmat(M, 2, N, 3, IA+1, JA+1, descA, 8, &info, nprow, npcol, myrow, mycol);
   pchkvec(N, 3, IX0+1, JX0+1, descX0, incX0, 12, &info, nprow, npcol,
           myrow, mycol);
   pchkvec(M, 2, IY0+1, JY0+1, descY0, incY0, 18, &info, nprow, npcol,
           myrow, mycol);
   if (descA[CTXT_] != descX0[CTXT_])
   {
      if (info == 0) info = -(1200+CTXT_+1);
   }
   else if (descX0[CTXT_] != descY0[CTXT_])
   {
      if (info == 0) info = -(1800+CTXT_+1);
   }
   if (info)
   {
      pberror_(&ctxt, "PDAGEMV", &info);
      return;
   }
/*
 * Quick return, if possible
 */
   if ( (M == 0) || (N == 0) || ((alpha == 0.0) && (beta0 == 1.0)) ) return;
/*
 * Scale Y0 by beta: Y0 = beta * Y0; this allows us to later add in
 * alpha*A*x to get Y0 = alpha*A*x + Y0*beta
 */
   Cpdscal1(N, beta0, Y0, IY0, JY0, descY0, incY0);

/*
 * Get local information about our matrix
 */
   Cinfog2l(IA, JA, descA, nprow, npcol, myrow, mycol, &i, &j, &arow, &acol);
   a = &A[ i+j*descA[LLD_] ];
   nb = descA[NB_];
   LOCp = Cnumroc2(M, IA, descA[MB_], myrow, descA[RSRC_], nprow);
   LOCq = Cnumroc2(N, JA, nb, mycol, descA[CSRC_], npcol);

/*
 * Allocate space for absX, Y, and absA
 */
   i = IA % descA[MB_] + LOCp;
   j = JA % nb + LOCq;

   Mmalloc(absX, double, i+j+nb*LOCp, h, ctxt);
   Y = &absX[i];
   absA = &Y[j];
/*
 * Set up absX, a column vector aligned with A, copy abs( X ) to it,
 * and give all process columns a copy
 */
   IX = IA % descA[MB_];
   JX = 0;
   Mdescset(descX, M + IX, 1, descA[MB_], 1, arow,
            MCindxg2p(JX0, descX0[NB_], descX0[CSRC_], npcol), ctxt, LOCp+IX+1);
   incX = 1;
   Cpdcopy1(M, X0, IX0, JX0, descX0, incX0, absX, IX, JX, descX, incX);
/*
 * Set local pointer into absX
 */
   if (myrow == descX[RSRC_]) x = &absX[IX];
   else x = absX;

   top = ptop("B", "R", "!");
   if (mycol == descX[CSRC_])
   {
      for (i=0; i != LOCp; i++) x[i] = ABS( x[i] );
      Cdgebs2d(ctxt, "r", top, LOCp, 1, x, LOCp);
   }
   else Cdgebr2d(ctxt, "r", top, LOCp, 1, x, LOCp, myrow, descX[CSRC_]);

/*
 * Set up Y, a row vector aligned with A
 */
   IY = 0;
   JY = JA % nb;
   Mdescset(descY, 1, N+JY, 1, nb,
            MCindxg2p(IY0, descY0[MB_], descY0[RSRC_], nprow), acol, ctxt, 1);
   incY = 1;
/*
 * Set local pointer into Y
 */
   if (mycol == descY[CSRC_]) y = &Y[JY];
   else y = Y;
/*
 * Figure local portion of abs matrix vector product by looping over NB wide panels
 */
   if (LOCq != 0)
   {
      if (LOCp != 0)
      {
         j = 0;
         do
         {
            aa = absA;
            h = MIN(nb, LOCq-j);
            for (k=0; k != h; k++)  /* Set absA = abs( sub(A) ) */
            {
               for(i=0; i != LOCp; i++) aa[i] = ABS( a[i] );
               a += descA[LLD_];
               aa += LOCp;
            }
            dgemv_(trans, &LOCp, &h, &alpha, absA, &LOCp, x, &one, &beta,
                   &y[j], &one);
            j += nb;
         }
         while (j < LOCq);
      }
      else for (i=0; i != LOCq; i++) y[i] = 0.0;
/*
 *    Figure global answer
 */
      top = ptop("C", "C", "!");
      Cdgsum2d(ctxt, "col", top, LOCq, 1, y, LOCq, descY[RSRC_], mycol);
   }
/*
 * Y contains ALPHA*A*x. Y0 contains BETA*y.  Use pdaxpy to set
 * Y0 = ALPHA*A*x + BETA*y
 */
   Cpdaxpy1(N, 1.0, Y, IY, JY, descY, incY, Y0, IY0, JY0, descY0, incY0);

   if (absX) free(absX);
}