Actual source code: dgmres.c

petsc-3.4.2 2013-07-02
  2: /*
  3:  This file implements the deflated GMRES.
  4:  References:
  5:  [1]J. Erhel, K. Burrage and B. Pohl,  Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
  6:  [2] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023

  8:  */

 10:  #include ../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h

 12: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;

 14: #define GMRES_DELTA_DIRECTIONS 10
 15: #define GMRES_DEFAULT_MAXK     30
 16: static PetscErrorCode    KSPDGMRESGetNewVectors(KSP,PetscInt);
 17: static PetscErrorCode    KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
 18: static PetscErrorCode    KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);

 22: PetscErrorCode  KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
 23: {

 27:   PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
 28:   return(0);
 29: }
 32: PetscErrorCode  KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
 33: {

 37:   PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
 38:   return(0);
 39: }
 42: PetscErrorCode  KSPDGMRESForce(KSP ksp,PetscInt force)
 43: {

 47:   PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscInt),(ksp,force));
 48:   return(0);
 49: }
 52: PetscErrorCode  KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
 53: {

 57:   PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
 58:   return(0);
 59: }
 62: PetscErrorCode  KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
 63: {

 67:   PetscTryMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
 68:   return(0);
 69: }
 72: PetscErrorCode  KSPDGMRESComputeDeflationData(KSP ksp)
 73: {

 77:   PetscTryMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP),(ksp));
 78:   return(0);
 79: }
 82: PetscErrorCode  KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
 83: {

 87:   PetscTryMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
 88:   return(0);
 89: }

 93: PetscErrorCode  KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
 94: {

 98:   PetscTryMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
 99:   return(0);
100: }

104: PetscErrorCode  KSPSetUp_DGMRES(KSP ksp)
105: {
107:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
108:   PetscInt       neig    = dgmres->neig+EIG_OFFSET;
109:   PetscInt       max_k   = dgmres->max_k+1;

112:   KSPSetUp_GMRES(ksp);
113:   if (!dgmres->neig) return(0);

115:   /* Allocate workspace for the Schur vectors*/
116:   PetscMalloc((neig) *max_k*sizeof(PetscReal), &SR);
117:   dgmres->wr    = NULL;
118:   dgmres->wi    = NULL;
119:   dgmres->perm  = NULL;
120:   dgmres->modul = NULL;
121:   dgmres->Q     = NULL;
122:   dgmres->Z     = NULL;

124:   UU   = NULL;
125:   XX   = NULL;
126:   MX   = NULL;
127:   AUU  = NULL;
128:   XMX  = NULL;
129:   XMU  = NULL;
130:   UMX  = NULL;
131:   AUAU = NULL;
132:   TT   = NULL;
133:   TTF  = NULL;
134:   INVP = NULL;
135:   X1   = NULL;
136:   X2   = NULL;
137:   MU   = NULL;
138:   return(0);
139: }

141: /*
142:  Run GMRES, possibly with restart.  Return residual history if requested.
143:  input parameters:

145:  .       gmres  - structure containing parameters and work areas

147:  output parameters:
148:  .        nres    - residuals (from preconditioned system) at each step.
149:  If restarting, consider passing nres+it.  If null,
150:  ignored
151:  .        itcount - number of iterations used.  nres[0] to nres[itcount]
152:  are defined.  If null, ignored.

154:  Notes:
155:  On entry, the value in vector VEC_VV(0) should be the initial residual
156:  (this allows shortcuts where the initial preconditioned residual is 0).
157:  */
160: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
161: {
162:   KSP_DGMRES     *dgmres = (KSP_DGMRES*)(ksp->data);
163:   PetscReal      res_norm,res,hapbnd,tt;
165:   PetscInt       it     = 0;
166:   PetscInt       max_k  = dgmres->max_k;
167:   PetscBool      hapend = PETSC_FALSE;
168:   PetscReal      res_old;
169:   PetscInt       test = 0;

172:   VecNormalize(VEC_VV(0),&res_norm);
173:   res     = res_norm;
174:   *GRS(0) = res_norm;

176:   /* check for the convergence */
177:   PetscObjectAMSTakeAccess((PetscObject)ksp);
178:   ksp->rnorm = res;
179:   PetscObjectAMSGrantAccess((PetscObject)ksp);
180:   dgmres->it = (it - 1);
181:   KSPLogResidualHistory(ksp,res);
182:   KSPMonitor(ksp,ksp->its,res);
183:   if (!res) {
184:     if (itcount) *itcount = 0;
185:     ksp->reason = KSP_CONVERGED_ATOL;
186:     PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
187:     return(0);
188:   }
189:   /* record the residual norm to test if deflation is needed */
190:   res_old = res;

192:   (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
193:   while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
194:     if (it) {
195:       KSPLogResidualHistory(ksp,res);
196:       KSPMonitor(ksp,ksp->its,res);
197:     }
198:     dgmres->it = (it - 1);
199:     if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
200:       KSPDGMRESGetNewVectors(ksp,it+1);
201:     }
202:     if (dgmres->r > 0) {
203:       if (ksp->pc_side == PC_LEFT) {
204:         /* Apply the first preconditioner */
205:         KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
206:         /* Then apply Deflation as a preconditioner */
207:         KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
208:       } else if (ksp->pc_side == PC_RIGHT) {
209:         KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
210:         KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
211:       }
212:     } else {
213:       KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
214:     }
215:     dgmres->matvecs += 1;
216:     /* update hessenberg matrix and do Gram-Schmidt */
217:     (*dgmres->orthog)(ksp,it);

219:     /* vv(i+1) . vv(i+1) */
220:     VecNormalize(VEC_VV(it+1),&tt);
221:     /* save the magnitude */
222:     *HH(it+1,it)  = tt;
223:     *HES(it+1,it) = tt;

225:     /* check for the happy breakdown */
226:     hapbnd = PetscAbsScalar(tt / *GRS(it));
227:     if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
228:     if (tt < hapbnd) {
229:       PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %G tt = %G\n",hapbnd,tt);
230:       hapend = PETSC_TRUE;
231:     }
232:     KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);

234:     it++;
235:     dgmres->it = (it-1);     /* For converged */
236:     ksp->its++;
237:     ksp->rnorm = res;
238:     if (ksp->reason) break;

240:     (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);

242:     /* Catch error in happy breakdown and signal convergence and break from loop */
243:     if (hapend) {
244:       if (!ksp->reason) {
245:         if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
246:         else {
247:           ksp->reason = KSP_DIVERGED_BREAKDOWN;
248:           break;
249:         }
250:       }
251:     }
252:   }

254:   /* Monitor if we know that we will not return for a restart */
255:   if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
256:     KSPLogResidualHistory(ksp,res);
257:     KSPMonitor(ksp,ksp->its,res);
258:   }
259:   if (itcount) *itcount = it;

261:   /*
262:    Down here we have to solve for the "best" coefficients of the Krylov
263:    columns, add the solution values together, and possibly unwind the
264:    preconditioning from the solution
265:    */
266:   /* Form the solution (or the solution so far) */
267:   KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);

269:   /* Compute data for the deflation to be used during the next restart */
270:   if (!ksp->reason && ksp->its < ksp->max_it) {
271:     test = max_k *log(ksp->rtol/res) /log(res/res_old);
272:     /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed  */
273:     if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
274:        KSPDGMRESComputeDeflationData(ksp);
275:     }
276:   }
277:   return(0);
278: }

282: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
283: {
285:   PetscInt       its,itcount;
286:   KSP_DGMRES     *dgmres    = (KSP_DGMRES*) ksp->data;
287:   PetscBool      guess_zero = ksp->guess_zero;

290:   if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");

292:   PetscObjectAMSTakeAccess((PetscObject)ksp);
293:   ksp->its        = 0;
294:   dgmres->matvecs = 0;
295:   PetscObjectAMSGrantAccess((PetscObject)ksp);

297:   itcount     = 0;
298:   ksp->reason = KSP_CONVERGED_ITERATING;
299:   while (!ksp->reason) {
300:     KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
301:     if (ksp->pc_side == PC_LEFT) {
302:       dgmres->matvecs += 1;
303:       if (dgmres->r > 0) {
304:         KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
305:         VecCopy(VEC_TEMP, VEC_VV(0));
306:       }
307:     }

309:     KSPDGMRESCycle(&its,ksp);
310:     itcount += its;
311:     if (itcount >= ksp->max_it) {
312:       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
313:       break;
314:     }
315:     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
316:   }
317:   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
318:   return(0);
319: }


324: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
325: {
327:   KSP_DGMRES     *dgmres  = (KSP_DGMRES*) ksp->data;
328:   PetscInt       neig1    = dgmres->neig+EIG_OFFSET;
329:   PetscInt       max_neig = dgmres->max_neig;

332:   if (dgmres->r) {
333:     VecDestroyVecs(max_neig, &UU);
334:     VecDestroyVecs(max_neig, &MU);
335:     if (XX) {
336:       VecDestroyVecs(neig1, &XX);
337:       VecDestroyVecs(neig1, &MX);
338:     }

340:     PetscFree(TT);
341:     PetscFree(TTF);
342:     PetscFree(INVP);

344:     PetscFree(XMX);
345:     PetscFree(UMX);
346:     PetscFree(XMU);
347:     PetscFree(X1);
348:     PetscFree(X2);
349:     PetscFree(dgmres->work);
350:     PetscFree(dgmres->iwork);
351:     PetscFree(dgmres->wr);
352:     PetscFree(dgmres->wi);
353:     PetscFree(dgmres->modul);
354:     PetscFree(dgmres->Q);
355:     PetscFree(ORTH);
356:     PetscFree(AUAU);
357:     PetscFree(AUU);
358:     PetscFree(SR2);
359:   }
360:   PetscFree(SR);
361:   KSPDestroy_GMRES(ksp);
362:   return(0);
363: }
364: /*
365:  KSPDGMRESBuildSoln - create the solution from the starting vector and the
366:  current iterates.

368:  Input parameters:
369:  nrs - work area of size it + 1.
370:  vs  - index of initial guess
371:  vdest - index of result.  Note that vs may == vdest (replace
372:  guess with the solution).

374:  This is an internal routine that knows about the GMRES internals.
375:  */
378: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
379: {
380:   PetscScalar    tt;
382:   PetscInt       ii,k,j;
383:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) (ksp->data);

385:   /* Solve for solution vector that minimizes the residual */

388:   /* If it is < 0, no gmres steps have been performed */
389:   if (it < 0) {
390:     VecCopy(vs,vdest);     /* VecCopy() is smart, exists immediately if vguess == vdest */
391:     return(0);
392:   }
393:   if (*HH(it,it) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %G",it,PetscAbsScalar(*GRS(it)));
394:   if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
395:   else nrs[it] = 0.0;

397:   for (ii=1; ii<=it; ii++) {
398:     k  = it - ii;
399:     tt = *GRS(k);
400:     for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
401:     if (*HH(k,k) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
402:     nrs[k] = tt / *HH(k,k);
403:   }

405:   /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
406:   VecSet(VEC_TEMP,0.0);
407:   VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));

409:   /* Apply deflation */
410:   if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
411:     KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
412:     VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
413:   }
414:   KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);

416:   /* add solution to previous solution */
417:   if (vdest != vs) {
418:     VecCopy(vs,vdest);
419:   }
420:   VecAXPY(vdest,1.0,VEC_TEMP);
421:   return(0);
422: }
423: /*
424:  Do the scalar work for the orthogonalization.  Return new residual norm.
425:  */
428: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
429: {
430:   PetscScalar *hh,*cc,*ss,tt;
431:   PetscInt    j;
432:   KSP_DGMRES  *dgmres = (KSP_DGMRES*) (ksp->data);

435:   hh = HH(0,it);
436:   cc = CC(0);
437:   ss = SS(0);

439:   /* Apply all the previously computed plane rotations to the new column
440:    of the Hessenberg matrix */
441:   for (j=1; j<=it; j++) {
442:     tt  = *hh;
443:     *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
444:     hh++;
445:     *hh = *cc++ * *hh -(*ss++ * tt);
446:   }

448:   /*
449:    compute the new plane rotation, and apply it to:
450:    1) the right-hand-side of the Hessenberg system
451:    2) the new column of the Hessenberg matrix
452:    thus obtaining the updated value of the residual
453:    */
454:   if (!hapend) {
455:     tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
456:     if (tt == 0.0) {
457:       ksp->reason = KSP_DIVERGED_NULL;
458:       return(0);
459:     }
460:     *cc        = *hh / tt;
461:     *ss        = *(hh+1) / tt;
462:     *GRS(it+1) = -(*ss * *GRS(it));
463:     *GRS(it)   = PetscConj(*cc) * *GRS(it);
464:     *hh        = PetscConj(*cc) * *hh + *ss * *(hh+1);
465:     *res       = PetscAbsScalar(*GRS(it+1));
466:   } else {
467:     /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
468:      another rotation matrix (so RH doesn't change).  The new residual is
469:      always the new sine term times the residual from last time (GRS(it)),
470:      but now the new sine rotation would be zero...so the residual should
471:      be zero...so we will multiply "zero" by the last residual.  This might
472:      not be exactly what we want to do here -could just return "zero". */

474:     *res = 0.0;
475:   }
476:   return(0);
477: }
478: /*
479:  This routine allocates more work vectors, starting from VEC_VV(it).
480:  */
483: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
484: {
485:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
487:   PetscInt       nwork = dgmres->nwork_alloc,k,nalloc;

490:   nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
491:   /* Adjust the number to allocate to make sure that we don't exceed the
492:    number of available slots */
493:   if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
494:     nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
495:   }
496:   if (!nalloc) return(0);

498:   dgmres->vv_allocated += nalloc;

500:   KSPGetVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
501:   PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);

503:   dgmres->mwork_alloc[nwork] = nalloc;
504:   for (k=0; k<nalloc; k++) {
505:     dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
506:   }
507:   dgmres->nwork_alloc++;
508:   return(0);
509: }

513: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
514: {
515:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;

519:   if (!ptr) {
520:     if (!dgmres->sol_temp) {
521:       VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
522:       PetscLogObjectParent(ksp,dgmres->sol_temp);
523:     }
524:     ptr = dgmres->sol_temp;
525:   }
526:   if (!dgmres->nrs) {
527:     /* allocate the work area */
528:     PetscMalloc(dgmres->max_k*sizeof(PetscScalar),&dgmres->nrs);
529:     PetscLogObjectMemory(ksp,dgmres->max_k*sizeof(PetscScalar));
530:   }

532:   KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
533:   if (result) *result = ptr;
534:   return(0);
535: }

539: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
540: {
541:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
543:   PetscBool      iascii,isharmonic;

546:   KSPView_GMRES(ksp,viewer);
547:   PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
548:   if (iascii) {
549:     if (dgmres->force) PetscViewerASCIIPrintf(viewer, "   DGMRES: Adaptive strategy is used: FALSE\n");
550:     else PetscViewerASCIIPrintf(viewer, "   DGMRES: Adaptive strategy is used: TRUE\n");
551:     PetscOptionsHasName(NULL, "-ksp_dgmres_harmonic_ritz", &isharmonic);
552:     if (isharmonic) {
553:       PetscViewerASCIIPrintf(viewer, "  DGMRES: Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
554:     } else {
555:       PetscViewerASCIIPrintf(viewer, "  DGMRES: Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
556:     }
557:     PetscViewerASCIIPrintf(viewer, "  DGMRES: Total number of extracted eigenvalues = %D\n", dgmres->r);
558:     PetscViewerASCIIPrintf(viewer, "  DGMRES: Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
559:     PetscViewerASCIIPrintf(viewer, "  DGMRES: relaxation parameter for the adaptive strategy(smv)  = %g\n", dgmres->smv);
560:     PetscViewerASCIIPrintf(viewer, "  DGMRES: Number of matvecs : %D\n", dgmres->matvecs);
561:   }
562:   return(0);
563: }

565: /* New DGMRES functions */

569: static PetscErrorCode  KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
570: {
571:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

574:   if (neig< 0 && neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
575:   dgmres->neig=neig;
576:   return(0);
577: }

581: static PetscErrorCode  KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
582: {
583:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

586:   if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
587:   dgmres->max_neig=max_neig;
588:   return(0);
589: }

593: static PetscErrorCode  KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
594: {
595:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

598:   if (ratio <= 0) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
599:   dgmres->smv=ratio;
600:   return(0);
601: }

605: static PetscErrorCode  KSPDGMRESForce_DGMRES(KSP ksp,PetscInt force)
606: {
607:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

610:   if (force != 0 && force != 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"Value must be 0 or 1");
611:   dgmres->force = 1;
612:   return(0);
613: }

615: extern PetscErrorCode KSPSetFromOptions_GMRES(KSP);

619: PetscErrorCode KSPSetFromOptions_DGMRES(KSP ksp)
620: {
622:   PetscInt       neig;
623:   PetscInt       max_neig;
624:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
625:   PetscBool      flg;
626:   PetscReal      smv;
627:   PetscInt       input;

630:   KSPSetFromOptions_GMRES(ksp);
631:   PetscOptionsHead("KSP DGMRES Options");
632:   PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
633:   if (flg) {
634:     KSPDGMRESSetEigen(ksp, neig);
635:   }
636:   PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
637:   if (flg) {
638:     KSPDGMRESSetMaxEigen(ksp, max_neig);
639:   }
640:   PetscOptionsReal("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &smv, &flg);
641:   if (flg) dgmres->smv = smv;
642:   PetscOptionsInt("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &input, &flg);
643:   if (flg) dgmres->improve = input;
644:   PetscOptionsInt("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &input, &flg);
645:   if (flg) dgmres->force = input;
646:   PetscOptionsTail();
647:   return(0);
648: }

652: static PetscErrorCode  KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
653: {
654:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
656:   PetscInt       i,j, k;
657:   PetscBLASInt   nr, bmax;
658:   PetscInt       r = dgmres->r;
659:   PetscInt       neig;          /* number of eigenvalues to extract at each restart */
660:   PetscInt       neig1    = dgmres->neig + EIG_OFFSET;  /* max number of eig that can be extracted at each restart */
661:   PetscInt       max_neig = dgmres->max_neig;  /* Max number of eigenvalues to extract during the iterative process */
662:   PetscInt       N        = dgmres->max_k+1;
663:   PetscInt       n        = dgmres->it+1;
664:   PetscReal      alpha;
665: #if !defined(PETSC_MISSING_LAPACK_GETRF)
666:   PetscBLASInt info;
667: #endif

670:   PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
671:   if (dgmres->neig == 0) return(0);
672:   if (max_neig < (r+neig1) && !dgmres->improve) {
673:     PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
674:     return(0);
675:   }

677:    KSPDGMRESComputeSchurForm(ksp, &neig);
678:   /* Form the extended Schur vectors X=VV*Sr */
679:   if (!XX) {
680:     VecDuplicateVecs(VEC_VV(0), neig1, &XX);
681:   }
682:   for (j = 0; j<neig; j++) {
683:     VecZeroEntries(XX[j]);
684:     VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
685:   }

687:   /* Orthogonalize X against U */
688:   if (!ORTH) {
689:     PetscMalloc(max_neig*sizeof(PetscReal), &ORTH);
690:   }
691:   if (r > 0) {
692:     /* modified Gram-Schmidt */
693:     for (j = 0; j<neig; j++) {
694:       for (i=0; i<r; i++) {
695:         /* First, compute U'*X[j] */
696:         VecDot(XX[j], UU[i], &alpha);
697:         /* Then, compute X(j)=X(j)-U*U'*X(j) */
698:         VecAXPY(XX[j], -alpha, UU[i]);
699:       }
700:     }
701:   }
702:   /* Compute MX = M^{-1}*A*X */
703:   if (!MX) {
704:     VecDuplicateVecs(VEC_VV(0), neig1, &MX);
705:   }
706:   for (j = 0; j<neig; j++) {
707:     KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
708:   }
709:   dgmres->matvecs += neig;

711:   if ((r+neig1) > max_neig && dgmres->improve) {    /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
712:     KSPDGMRESImproveEig(ksp, neig);
713:     return(0);   /* We return here since data for M have been improved in  KSPDGMRESImproveEig()*/
714:   }

716:   /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
717:   if (!XMX) {
718:     PetscMalloc(neig1*neig1*sizeof(PetscReal), &XMX);
719:   }
720:   for (j = 0; j < neig; j++) {
721:     VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
722:   }

724:   if (r > 0) {
725:     /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
726:     if (!UMX) {
727:       PetscMalloc(max_neig*neig1*sizeof(PetscReal), &UMX);
728:     }
729:     for (j = 0; j < neig; j++) {
730:       VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
731:     }
732:     /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
733:     if (!XMU) {
734:       PetscMalloc(max_neig*neig1*sizeof(PetscReal), &XMU);
735:     }
736:     for (j = 0; j<r; j++) {
737:       VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
738:     }
739:   }

741:   /* Form the new matrix T = [T UMX; XMU XMX]; */
742:   if (!TT) {
743:     PetscMalloc(max_neig*max_neig*sizeof(PetscReal), &TT);
744:   }
745:   if (r > 0) {
746:     /* Add XMU to T */
747:     for (j = 0; j < r; j++) {
748:       PetscMemcpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig*sizeof(PetscReal));
749:     }
750:     /* Add [UMX; XMX] to T */
751:     for (j = 0; j < neig; j++) {
752:       k = r+j;
753:       PetscMemcpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r*sizeof(PetscReal));
754:       PetscMemcpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
755:     }
756:   } else { /* Add XMX to T */
757:     for (j = 0; j < neig; j++) {
758:       PetscMemcpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
759:     }
760:   }

762:   dgmres->r += neig;
763:   r          = dgmres->r;
764:   PetscBLASIntCast(r,&nr);
765:   /*LU Factorize T with Lapack xgetrf routine */

767:   PetscBLASIntCast(max_neig,&bmax);
768:   if (!TTF) {
769:     PetscMalloc(bmax*bmax*sizeof(PetscReal), &TTF);
770:   }
771:   PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
772:   if (!INVP) {
773:     PetscMalloc(bmax*sizeof(PetscBLASInt), &INVP);
774:   }
775: #if defined(PETSC_MISSING_LAPACK_GETRF)
776:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
777: #else
778:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
779:   if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
780: #endif

782:   /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
783:   if (!UU) {
784:     VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
785:     VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
786:   }
787:   for (j=0; j<neig; j++) {
788:     VecCopy(XX[j], UU[r-neig+j]);
789:     VecCopy(MX[j], MU[r-neig+j]);
790:   }
791:   PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
792:   return(0);
793: }

797: static PetscErrorCode  KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
798: {
799:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
801:   PetscInt       N = dgmres->max_k + 1, n=dgmres->it+1;
802:   PetscBLASInt   bn, bN;
803:   PetscReal      *A;
804:   PetscBLASInt   ihi;
805:   PetscBLASInt   ldA;          /* leading dimension of A */
806:   PetscBLASInt   ldQ;          /* leading dimension of Q */
807:   PetscReal      *Q;           /*  orthogonal matrix of  (left) schur vectors */
808:   PetscReal      *work;        /* working vector */
809:   PetscBLASInt   lwork;        /* size of the working vector */
810:   PetscInt       *perm;        /* Permutation vector to sort eigenvalues */
811:   PetscInt       i, j;
812:   PetscBLASInt   NbrEig;       /* Number of eigenvalues really extracted */
813:   PetscReal      *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
814:   PetscBLASInt   *select;
815:   PetscBLASInt   *iwork;
816:   PetscBLASInt   liwork;
817:   PetscScalar    *Ht;           /* Transpose of the Hessenberg matrix */
818:   PetscScalar    *t;            /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
819:   PetscBLASInt   nrhs = 1;      /* Number of right hand side */
820:   PetscBLASInt   *ipiv;         /* Permutation vector to be used in LAPACK */
821: #if !defined(PETSC_MISSING_LAPACK_HSEQR) || !defined(PETSC_MISSING_LAPACK_TRSEN)
822:   PetscBLASInt ilo = 1;
823:   PetscBLASInt info;
824:   PetscReal    CondEig;         /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
825:   PetscReal    CondSub;         /* estimated reciprocal condition number of the specified invariant subspace. */
826:   PetscBool    flag;            /* determine whether to use Ritz vectors or harmonic Ritz vectors */
827: #endif

830:   PetscBLASIntCast(n,&bn);
831:   PetscBLASIntCast(N,&bN);
832:   ihi  = ldQ = bn;
833:   ldA  = bN;
834:   PetscBLASIntCast(5*N,&lwork);

836: #if defined(PETSC_USE_COMPLEX)
837:   SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
838: #endif

840:   PetscMalloc(ldA*ldA*sizeof(PetscReal), &A);
841:   PetscMalloc(ldQ*n*sizeof(PetscReal), &Q);
842:   PetscMalloc(lwork*sizeof(PetscReal), &work);
843:   if (!dgmres->wr) {
844:     PetscMalloc(n*sizeof(PetscReal), &dgmres->wr);
845:     PetscMalloc(n*sizeof(PetscReal), &dgmres->wi);
846:   }
847:   wr   = dgmres->wr;
848:   wi   = dgmres->wi;
849:   PetscMalloc(n*sizeof(PetscReal),&modul);
850:   PetscMalloc(n*sizeof(PetscInt),&perm);
851:   /* copy the Hessenberg matrix to work space */
852:   PetscMemcpy(A, dgmres->hes_origin, ldA*ldA*sizeof(PetscReal));
853:   PetscOptionsHasName(NULL, "-ksp_dgmres_harmonic_ritz", &flag);
854:   if (flag) {
855:     /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
856:     /* Transpose the Hessenberg matrix */
857:     PetscMalloc(bn*bn*sizeof(PetscScalar), &Ht);
858:     for (i = 0; i < bn; i++) {
859:       for (j = 0; j < bn; j++) {
860:         Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
861:       }
862:     }

864:     /* Solve the system H^T*t = h_{m+1,m}e_m */
865:     PetscMalloc(bn*sizeof(PetscScalar), &t);
866:     PetscMemzero(t, bn*sizeof(PetscScalar));
867:     t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
868:     PetscMalloc(bn*sizeof(PetscBLASInt), &ipiv);
869:     /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
870: #if   defined(PETSC_MISSING_LAPACK_GESV)
871:     SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
872: #else
873:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
874:     if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
875: #endif
876:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
877:     for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
878:     PetscFree(t);
879:     PetscFree(Ht);
880:   }
881:   /* Compute eigenvalues with the Schur form */
882: #if defined(PETSC_MISSING_LAPACK_HSEQR)
883:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
884: #else
885:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
886:   if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
887: #endif
888:   PetscFree(work);

890:   /* sort the eigenvalues */
891:   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
892:   for (i=0; i<n; i++) perm[i] = i;

894:   PetscSortRealWithPermutation(n, modul, perm);
895:   /* save the complex modulus of the largest eigenvalue in magnitude */
896:   if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
897:   /* count the number of extracted eigenvalues (with complex conjugates) */
898:   NbrEig = 0;
899:   while (NbrEig < dgmres->neig) {
900:     if (wi[perm[NbrEig]] != 0) NbrEig += 2;
901:     else NbrEig += 1;
902:   }
903:   /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */

905:   PetscMalloc(n * sizeof(PetscBLASInt), &select);
906:   PetscMemzero(select, n * sizeof(PetscBLASInt));

908:   if (!dgmres->GreatestEig) {
909:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
910:   } else {
911:     for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
912:   }
913:   /* call Lapack dtrsen */
914:   lwork  =  PetscMax(1, 4 * NbrEig *(bn-NbrEig));
915:   liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
916:   PetscMalloc(lwork * sizeof(PetscScalar), &work);
917:   PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
918: #if defined(PETSC_MISSING_LAPACK_TRSEN)
919:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
920: #else
921:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
922:   if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
923: #endif
924:   PetscFree(select);

926:   /* Extract the Schur vectors */
927:   for (j = 0; j < NbrEig; j++) {
928:     PetscMemcpy(&SR[j*N], &(Q[j*ldQ]), n*sizeof(PetscReal));
929:   }
930:   *neig = NbrEig;
931:   PetscFree(A);
932:   PetscFree(work);
933:   PetscFree(perm);
934:   PetscFree(work);
935:   PetscFree(iwork);
936:   PetscFree(modul);
937:   PetscFree(Q);
938:   return(0);
939: }

943: static PetscErrorCode  KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
944: {
945:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
946:   PetscInt       i, r     = dgmres->r;
948:   PetscBLASInt   nrhs     = 1;
949:   PetscReal      alpha    = 1.0;
950:   PetscInt       max_neig = dgmres->max_neig;
951:   PetscBLASInt   br,bmax;
952:   PetscInt       lambda = dgmres->lambdaN;
953: #if !defined(PETSC_MISSING_LAPACK_GERFS)
954:   PetscReal    berr, ferr;
955:   PetscBLASInt info;
956: #endif

959:   PetscBLASIntCast(r,&br);
960:   PetscBLASIntCast(max_neig,&bmax);
961:   PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
962:   if (!r) {
963:     VecCopy(x,y);
964:     return(0);
965:   }
966:   /* Compute U'*x */
967:   if (!X1) {
968:     PetscMalloc(bmax*sizeof(PetscReal), &X1);
969:     PetscMalloc(bmax*sizeof(PetscReal), &X2);
970:   }
971:   VecMDot(x, r, UU, X1);

973:   /* Solve T*X1=X2 for X1*/
974:   PetscMemcpy(X2, X1, br*sizeof(PetscReal));
975: #if defined(PETSC_MISSING_LAPACK_GETRS)
976:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
977: #else
978:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
979:   if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
980: #endif
981:   /* Iterative refinement -- is it really necessary ?? */
982:   if (!WORK) {
983:     PetscMalloc(3*bmax*sizeof(PetscReal), &WORK);
984:     PetscMalloc(bmax*sizeof(PetscBLASInt), &IWORK);
985:   }
986: #if defined(PETSC_MISSING_LAPACK_GERFS)
987:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
988: #else
989:   PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
990:   if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
991: #endif

993:   for (i = 0; i < r; i++) X2[i] =  X1[i]/lambda - X2[i];

995:   /* Compute X2=U*X2 */
996:   VecZeroEntries(y);
997:   VecMAXPY(y, r, X2, UU);
998:   VecAXPY(y, alpha, x);

1000:   PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
1001:   return(0);
1002: }

1006: static PetscErrorCode  KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
1007: {
1008:   KSP_DGMRES   *dgmres = (KSP_DGMRES*) ksp->data;
1009:   PetscInt     j,r_old, r = dgmres->r;
1010:   PetscBLASInt i     = 0;
1011:   PetscInt     neig1 = dgmres->neig + EIG_OFFSET;
1012:   PetscInt     bmax  = dgmres->max_neig;
1013:   PetscInt     aug   = r + neig;         /* actual size of the augmented invariant basis */
1014:   PetscInt     aug1  = bmax+neig1;       /* maximum size of the augmented invariant basis */
1015:   PetscBLASInt ldA;            /* leading dimension of AUAU and AUU*/
1016:   PetscBLASInt N;              /* size of AUAU */
1017:   PetscReal    *Q;             /*  orthogonal matrix of  (left) schur vectors */
1018:   PetscReal    *Z;             /*  orthogonal matrix of  (right) schur vectors */
1019:   PetscReal    *work;          /* working vector */
1020:   PetscBLASInt lwork;          /* size of the working vector */
1021:   PetscBLASInt info;           /* Output info from Lapack routines */
1022:   PetscInt     *perm;          /* Permutation vector to sort eigenvalues */
1023:   PetscReal    *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
1024:   PetscInt     ierr;
1025:   PetscBLASInt NbrEig = 0,nr,bm;
1026:   PetscBLASInt *select;
1027:   PetscBLASInt liwork, *iwork;
1028: #if !defined(PETSC_MISSING_LAPACK_TGSEN)
1029:   PetscReal    Dif[2];
1030:   PetscBLASInt ijob  = 2;
1031:   PetscBLASInt wantQ = 1, wantZ = 1;
1032: #endif

1035:   /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
1036:   if (!AUU) {
1037:     PetscMalloc(aug1*aug1*sizeof(PetscReal), &AUU);
1038:     PetscMalloc(aug1*aug1*sizeof(PetscReal), &AUAU);
1039:   }
1040:   /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
1041:    * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
1042:   /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
1043:   for (j=0; j < r; j++) {
1044:     VecMDot(UU[j], r, MU, &AUU[j*aug1]);
1045:   }
1046:   /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
1047:   for (j = 0; j < neig; j++) {
1048:     VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
1049:   }
1050:   /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
1051:   for (j = 0; j < r; j++) {
1052:     VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
1053:   }
1054:   /* (MX)'*X size (neig neig) --  store in AUU from the column <r> and the row <r>*/
1055:   for (j = 0; j < neig; j++) {
1056:     VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
1057:   }

1059:   /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
1060:   /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
1061:   for (j=0; j < r; j++) {
1062:     VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
1063:   }
1064:   /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
1065:   for (j = 0; j < neig; j++) {
1066:     VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
1067:   }
1068:   /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
1069:   for (j = 0; j < r; j++) {
1070:     VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
1071:   }
1072:   /* (MX)'*MX size (neig neig) --  store in AUAU from the column <r> and the row <r>*/
1073:   for (j = 0; j < neig; j++) {
1074:     VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
1075:   }

1077:   /* Computation of the eigenvectors */
1078:   PetscBLASIntCast(aug1,&ldA);
1079:   PetscBLASIntCast(aug,&N);
1080:   lwork = 8 * N + 20; /* sizeof the working space */
1081:   PetscMalloc(N*sizeof(PetscReal), &wr);
1082:   PetscMalloc(N*sizeof(PetscReal), &wi);
1083:   PetscMalloc(N*sizeof(PetscReal), &beta);
1084:   PetscMalloc(N*sizeof(PetscReal), &modul);
1085:   PetscMalloc(N*sizeof(PetscInt), &perm);
1086:   PetscMalloc(N*N*sizeof(PetscReal), &Q);
1087:   PetscMalloc(N*N*sizeof(PetscReal), &Z);
1088:   PetscMalloc(lwork*sizeof(PetscReal), &work);
1089: #if defined(PETSC_MISSING_LAPACK_GGES)
1090:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
1091: #else
1092:   PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
1093:   if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1094: #endif
1095:   for (i=0; i<N; i++) {
1096:     if (beta[i] !=0.0) {
1097:       wr[i] /=beta[i];
1098:       wi[i] /=beta[i];
1099:     }
1100:   }
1101:   /* sort the eigenvalues */
1102:   for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
1103:   for (i=0; i<N; i++) perm[i] = i;
1104:   PetscSortRealWithPermutation(N, modul, perm);
1105:   /* Save the norm of the largest eigenvalue */
1106:   if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1107:   /* Allocate space to extract the first r schur vectors   */
1108:   if (!SR2) {
1109:     PetscMalloc(aug1*bmax*sizeof(PetscReal), &SR2);
1110:   }
1111:   /* count the number of extracted eigenvalues (complex conjugates count as 2) */
1112:   while (NbrEig < bmax) {
1113:     if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1114:     else NbrEig += 2;
1115:   }
1116:   if (NbrEig > bmax) NbrEig = bmax - 1;
1117:   r_old     = r; /* previous size of r */
1118:   dgmres->r = r = NbrEig;

1120:   /* Select the eigenvalues to reorder */
1121:   PetscMalloc(N * sizeof(PetscBLASInt), &select);
1122:   PetscMemzero(select, N * sizeof(PetscBLASInt));
1123:   if (!dgmres->GreatestEig) {
1124:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
1125:   } else {
1126:     for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
1127:   }
1128:   /* Reorder and extract the new <r> schur vectors */
1129:   lwork  = PetscMax(4 * N + 16,  2 * NbrEig *(N - NbrEig));
1130:   liwork = PetscMax(N + 6,  2 * NbrEig *(N - NbrEig));
1131:   PetscFree(work);
1132:   PetscMalloc(lwork * sizeof(PetscReal), &work);
1133:   PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
1134: #if defined(PETSC_MISSING_LAPACK_TGSEN)
1135:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
1136: #else
1137:   PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
1138:   if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1139: #endif
1140:   PetscFree(select);

1142:   for (j=0; j<r; j++) {
1143:     PetscMemcpy(&SR2[j*aug1], &(Z[j*N]), N*sizeof(PetscReal));
1144:   }

1146:   /* Multiply the Schur vectors SR2 by U (and X)  to get a new U
1147:    -- save it temporarily in MU */
1148:   for (j = 0; j < r; j++) {
1149:     VecZeroEntries(MU[j]);
1150:     VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1151:     VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1152:   }
1153:   /* Form T = U'*MU*U */
1154:   for (j = 0; j < r; j++) {
1155:     VecCopy(MU[j], UU[j]);
1156:     KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1157:   }
1158:   dgmres->matvecs += r;
1159:   for (j = 0; j < r; j++) {
1160:     VecMDot(MU[j], r, UU, &TT[j*bmax]);
1161:   }
1162:   /* Factorize T */
1163:   PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
1164:   PetscBLASIntCast(r,&nr);
1165:   PetscBLASIntCast(bmax,&bm);
1166: #if defined(PETSC_MISSING_LAPACK_GETRF)
1167:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
1168: #else
1169:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1170:   if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
1171: #endif
1172:   /* Free Memory */
1173:   PetscFree(wr);
1174:   PetscFree(wi);
1175:   PetscFree(beta);
1176:   PetscFree(modul);
1177:   PetscFree(perm);
1178:   PetscFree(Q);
1179:   PetscFree(Z);
1180:   PetscFree(work);
1181:   PetscFree(iwork);
1182:   return(0);
1183: }

1185: /* end new DGMRES functions */

1187: /*MC
1188:  KSPDGMRES - Implements the deflated GMRES as defined in [1,2]; In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the stagnation occurs.
1189:  GMRES Options Database Keys:
1190:  +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1191:  .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1192:  .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1193:  vectors are allocated as needed)
1194:  .   -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1195:  .   -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1196:  .   -ksp_gmres_cgs_refinement_type <never,ifneeded,always> - determine if iterative refinement is used to increase the
1197:  stability of the classical Gram-Schmidt  orthogonalization.
1198:  -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
1199:  DGMRES Options Database Keys:
1200:  -ksp_dgmres_eigen <neig> - Number of smallest eigenvalues to extract at each restart
1201:  -ksp_dgmres_max_eigen <max_neig> - Maximum number of eigenvalues that can be extracted during the iterative process
1202:  -ksp_dgmres_force <0, 1> - Use the deflation at each restart; switch off the adaptive strategy.

1204:  Level: beginner

1206:  Notes: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported

1208:  References:

1210:  [1]Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
1211:  [2]On the performance of various adaptive preconditioned GMRES strategies, 5(1998), 101-121.
1212:  [3] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023

1214:  Contributed by: Desire NUENTSA WAKAM,INRIA

1216:  .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1217:  KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1218:  KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1219:  KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()

1221:  M*/

1225: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1226: {
1227:   KSP_DGMRES     *dgmres;

1231:   PetscNewLog(ksp,KSP_DGMRES,&dgmres);
1232:   ksp->data = (void*) dgmres;

1234:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
1235:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);

1237:   ksp->ops->buildsolution                = KSPBuildSolution_DGMRES;
1238:   ksp->ops->setup                        = KSPSetUp_DGMRES;
1239:   ksp->ops->solve                        = KSPSolve_DGMRES;
1240:   ksp->ops->destroy                      = KSPDestroy_DGMRES;
1241:   ksp->ops->view                         = KSPView_DGMRES;
1242:   ksp->ops->setfromoptions               = KSPSetFromOptions_DGMRES;
1243:   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1244:   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

1246:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1247:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1248:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1249:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1250:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1251:   /* -- New functions defined in DGMRES -- */
1252:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1253:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1254:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1255:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1256:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1257:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1258:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1259:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);

1261:   PetscLogEventRegister("DGMRESComputeDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1262:   PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);

1264:   dgmres->haptol         = 1.0e-30;
1265:   dgmres->q_preallocate  = 0;
1266:   dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1267:   dgmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
1268:   dgmres->nrs            = 0;
1269:   dgmres->sol_temp       = 0;
1270:   dgmres->max_k          = GMRES_DEFAULT_MAXK;
1271:   dgmres->Rsvd           = 0;
1272:   dgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
1273:   dgmres->orthogwork     = 0;

1275:   /* Default values for the deflation */
1276:   dgmres->r           = 0;
1277:   dgmres->neig        = DGMRES_DEFAULT_EIG;
1278:   dgmres->max_neig    = DGMRES_DEFAULT_MAXEIG-1;
1279:   dgmres->lambdaN     = 0.0;
1280:   dgmres->smv         = SMV;
1281:   dgmres->force       = 0;
1282:   dgmres->matvecs     = 0;
1283:   dgmres->improve     = 0;
1284:   dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1285:   dgmres->HasSchur    = PETSC_FALSE;
1286:   return(0);
1287: }