Actual source code: dgmres.c
petsc-3.4.2 2013-07-02
2: /*
3: This file implements the deflated GMRES.
4: References:
5: [1]J. Erhel, K. Burrage and B. Pohl, Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
6: [2] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
8: */
10: #include ../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h
12: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
14: #define GMRES_DELTA_DIRECTIONS 10
15: #define GMRES_DEFAULT_MAXK 30
16: static PetscErrorCode KSPDGMRESGetNewVectors(KSP,PetscInt);
17: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
18: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
22: PetscErrorCode KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
23: {
27: PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
28: return(0);
29: }
32: PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
33: {
37: PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
38: return(0);
39: }
42: PetscErrorCode KSPDGMRESForce(KSP ksp,PetscInt force)
43: {
47: PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscInt),(ksp,force));
48: return(0);
49: }
52: PetscErrorCode KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
53: {
57: PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
58: return(0);
59: }
62: PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
63: {
67: PetscTryMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
68: return(0);
69: }
72: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp)
73: {
77: PetscTryMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP),(ksp));
78: return(0);
79: }
82: PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
83: {
87: PetscTryMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
88: return(0);
89: }
93: PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
94: {
98: PetscTryMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
99: return(0);
100: }
104: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
105: {
107: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
108: PetscInt neig = dgmres->neig+EIG_OFFSET;
109: PetscInt max_k = dgmres->max_k+1;
112: KSPSetUp_GMRES(ksp);
113: if (!dgmres->neig) return(0);
115: /* Allocate workspace for the Schur vectors*/
116: PetscMalloc((neig) *max_k*sizeof(PetscReal), &SR);
117: dgmres->wr = NULL;
118: dgmres->wi = NULL;
119: dgmres->perm = NULL;
120: dgmres->modul = NULL;
121: dgmres->Q = NULL;
122: dgmres->Z = NULL;
124: UU = NULL;
125: XX = NULL;
126: MX = NULL;
127: AUU = NULL;
128: XMX = NULL;
129: XMU = NULL;
130: UMX = NULL;
131: AUAU = NULL;
132: TT = NULL;
133: TTF = NULL;
134: INVP = NULL;
135: X1 = NULL;
136: X2 = NULL;
137: MU = NULL;
138: return(0);
139: }
141: /*
142: Run GMRES, possibly with restart. Return residual history if requested.
143: input parameters:
145: . gmres - structure containing parameters and work areas
147: output parameters:
148: . nres - residuals (from preconditioned system) at each step.
149: If restarting, consider passing nres+it. If null,
150: ignored
151: . itcount - number of iterations used. nres[0] to nres[itcount]
152: are defined. If null, ignored.
154: Notes:
155: On entry, the value in vector VEC_VV(0) should be the initial residual
156: (this allows shortcuts where the initial preconditioned residual is 0).
157: */
160: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
161: {
162: KSP_DGMRES *dgmres = (KSP_DGMRES*)(ksp->data);
163: PetscReal res_norm,res,hapbnd,tt;
165: PetscInt it = 0;
166: PetscInt max_k = dgmres->max_k;
167: PetscBool hapend = PETSC_FALSE;
168: PetscReal res_old;
169: PetscInt test = 0;
172: VecNormalize(VEC_VV(0),&res_norm);
173: res = res_norm;
174: *GRS(0) = res_norm;
176: /* check for the convergence */
177: PetscObjectAMSTakeAccess((PetscObject)ksp);
178: ksp->rnorm = res;
179: PetscObjectAMSGrantAccess((PetscObject)ksp);
180: dgmres->it = (it - 1);
181: KSPLogResidualHistory(ksp,res);
182: KSPMonitor(ksp,ksp->its,res);
183: if (!res) {
184: if (itcount) *itcount = 0;
185: ksp->reason = KSP_CONVERGED_ATOL;
186: PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
187: return(0);
188: }
189: /* record the residual norm to test if deflation is needed */
190: res_old = res;
192: (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
193: while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
194: if (it) {
195: KSPLogResidualHistory(ksp,res);
196: KSPMonitor(ksp,ksp->its,res);
197: }
198: dgmres->it = (it - 1);
199: if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
200: KSPDGMRESGetNewVectors(ksp,it+1);
201: }
202: if (dgmres->r > 0) {
203: if (ksp->pc_side == PC_LEFT) {
204: /* Apply the first preconditioner */
205: KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
206: /* Then apply Deflation as a preconditioner */
207: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
208: } else if (ksp->pc_side == PC_RIGHT) {
209: KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
210: KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
211: }
212: } else {
213: KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
214: }
215: dgmres->matvecs += 1;
216: /* update hessenberg matrix and do Gram-Schmidt */
217: (*dgmres->orthog)(ksp,it);
219: /* vv(i+1) . vv(i+1) */
220: VecNormalize(VEC_VV(it+1),&tt);
221: /* save the magnitude */
222: *HH(it+1,it) = tt;
223: *HES(it+1,it) = tt;
225: /* check for the happy breakdown */
226: hapbnd = PetscAbsScalar(tt / *GRS(it));
227: if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
228: if (tt < hapbnd) {
229: PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %G tt = %G\n",hapbnd,tt);
230: hapend = PETSC_TRUE;
231: }
232: KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);
234: it++;
235: dgmres->it = (it-1); /* For converged */
236: ksp->its++;
237: ksp->rnorm = res;
238: if (ksp->reason) break;
240: (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
242: /* Catch error in happy breakdown and signal convergence and break from loop */
243: if (hapend) {
244: if (!ksp->reason) {
245: if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
246: else {
247: ksp->reason = KSP_DIVERGED_BREAKDOWN;
248: break;
249: }
250: }
251: }
252: }
254: /* Monitor if we know that we will not return for a restart */
255: if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
256: KSPLogResidualHistory(ksp,res);
257: KSPMonitor(ksp,ksp->its,res);
258: }
259: if (itcount) *itcount = it;
261: /*
262: Down here we have to solve for the "best" coefficients of the Krylov
263: columns, add the solution values together, and possibly unwind the
264: preconditioning from the solution
265: */
266: /* Form the solution (or the solution so far) */
267: KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);
269: /* Compute data for the deflation to be used during the next restart */
270: if (!ksp->reason && ksp->its < ksp->max_it) {
271: test = max_k *log(ksp->rtol/res) /log(res/res_old);
272: /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
273: if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
274: KSPDGMRESComputeDeflationData(ksp);
275: }
276: }
277: return(0);
278: }
282: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
283: {
285: PetscInt its,itcount;
286: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
287: PetscBool guess_zero = ksp->guess_zero;
290: if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
292: PetscObjectAMSTakeAccess((PetscObject)ksp);
293: ksp->its = 0;
294: dgmres->matvecs = 0;
295: PetscObjectAMSGrantAccess((PetscObject)ksp);
297: itcount = 0;
298: ksp->reason = KSP_CONVERGED_ITERATING;
299: while (!ksp->reason) {
300: KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
301: if (ksp->pc_side == PC_LEFT) {
302: dgmres->matvecs += 1;
303: if (dgmres->r > 0) {
304: KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
305: VecCopy(VEC_TEMP, VEC_VV(0));
306: }
307: }
309: KSPDGMRESCycle(&its,ksp);
310: itcount += its;
311: if (itcount >= ksp->max_it) {
312: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
313: break;
314: }
315: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
316: }
317: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
318: return(0);
319: }
324: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
325: {
327: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
328: PetscInt neig1 = dgmres->neig+EIG_OFFSET;
329: PetscInt max_neig = dgmres->max_neig;
332: if (dgmres->r) {
333: VecDestroyVecs(max_neig, &UU);
334: VecDestroyVecs(max_neig, &MU);
335: if (XX) {
336: VecDestroyVecs(neig1, &XX);
337: VecDestroyVecs(neig1, &MX);
338: }
340: PetscFree(TT);
341: PetscFree(TTF);
342: PetscFree(INVP);
344: PetscFree(XMX);
345: PetscFree(UMX);
346: PetscFree(XMU);
347: PetscFree(X1);
348: PetscFree(X2);
349: PetscFree(dgmres->work);
350: PetscFree(dgmres->iwork);
351: PetscFree(dgmres->wr);
352: PetscFree(dgmres->wi);
353: PetscFree(dgmres->modul);
354: PetscFree(dgmres->Q);
355: PetscFree(ORTH);
356: PetscFree(AUAU);
357: PetscFree(AUU);
358: PetscFree(SR2);
359: }
360: PetscFree(SR);
361: KSPDestroy_GMRES(ksp);
362: return(0);
363: }
364: /*
365: KSPDGMRESBuildSoln - create the solution from the starting vector and the
366: current iterates.
368: Input parameters:
369: nrs - work area of size it + 1.
370: vs - index of initial guess
371: vdest - index of result. Note that vs may == vdest (replace
372: guess with the solution).
374: This is an internal routine that knows about the GMRES internals.
375: */
378: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
379: {
380: PetscScalar tt;
382: PetscInt ii,k,j;
383: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
385: /* Solve for solution vector that minimizes the residual */
388: /* If it is < 0, no gmres steps have been performed */
389: if (it < 0) {
390: VecCopy(vs,vdest); /* VecCopy() is smart, exists immediately if vguess == vdest */
391: return(0);
392: }
393: if (*HH(it,it) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %G",it,PetscAbsScalar(*GRS(it)));
394: if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
395: else nrs[it] = 0.0;
397: for (ii=1; ii<=it; ii++) {
398: k = it - ii;
399: tt = *GRS(k);
400: for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
401: if (*HH(k,k) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
402: nrs[k] = tt / *HH(k,k);
403: }
405: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
406: VecSet(VEC_TEMP,0.0);
407: VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));
409: /* Apply deflation */
410: if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
411: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
412: VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
413: }
414: KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);
416: /* add solution to previous solution */
417: if (vdest != vs) {
418: VecCopy(vs,vdest);
419: }
420: VecAXPY(vdest,1.0,VEC_TEMP);
421: return(0);
422: }
423: /*
424: Do the scalar work for the orthogonalization. Return new residual norm.
425: */
428: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
429: {
430: PetscScalar *hh,*cc,*ss,tt;
431: PetscInt j;
432: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
435: hh = HH(0,it);
436: cc = CC(0);
437: ss = SS(0);
439: /* Apply all the previously computed plane rotations to the new column
440: of the Hessenberg matrix */
441: for (j=1; j<=it; j++) {
442: tt = *hh;
443: *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
444: hh++;
445: *hh = *cc++ * *hh -(*ss++ * tt);
446: }
448: /*
449: compute the new plane rotation, and apply it to:
450: 1) the right-hand-side of the Hessenberg system
451: 2) the new column of the Hessenberg matrix
452: thus obtaining the updated value of the residual
453: */
454: if (!hapend) {
455: tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
456: if (tt == 0.0) {
457: ksp->reason = KSP_DIVERGED_NULL;
458: return(0);
459: }
460: *cc = *hh / tt;
461: *ss = *(hh+1) / tt;
462: *GRS(it+1) = -(*ss * *GRS(it));
463: *GRS(it) = PetscConj(*cc) * *GRS(it);
464: *hh = PetscConj(*cc) * *hh + *ss * *(hh+1);
465: *res = PetscAbsScalar(*GRS(it+1));
466: } else {
467: /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
468: another rotation matrix (so RH doesn't change). The new residual is
469: always the new sine term times the residual from last time (GRS(it)),
470: but now the new sine rotation would be zero...so the residual should
471: be zero...so we will multiply "zero" by the last residual. This might
472: not be exactly what we want to do here -could just return "zero". */
474: *res = 0.0;
475: }
476: return(0);
477: }
478: /*
479: This routine allocates more work vectors, starting from VEC_VV(it).
480: */
483: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
484: {
485: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
487: PetscInt nwork = dgmres->nwork_alloc,k,nalloc;
490: nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
491: /* Adjust the number to allocate to make sure that we don't exceed the
492: number of available slots */
493: if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
494: nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
495: }
496: if (!nalloc) return(0);
498: dgmres->vv_allocated += nalloc;
500: KSPGetVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
501: PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);
503: dgmres->mwork_alloc[nwork] = nalloc;
504: for (k=0; k<nalloc; k++) {
505: dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
506: }
507: dgmres->nwork_alloc++;
508: return(0);
509: }
513: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
514: {
515: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
519: if (!ptr) {
520: if (!dgmres->sol_temp) {
521: VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
522: PetscLogObjectParent(ksp,dgmres->sol_temp);
523: }
524: ptr = dgmres->sol_temp;
525: }
526: if (!dgmres->nrs) {
527: /* allocate the work area */
528: PetscMalloc(dgmres->max_k*sizeof(PetscScalar),&dgmres->nrs);
529: PetscLogObjectMemory(ksp,dgmres->max_k*sizeof(PetscScalar));
530: }
532: KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
533: if (result) *result = ptr;
534: return(0);
535: }
539: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
540: {
541: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
543: PetscBool iascii,isharmonic;
546: KSPView_GMRES(ksp,viewer);
547: PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
548: if (iascii) {
549: if (dgmres->force) PetscViewerASCIIPrintf(viewer, " DGMRES: Adaptive strategy is used: FALSE\n");
550: else PetscViewerASCIIPrintf(viewer, " DGMRES: Adaptive strategy is used: TRUE\n");
551: PetscOptionsHasName(NULL, "-ksp_dgmres_harmonic_ritz", &isharmonic);
552: if (isharmonic) {
553: PetscViewerASCIIPrintf(viewer, " DGMRES: Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
554: } else {
555: PetscViewerASCIIPrintf(viewer, " DGMRES: Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
556: }
557: PetscViewerASCIIPrintf(viewer, " DGMRES: Total number of extracted eigenvalues = %D\n", dgmres->r);
558: PetscViewerASCIIPrintf(viewer, " DGMRES: Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
559: PetscViewerASCIIPrintf(viewer, " DGMRES: relaxation parameter for the adaptive strategy(smv) = %g\n", dgmres->smv);
560: PetscViewerASCIIPrintf(viewer, " DGMRES: Number of matvecs : %D\n", dgmres->matvecs);
561: }
562: return(0);
563: }
565: /* New DGMRES functions */
569: static PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
570: {
571: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
574: if (neig< 0 && neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
575: dgmres->neig=neig;
576: return(0);
577: }
581: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
582: {
583: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
586: if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
587: dgmres->max_neig=max_neig;
588: return(0);
589: }
593: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
594: {
595: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
598: if (ratio <= 0) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
599: dgmres->smv=ratio;
600: return(0);
601: }
605: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp,PetscInt force)
606: {
607: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
610: if (force != 0 && force != 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"Value must be 0 or 1");
611: dgmres->force = 1;
612: return(0);
613: }
615: extern PetscErrorCode KSPSetFromOptions_GMRES(KSP);
619: PetscErrorCode KSPSetFromOptions_DGMRES(KSP ksp)
620: {
622: PetscInt neig;
623: PetscInt max_neig;
624: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
625: PetscBool flg;
626: PetscReal smv;
627: PetscInt input;
630: KSPSetFromOptions_GMRES(ksp);
631: PetscOptionsHead("KSP DGMRES Options");
632: PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
633: if (flg) {
634: KSPDGMRESSetEigen(ksp, neig);
635: }
636: PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
637: if (flg) {
638: KSPDGMRESSetMaxEigen(ksp, max_neig);
639: }
640: PetscOptionsReal("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &smv, &flg);
641: if (flg) dgmres->smv = smv;
642: PetscOptionsInt("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &input, &flg);
643: if (flg) dgmres->improve = input;
644: PetscOptionsInt("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &input, &flg);
645: if (flg) dgmres->force = input;
646: PetscOptionsTail();
647: return(0);
648: }
652: static PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
653: {
654: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
656: PetscInt i,j, k;
657: PetscBLASInt nr, bmax;
658: PetscInt r = dgmres->r;
659: PetscInt neig; /* number of eigenvalues to extract at each restart */
660: PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
661: PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
662: PetscInt N = dgmres->max_k+1;
663: PetscInt n = dgmres->it+1;
664: PetscReal alpha;
665: #if !defined(PETSC_MISSING_LAPACK_GETRF)
666: PetscBLASInt info;
667: #endif
670: PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
671: if (dgmres->neig == 0) return(0);
672: if (max_neig < (r+neig1) && !dgmres->improve) {
673: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
674: return(0);
675: }
677: KSPDGMRESComputeSchurForm(ksp, &neig);
678: /* Form the extended Schur vectors X=VV*Sr */
679: if (!XX) {
680: VecDuplicateVecs(VEC_VV(0), neig1, &XX);
681: }
682: for (j = 0; j<neig; j++) {
683: VecZeroEntries(XX[j]);
684: VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
685: }
687: /* Orthogonalize X against U */
688: if (!ORTH) {
689: PetscMalloc(max_neig*sizeof(PetscReal), &ORTH);
690: }
691: if (r > 0) {
692: /* modified Gram-Schmidt */
693: for (j = 0; j<neig; j++) {
694: for (i=0; i<r; i++) {
695: /* First, compute U'*X[j] */
696: VecDot(XX[j], UU[i], &alpha);
697: /* Then, compute X(j)=X(j)-U*U'*X(j) */
698: VecAXPY(XX[j], -alpha, UU[i]);
699: }
700: }
701: }
702: /* Compute MX = M^{-1}*A*X */
703: if (!MX) {
704: VecDuplicateVecs(VEC_VV(0), neig1, &MX);
705: }
706: for (j = 0; j<neig; j++) {
707: KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
708: }
709: dgmres->matvecs += neig;
711: if ((r+neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
712: KSPDGMRESImproveEig(ksp, neig);
713: return(0); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
714: }
716: /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
717: if (!XMX) {
718: PetscMalloc(neig1*neig1*sizeof(PetscReal), &XMX);
719: }
720: for (j = 0; j < neig; j++) {
721: VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
722: }
724: if (r > 0) {
725: /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
726: if (!UMX) {
727: PetscMalloc(max_neig*neig1*sizeof(PetscReal), &UMX);
728: }
729: for (j = 0; j < neig; j++) {
730: VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
731: }
732: /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
733: if (!XMU) {
734: PetscMalloc(max_neig*neig1*sizeof(PetscReal), &XMU);
735: }
736: for (j = 0; j<r; j++) {
737: VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
738: }
739: }
741: /* Form the new matrix T = [T UMX; XMU XMX]; */
742: if (!TT) {
743: PetscMalloc(max_neig*max_neig*sizeof(PetscReal), &TT);
744: }
745: if (r > 0) {
746: /* Add XMU to T */
747: for (j = 0; j < r; j++) {
748: PetscMemcpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig*sizeof(PetscReal));
749: }
750: /* Add [UMX; XMX] to T */
751: for (j = 0; j < neig; j++) {
752: k = r+j;
753: PetscMemcpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r*sizeof(PetscReal));
754: PetscMemcpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
755: }
756: } else { /* Add XMX to T */
757: for (j = 0; j < neig; j++) {
758: PetscMemcpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
759: }
760: }
762: dgmres->r += neig;
763: r = dgmres->r;
764: PetscBLASIntCast(r,&nr);
765: /*LU Factorize T with Lapack xgetrf routine */
767: PetscBLASIntCast(max_neig,&bmax);
768: if (!TTF) {
769: PetscMalloc(bmax*bmax*sizeof(PetscReal), &TTF);
770: }
771: PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
772: if (!INVP) {
773: PetscMalloc(bmax*sizeof(PetscBLASInt), &INVP);
774: }
775: #if defined(PETSC_MISSING_LAPACK_GETRF)
776: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
777: #else
778: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
779: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
780: #endif
782: /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
783: if (!UU) {
784: VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
785: VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
786: }
787: for (j=0; j<neig; j++) {
788: VecCopy(XX[j], UU[r-neig+j]);
789: VecCopy(MX[j], MU[r-neig+j]);
790: }
791: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
792: return(0);
793: }
797: static PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
798: {
799: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
801: PetscInt N = dgmres->max_k + 1, n=dgmres->it+1;
802: PetscBLASInt bn, bN;
803: PetscReal *A;
804: PetscBLASInt ihi;
805: PetscBLASInt ldA; /* leading dimension of A */
806: PetscBLASInt ldQ; /* leading dimension of Q */
807: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
808: PetscReal *work; /* working vector */
809: PetscBLASInt lwork; /* size of the working vector */
810: PetscInt *perm; /* Permutation vector to sort eigenvalues */
811: PetscInt i, j;
812: PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
813: PetscReal *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
814: PetscBLASInt *select;
815: PetscBLASInt *iwork;
816: PetscBLASInt liwork;
817: PetscScalar *Ht; /* Transpose of the Hessenberg matrix */
818: PetscScalar *t; /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
819: PetscBLASInt nrhs = 1; /* Number of right hand side */
820: PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
821: #if !defined(PETSC_MISSING_LAPACK_HSEQR) || !defined(PETSC_MISSING_LAPACK_TRSEN)
822: PetscBLASInt ilo = 1;
823: PetscBLASInt info;
824: PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
825: PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
826: PetscBool flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
827: #endif
830: PetscBLASIntCast(n,&bn);
831: PetscBLASIntCast(N,&bN);
832: ihi = ldQ = bn;
833: ldA = bN;
834: PetscBLASIntCast(5*N,&lwork);
836: #if defined(PETSC_USE_COMPLEX)
837: SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
838: #endif
840: PetscMalloc(ldA*ldA*sizeof(PetscReal), &A);
841: PetscMalloc(ldQ*n*sizeof(PetscReal), &Q);
842: PetscMalloc(lwork*sizeof(PetscReal), &work);
843: if (!dgmres->wr) {
844: PetscMalloc(n*sizeof(PetscReal), &dgmres->wr);
845: PetscMalloc(n*sizeof(PetscReal), &dgmres->wi);
846: }
847: wr = dgmres->wr;
848: wi = dgmres->wi;
849: PetscMalloc(n*sizeof(PetscReal),&modul);
850: PetscMalloc(n*sizeof(PetscInt),&perm);
851: /* copy the Hessenberg matrix to work space */
852: PetscMemcpy(A, dgmres->hes_origin, ldA*ldA*sizeof(PetscReal));
853: PetscOptionsHasName(NULL, "-ksp_dgmres_harmonic_ritz", &flag);
854: if (flag) {
855: /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
856: /* Transpose the Hessenberg matrix */
857: PetscMalloc(bn*bn*sizeof(PetscScalar), &Ht);
858: for (i = 0; i < bn; i++) {
859: for (j = 0; j < bn; j++) {
860: Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
861: }
862: }
864: /* Solve the system H^T*t = h_{m+1,m}e_m */
865: PetscMalloc(bn*sizeof(PetscScalar), &t);
866: PetscMemzero(t, bn*sizeof(PetscScalar));
867: t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
868: PetscMalloc(bn*sizeof(PetscBLASInt), &ipiv);
869: /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
870: #if defined(PETSC_MISSING_LAPACK_GESV)
871: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
872: #else
873: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
874: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
875: #endif
876: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
877: for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
878: PetscFree(t);
879: PetscFree(Ht);
880: }
881: /* Compute eigenvalues with the Schur form */
882: #if defined(PETSC_MISSING_LAPACK_HSEQR)
883: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
884: #else
885: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
886: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
887: #endif
888: PetscFree(work);
890: /* sort the eigenvalues */
891: for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
892: for (i=0; i<n; i++) perm[i] = i;
894: PetscSortRealWithPermutation(n, modul, perm);
895: /* save the complex modulus of the largest eigenvalue in magnitude */
896: if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
897: /* count the number of extracted eigenvalues (with complex conjugates) */
898: NbrEig = 0;
899: while (NbrEig < dgmres->neig) {
900: if (wi[perm[NbrEig]] != 0) NbrEig += 2;
901: else NbrEig += 1;
902: }
903: /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
905: PetscMalloc(n * sizeof(PetscBLASInt), &select);
906: PetscMemzero(select, n * sizeof(PetscBLASInt));
908: if (!dgmres->GreatestEig) {
909: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
910: } else {
911: for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
912: }
913: /* call Lapack dtrsen */
914: lwork = PetscMax(1, 4 * NbrEig *(bn-NbrEig));
915: liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
916: PetscMalloc(lwork * sizeof(PetscScalar), &work);
917: PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
918: #if defined(PETSC_MISSING_LAPACK_TRSEN)
919: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
920: #else
921: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
922: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
923: #endif
924: PetscFree(select);
926: /* Extract the Schur vectors */
927: for (j = 0; j < NbrEig; j++) {
928: PetscMemcpy(&SR[j*N], &(Q[j*ldQ]), n*sizeof(PetscReal));
929: }
930: *neig = NbrEig;
931: PetscFree(A);
932: PetscFree(work);
933: PetscFree(perm);
934: PetscFree(work);
935: PetscFree(iwork);
936: PetscFree(modul);
937: PetscFree(Q);
938: return(0);
939: }
943: static PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
944: {
945: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
946: PetscInt i, r = dgmres->r;
948: PetscBLASInt nrhs = 1;
949: PetscReal alpha = 1.0;
950: PetscInt max_neig = dgmres->max_neig;
951: PetscBLASInt br,bmax;
952: PetscInt lambda = dgmres->lambdaN;
953: #if !defined(PETSC_MISSING_LAPACK_GERFS)
954: PetscReal berr, ferr;
955: PetscBLASInt info;
956: #endif
959: PetscBLASIntCast(r,&br);
960: PetscBLASIntCast(max_neig,&bmax);
961: PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
962: if (!r) {
963: VecCopy(x,y);
964: return(0);
965: }
966: /* Compute U'*x */
967: if (!X1) {
968: PetscMalloc(bmax*sizeof(PetscReal), &X1);
969: PetscMalloc(bmax*sizeof(PetscReal), &X2);
970: }
971: VecMDot(x, r, UU, X1);
973: /* Solve T*X1=X2 for X1*/
974: PetscMemcpy(X2, X1, br*sizeof(PetscReal));
975: #if defined(PETSC_MISSING_LAPACK_GETRS)
976: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
977: #else
978: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
979: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
980: #endif
981: /* Iterative refinement -- is it really necessary ?? */
982: if (!WORK) {
983: PetscMalloc(3*bmax*sizeof(PetscReal), &WORK);
984: PetscMalloc(bmax*sizeof(PetscBLASInt), &IWORK);
985: }
986: #if defined(PETSC_MISSING_LAPACK_GERFS)
987: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
988: #else
989: PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
990: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
991: #endif
993: for (i = 0; i < r; i++) X2[i] = X1[i]/lambda - X2[i];
995: /* Compute X2=U*X2 */
996: VecZeroEntries(y);
997: VecMAXPY(y, r, X2, UU);
998: VecAXPY(y, alpha, x);
1000: PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
1001: return(0);
1002: }
1006: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
1007: {
1008: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
1009: PetscInt j,r_old, r = dgmres->r;
1010: PetscBLASInt i = 0;
1011: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
1012: PetscInt bmax = dgmres->max_neig;
1013: PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
1014: PetscInt aug1 = bmax+neig1; /* maximum size of the augmented invariant basis */
1015: PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
1016: PetscBLASInt N; /* size of AUAU */
1017: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
1018: PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
1019: PetscReal *work; /* working vector */
1020: PetscBLASInt lwork; /* size of the working vector */
1021: PetscBLASInt info; /* Output info from Lapack routines */
1022: PetscInt *perm; /* Permutation vector to sort eigenvalues */
1023: PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
1024: PetscInt ierr;
1025: PetscBLASInt NbrEig = 0,nr,bm;
1026: PetscBLASInt *select;
1027: PetscBLASInt liwork, *iwork;
1028: #if !defined(PETSC_MISSING_LAPACK_TGSEN)
1029: PetscReal Dif[2];
1030: PetscBLASInt ijob = 2;
1031: PetscBLASInt wantQ = 1, wantZ = 1;
1032: #endif
1035: /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
1036: if (!AUU) {
1037: PetscMalloc(aug1*aug1*sizeof(PetscReal), &AUU);
1038: PetscMalloc(aug1*aug1*sizeof(PetscReal), &AUAU);
1039: }
1040: /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
1041: * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
1042: /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
1043: for (j=0; j < r; j++) {
1044: VecMDot(UU[j], r, MU, &AUU[j*aug1]);
1045: }
1046: /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
1047: for (j = 0; j < neig; j++) {
1048: VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
1049: }
1050: /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
1051: for (j = 0; j < r; j++) {
1052: VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
1053: }
1054: /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
1055: for (j = 0; j < neig; j++) {
1056: VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
1057: }
1059: /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
1060: /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
1061: for (j=0; j < r; j++) {
1062: VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
1063: }
1064: /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
1065: for (j = 0; j < neig; j++) {
1066: VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
1067: }
1068: /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
1069: for (j = 0; j < r; j++) {
1070: VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
1071: }
1072: /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
1073: for (j = 0; j < neig; j++) {
1074: VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
1075: }
1077: /* Computation of the eigenvectors */
1078: PetscBLASIntCast(aug1,&ldA);
1079: PetscBLASIntCast(aug,&N);
1080: lwork = 8 * N + 20; /* sizeof the working space */
1081: PetscMalloc(N*sizeof(PetscReal), &wr);
1082: PetscMalloc(N*sizeof(PetscReal), &wi);
1083: PetscMalloc(N*sizeof(PetscReal), &beta);
1084: PetscMalloc(N*sizeof(PetscReal), &modul);
1085: PetscMalloc(N*sizeof(PetscInt), &perm);
1086: PetscMalloc(N*N*sizeof(PetscReal), &Q);
1087: PetscMalloc(N*N*sizeof(PetscReal), &Z);
1088: PetscMalloc(lwork*sizeof(PetscReal), &work);
1089: #if defined(PETSC_MISSING_LAPACK_GGES)
1090: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
1091: #else
1092: PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
1093: if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1094: #endif
1095: for (i=0; i<N; i++) {
1096: if (beta[i] !=0.0) {
1097: wr[i] /=beta[i];
1098: wi[i] /=beta[i];
1099: }
1100: }
1101: /* sort the eigenvalues */
1102: for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
1103: for (i=0; i<N; i++) perm[i] = i;
1104: PetscSortRealWithPermutation(N, modul, perm);
1105: /* Save the norm of the largest eigenvalue */
1106: if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1107: /* Allocate space to extract the first r schur vectors */
1108: if (!SR2) {
1109: PetscMalloc(aug1*bmax*sizeof(PetscReal), &SR2);
1110: }
1111: /* count the number of extracted eigenvalues (complex conjugates count as 2) */
1112: while (NbrEig < bmax) {
1113: if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1114: else NbrEig += 2;
1115: }
1116: if (NbrEig > bmax) NbrEig = bmax - 1;
1117: r_old = r; /* previous size of r */
1118: dgmres->r = r = NbrEig;
1120: /* Select the eigenvalues to reorder */
1121: PetscMalloc(N * sizeof(PetscBLASInt), &select);
1122: PetscMemzero(select, N * sizeof(PetscBLASInt));
1123: if (!dgmres->GreatestEig) {
1124: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
1125: } else {
1126: for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
1127: }
1128: /* Reorder and extract the new <r> schur vectors */
1129: lwork = PetscMax(4 * N + 16, 2 * NbrEig *(N - NbrEig));
1130: liwork = PetscMax(N + 6, 2 * NbrEig *(N - NbrEig));
1131: PetscFree(work);
1132: PetscMalloc(lwork * sizeof(PetscReal), &work);
1133: PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
1134: #if defined(PETSC_MISSING_LAPACK_TGSEN)
1135: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
1136: #else
1137: PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
1138: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1139: #endif
1140: PetscFree(select);
1142: for (j=0; j<r; j++) {
1143: PetscMemcpy(&SR2[j*aug1], &(Z[j*N]), N*sizeof(PetscReal));
1144: }
1146: /* Multiply the Schur vectors SR2 by U (and X) to get a new U
1147: -- save it temporarily in MU */
1148: for (j = 0; j < r; j++) {
1149: VecZeroEntries(MU[j]);
1150: VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1151: VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1152: }
1153: /* Form T = U'*MU*U */
1154: for (j = 0; j < r; j++) {
1155: VecCopy(MU[j], UU[j]);
1156: KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1157: }
1158: dgmres->matvecs += r;
1159: for (j = 0; j < r; j++) {
1160: VecMDot(MU[j], r, UU, &TT[j*bmax]);
1161: }
1162: /* Factorize T */
1163: PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
1164: PetscBLASIntCast(r,&nr);
1165: PetscBLASIntCast(bmax,&bm);
1166: #if defined(PETSC_MISSING_LAPACK_GETRF)
1167: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
1168: #else
1169: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1170: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
1171: #endif
1172: /* Free Memory */
1173: PetscFree(wr);
1174: PetscFree(wi);
1175: PetscFree(beta);
1176: PetscFree(modul);
1177: PetscFree(perm);
1178: PetscFree(Q);
1179: PetscFree(Z);
1180: PetscFree(work);
1181: PetscFree(iwork);
1182: return(0);
1183: }
1185: /* end new DGMRES functions */
1187: /*MC
1188: KSPDGMRES - Implements the deflated GMRES as defined in [1,2]; In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the stagnation occurs.
1189: GMRES Options Database Keys:
1190: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1191: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1192: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1193: vectors are allocated as needed)
1194: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1195: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1196: . -ksp_gmres_cgs_refinement_type <never,ifneeded,always> - determine if iterative refinement is used to increase the
1197: stability of the classical Gram-Schmidt orthogonalization.
1198: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
1199: DGMRES Options Database Keys:
1200: -ksp_dgmres_eigen <neig> - Number of smallest eigenvalues to extract at each restart
1201: -ksp_dgmres_max_eigen <max_neig> - Maximum number of eigenvalues that can be extracted during the iterative process
1202: -ksp_dgmres_force <0, 1> - Use the deflation at each restart; switch off the adaptive strategy.
1204: Level: beginner
1206: Notes: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported
1208: References:
1210: [1]Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
1211: [2]On the performance of various adaptive preconditioned GMRES strategies, 5(1998), 101-121.
1212: [3] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
1214: Contributed by: Desire NUENTSA WAKAM,INRIA
1216: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1217: KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1218: KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1219: KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()
1221: M*/
1225: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1226: {
1227: KSP_DGMRES *dgmres;
1231: PetscNewLog(ksp,KSP_DGMRES,&dgmres);
1232: ksp->data = (void*) dgmres;
1234: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
1235: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);
1237: ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1238: ksp->ops->setup = KSPSetUp_DGMRES;
1239: ksp->ops->solve = KSPSolve_DGMRES;
1240: ksp->ops->destroy = KSPDestroy_DGMRES;
1241: ksp->ops->view = KSPView_DGMRES;
1242: ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1243: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1244: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1246: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1247: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1248: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1249: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1250: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1251: /* -- New functions defined in DGMRES -- */
1252: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1253: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1254: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1255: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1256: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1257: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1258: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1259: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);
1261: PetscLogEventRegister("DGMRESComputeDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1262: PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);
1264: dgmres->haptol = 1.0e-30;
1265: dgmres->q_preallocate = 0;
1266: dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1267: dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1268: dgmres->nrs = 0;
1269: dgmres->sol_temp = 0;
1270: dgmres->max_k = GMRES_DEFAULT_MAXK;
1271: dgmres->Rsvd = 0;
1272: dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1273: dgmres->orthogwork = 0;
1275: /* Default values for the deflation */
1276: dgmres->r = 0;
1277: dgmres->neig = DGMRES_DEFAULT_EIG;
1278: dgmres->max_neig = DGMRES_DEFAULT_MAXEIG-1;
1279: dgmres->lambdaN = 0.0;
1280: dgmres->smv = SMV;
1281: dgmres->force = 0;
1282: dgmres->matvecs = 0;
1283: dgmres->improve = 0;
1284: dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1285: dgmres->HasSchur = PETSC_FALSE;
1286: return(0);
1287: }