File: bicgstab.cpp

package info (click to toggle)
meep-mpich2 1.7.0-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 25,824 kB
  • sloc: cpp: 27,370; python: 10,574; lisp: 1,213; makefile: 440; sh: 28
file content (232 lines) | stat: -rw-r--r-- 7,253 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
/* Copyright (C) 2005-2015 Massachusetts Institute of Technology.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <math.h>
#include <string.h>

#include "meep/mympi.hpp"
#include "bicgstab.hpp"

#include "config.h"

/* bicgstab() implements an iterative solver for non-symmetric linear
   operators, using the algorithm described in:

      Gerard L. G. Sleijpen and Diederik R. Fokkema, "BiCGSTAB(L) for
      linear equations involving unsymmetric matrices with complex
      spectrum," Electronic Trans. on Numerical Analysis 1, 11-32
      (1993).

   and also:

      Gerard L.G. Sleijpen, Henk A. van der Vorst, and Diederik
      R. Fokkema, " BiCGstab(L) and other Hybrid Bi-CG Methods,"
      Numerical Algorithms 7, 75-109 (1994).

   This is a generalization of the stabilized biconjugate-gradient
   (BiCGSTAB) algorithm proposed by van der Vorst (and described
   in the book _Templates for the Solution of Linear Systems_ by
   Barrett et al.)  BiCGSTAB(1) is equivalent to BiCGSTAB, and
   BiCGSTAB(2) is a slightly more efficient version of the BiCGSTAB2
   algorithm by Gutknecht, while BiCGSTAB(L>2) is a further
   generalization.

   The reason that we use this generalization of BiCGSTAB is that the
   BiCGSTAB(1) algorithm was observed by Sleijpen and Fokkema to have
   poor (or even failing) convergence when the linear operator has
   near-pure imaginary eigenvalues.  This is precisely the case for
   our problem (the eigenvalues of the timestep operator are i*omega),
   and we observed precisely such stagnation of convergence.  The
   BiCGSTAB(2) algorithm was reported to fix most such convergence
   problems, and indeed L > 1 seems to converge well for us. */

/* Other variations to explore:

   G. L. G. Sleijpen and H. A. van der Vorst, "Reliable updated
   residuals in hybrid Bi-CG methods," Computing 56 (2), 141-163
   (1996).

   G. L. G. Sleijpen and H. A. van der Vorst, "Maintaining convergence
   properties of BiCGstab methods in finite precision arithmetic,"
   Numerical Algorithms 10, 203-223 (1995).

   See also code on Sleijpen's web page:
                 http://www.math.uu.nl/people/sleijpen/

*/

using namespace std;

namespace meep {

static double dot(size_t n, const realnum *x, const realnum *y)
{
  double sum = 0;
  for (size_t i = 0; i < n; ++i) sum += x[i] * y[i];
  return sum_to_all(sum);
}

static double norm2(size_t n, const realnum *x) {
  // note: we don't just do sqrt(dot(n, x, x)) in order to avoid overflow
  size_t i;
  double xmax = 0, scale;
  long double sum = 0;
  for (i = 0; i < n; ++i) {
    double xabs = fabs(x[i]);
    if (xabs > xmax) xmax = xabs;
  }
  xmax = max_to_all(xmax);
  if (xmax == 0) return 0;
  scale = 1.0 / xmax;
  for (i = 0; i < n; ++i) {
    double xs = scale * x[i];
    sum += xs * xs;
  }
  return xmax * sqrt(sum_to_all(sum));
}

static void xpay(size_t n, realnum *x, double a, const realnum *y) {
  for (size_t m = 0; m < n; ++m) x[m] += a * y[m];
}

#define MIN_OUTPUT_TIME 4.0 // output no more often than this many seconds

typedef realnum *prealnum; // grr, ISO C++ forbids new (double*)[...]

/* BiCGSTAB(L) algorithm for the n-by-n problem Ax = b */
ptrdiff_t bicgstabL(const int L, const size_t n, realnum *x,
	      bicgstab_op A, void *Adata, const realnum *b,
	      const double tol,
	      int *iters,
	      realnum *work,
	      const bool quiet)
{
  if (!work) return (2*L+3)*n; // required workspace

  prealnum *r = new prealnum[L+1];
  prealnum *u = new prealnum[L+1];
  for (int i = 0; i <= L; ++i) {
    r[i] = work + i * n;
    u[i] = work + (L+1 + i) * n;
  }

  double bnrm = norm2(n, b);
  if (bnrm == 0.0) bnrm = 1.0;

  int iter = 0;
  double last_output_wall_time = wall_time();

  double *gamma = new double[L + 1];
  double *gamma_p = new double[L + 1];
  double *gamma_pp = new double[L + 1];

  double *tau = new double[L * L];
  double *sigma = new double[L + 1];

  int ierr = 0; // error code to return, if any
  const double breaktol = 1e-30;

  /**** FIXME: check for breakdown conditions(?) during iteration  ****/

  // rtilde = r[0] = b - Ax
  realnum *rtilde = work + (2*L+2) * n;
  A(x, r[0], Adata);
  for (size_t m = 0; m < n; ++m) rtilde[m] = r[0][m] = b[m] - r[0][m];

  { /* Sleipjen normalizes rtilde in his code; it seems to help slightly */
    double s = 1.0 / norm2(n, rtilde);
    for (size_t m = 0; m < n; ++m) rtilde[m] *= s;
  }

  memset(u[0], 0, sizeof(realnum) * n); // u[0] = 0

  double rho = 1.0, alpha = 0, omega = 1;

  double resid;
  while ((resid = norm2(n, r[0])) > tol * bnrm) {
    ++iter;
    if (!quiet && wall_time() > last_output_wall_time + MIN_OUTPUT_TIME) {
      master_printf("residual[%d] = %g\n", iter, resid / bnrm);
      last_output_wall_time = wall_time();
    }

    rho = -omega * rho;
    for (int j = 0; j < L; ++j) {
      if (fabs(rho) < breaktol) { ierr = -1; goto finish; }
      double rho1 = dot(n, r[j], rtilde);
      double beta = alpha * rho1 / rho;
      rho = rho1;
      for (int i = 0; i <= j; ++i)
      	for (size_t m = 0; m < n; ++m) u[i][m] = r[i][m] - beta * u[i][m];
      A(u[j], u[j+1], Adata);
      alpha = rho / dot(n, u[j+1], rtilde);
      for (int i = 0; i <= j; ++i)
      	xpay(n, r[i], -alpha, u[i+1]);
      A(r[j], r[j+1], Adata);
      xpay(n, x, alpha, u[0]);
    }

    for (int j = 1; j <= L; ++j) {
      for (int i = 1; i < j; ++i) {
      	int ij = (j-1)*L + (i-1);
      	tau[ij] = dot(n, r[j], r[i]) / sigma[i];
      	xpay(n, r[j], -tau[ij], r[i]);
      }
      sigma[j] = dot(n, r[j],r[j]);
      gamma_p[j] = dot(n, r[0], r[j]) / sigma[j];
    }

    omega = gamma[L] = gamma_p[L];
    for (int j = L-1; j >= 1; --j) {
      gamma[j] = gamma_p[j];
      for (int i = j+1; i <= L; ++i)
      	gamma[j] -= tau[(i-1)*L + (j-1)] * gamma[i];
    }
    for (int j = 1; j < L; ++j) {
      gamma_pp[j] = gamma[j+1];
      for (int i = j+1; i < L; ++i)
      	gamma_pp[j] += tau[(i-1)*L + (j-1)] * gamma[i+1];
    }

    xpay(n, x, gamma[1], r[0]);
    xpay(n, r[0], -gamma_p[L], r[L]);
    xpay(n, u[0], -gamma[L], u[L]);
    for (int j = 1; j < L; ++j) { /* TODO: use blas DGEMV (for L > 2) */
      xpay(n, x, gamma_pp[j], r[j]);
      xpay(n, r[0], -gamma_p[j], r[j]);
      xpay(n, u[0], -gamma[j], u[j]);
    }

    if (iter == *iters) { ierr = 1; break; }
  }

  if (!quiet) master_printf("final residual = %g\n", norm2(n, r[0]) / bnrm);

 finish:
  delete[] sigma;
  delete[] tau;
  delete[] gamma_pp;
  delete[] gamma_p;
  delete[] gamma;
  delete[] u;
  delete[] r;

  *iters = iter;
  return ierr;
}

} // namespace meep