# Copyright (C) 2009, Pauli Virtanen <pav@iki.fi>
# Distributed under the same license as Scipy.
from __future__ import division, print_function, absolute_import
import numpy as np
from scipy._lib.six import xrange
from scipy.linalg import get_blas_funcs, get_lapack_funcs, qr_insert, lstsq
from .utils import make_system

__all__ = ['lgmres']


def lgmres(A, b, x0=None, tol=1e-5, maxiter=1000, M=None, callback=None,
inner_m=30, outer_k=3, outer_v=None, store_outer_Av=True):
"""
    Solve a matrix equation using the LGMRES algorithm.

    The LGMRES algorithm [1]_ [2]_ is designed to avoid some problems
    in the convergence of restarted GMRES, and often converges in fewer
    iterations.

Parameters
----------
A : {sparse matrix, dense matrix, LinearOperator}
The real or complex N-by-N matrix of the linear system.
b : {array, matrix}
Right hand side of the linear system. Has shape (N,) or (N,1).
x0 : {array, matrix}
Starting guess for the solution.
tol : float, optional
Tolerance to achieve. The algorithm terminates when either the relative
or the absolute residual is below `tol`.
maxiter : int, optional
Maximum number of iterations. Iteration will stop after maxiter
steps even if the specified tolerance has not been achieved.
M : {sparse matrix, dense matrix, LinearOperator}, optional
Preconditioner for A. The preconditioner should approximate the
inverse of A. Effective preconditioning dramatically improves the
rate of convergence, which implies that fewer iterations are needed
to reach a given error tolerance.
callback : function, optional
User-supplied function to call after each iteration. It is called
as callback(xk), where xk is the current solution vector.
inner_m : int, optional
        Number of inner GMRES iterations per outer iteration.
outer_k : int, optional
Number of vectors to carry between inner GMRES iterations.
According to [1]_, good values are in the range of 1...3.
However, note that if you want to use the additional vectors to
accelerate solving multiple similar problems, larger values may
be beneficial.
outer_v : list of tuples, optional
List containing tuples ``(v, Av)`` of vectors and corresponding
matrix-vector products, used to augment the Krylov subspace, and
carried between inner GMRES iterations. The element ``Av`` can
be `None` if the matrix-vector product should be re-evaluated.
This parameter is modified in-place by `lgmres`, and can be used
to pass "guess" vectors in and out of the algorithm when solving
similar problems.
store_outer_Av : bool, optional
        Whether LGMRES should also store A*v in addition to the vectors `v`
        in the `outer_v` list. Default is True.

Returns
-------
x : array or matrix
The converged solution.
info : int
        Provides convergence information:

            - 0  : successful exit
            - >0 : convergence to tolerance not achieved, number of iterations
            - <0 : illegal input or breakdown

Notes
-----
    The LGMRES algorithm [1]_ [2]_ is designed to avoid the
    slowing of convergence in restarted GMRES, due to alternating
    residual vectors. In practice it often outperforms GMRES(m) of
    comparable memory requirements, or is at least not much worse.

    Another advantage of this algorithm is that you can supply it with
    'guess' vectors in the `outer_v` argument that augment the Krylov
    subspace. If the solution lies close to the span of these vectors,
    the algorithm converges faster. This can be useful if several very
    similar matrices need to be inverted one after another, such as in
    Newton-Krylov iteration where the Jacobian matrix often changes
    little between nonlinear steps; see the Examples section below.

References
----------
    .. [1] A.H. Baker, E.R. Jessup, and T. Manteuffel, "A technique for
       accelerating the convergence of restarted GMRES", SIAM J. Matrix
       Anal. Appl. 26, 962 (2005).
    .. [2] A.H. Baker, PhD thesis, University of Colorado (2003).
       http://amath.colorado.edu/activities/thesis/allisonb/Thesis.ps
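
    Examples
    --------
    A minimal usage sketch on a small sparse system (matrix and RHS
    values here are arbitrary, chosen only for illustration):

    >>> import numpy as np
    >>> from scipy.sparse import csc_matrix
    >>> from scipy.sparse.linalg import lgmres
    >>> A = csc_matrix([[3., 2., 0.], [1., -1., 0.], [0., 5., 1.]])
    >>> b = np.array([2., 4., -1.])
    >>> x, exitCode = lgmres(A, b)
    >>> exitCode            # 0 indicates successful convergence
    0
    >>> np.allclose(A.dot(x), b)
    True

    The `outer_v` list can be used to carry augmentation vectors over to
    a later solve of a similar system; the perturbed right-hand side below
    is only a stand-in for such a sequence of related problems:

    >>> outer_v = []
    >>> x1, _ = lgmres(A, b, outer_v=outer_v)
    >>> x2, _ = lgmres(A, b + 1e-3, outer_v=outer_v)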
"""
    A, M, x, b, postprocess = make_system(A, M, x0, b)
if not np.isfinite(b).all():
raise ValueError("RHS must contain only finite numbers")
matvec = A.matvec
psolve = M.matvec
if outer_v is None:
outer_v = []
axpy, dot, scal = None, None, None
nrm2 = get_blas_funcs('nrm2', [b])
b_norm = nrm2(b)
if b_norm == 0:
b_norm = 1
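        # (a zero RHS makes the relative test below degenerate; with
        # b_norm = 1 the check r_norm <= tol * b_norm becomes an
        # absolute-tolerance test)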
for k_outer in xrange(maxiter):
r_outer = matvec(x) - b
# -- callback
if callback is not None:
callback(x)
# -- determine input type routines
if axpy is None:
if np.iscomplexobj(r_outer) and not np.iscomplexobj(x):
x = x.astype(r_outer.dtype)
axpy, dot, scal, nrm2 = get_blas_funcs(['axpy', 'dot', 'scal', 'nrm2'],
(x, r_outer))
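            # For complex dtypes SciPy resolves 'dot' to the conjugating
            # cdotc/zdotc, so `dot(v, v_new)` in the Arnoldi loop below
            # is a proper inner product.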
# -- check stopping condition
r_norm = nrm2(r_outer)
if r_norm <= tol * b_norm or r_norm <= tol:
break
# -- inner LGMRES iteration
vs0 = -psolve(r_outer)
inner_res_0 = nrm2(vs0)
if inner_res_0 == 0:
rnorm = nrm2(r_outer)
raise RuntimeError("Preconditioner returned a zero vector; "
"|v| ~ %.1g, |M v| = 0" % rnorm)
vs0 = scal(1.0/inner_res_0, vs0)
vs = [vs0]
ws = []
y = None
# H is stored in QR factorized form
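        # (updating the factorization one column at a time with
        # `qr_insert` is cheaper than re-factorizing H at each inner
        # step, and exposes the least-squares residual directly)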
Q = np.ones((1, 1), dtype=vs0.dtype)
R = np.zeros((1, 0), dtype=vs0.dtype)
eps = np.finfo(vs0.dtype).eps
breakdown = False
for j in xrange(1, 1 + inner_m + len(outer_v)):
# -- Arnoldi process:
#
# Build an orthonormal basis V and matrices W and H such that
# A W = V H
            # Columns of W and V are stored in `ws` and `vs`; H is
            # maintained in QR-factored form as `Q`, `R` (see below).
#
# The first column of V is always the residual vector, `vs0`;
            # V has *one more column* than the other two matrices.
#
# The other columns in V are built by feeding in, one
# by one, some vectors `z` and orthonormalizing them
# against the basis so far. The trick here is to
# feed in first some augmentation vectors, before
# starting to construct the Krylov basis on `v0`.
#
            # It was shown in [BJM]_ that a good choice (the LGMRES choice)
            # for these augmentation vectors is the `dx` vectors obtained
            # from a couple of the previous restart cycles.
#
# Note especially that while `vs0` is always the first
# column in V, there is no reason why it should also be
# the first column in W. (In fact, below `vs0` comes in
# W only after the augmentation vectors.)
#
            # The rest of the algorithm then goes as in GMRES: one
            # solves a minimization problem in the smaller subspace
            # spanned by W (the search space) and V (its image under A).
#
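            # As a concrete restatement in terms of the variables used
            # below: after the j-th pass, `vs` holds the j+1 columns of V
            # and `ws` the j columns of W, and, up to round-off,
            #     psolve(matvec(ws[i])) == sum(vs[k]*H[k, i] for k in range(j+1))
            # where H = Q @ R is maintained incrementally via `qr_insert`.
            #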
# ++ evaluate
v_new = None
if j < len(outer_v) + 1:
z, v_new = outer_v[j-1]
elif j == len(outer_v) + 1:
z = vs0
else:
z = vs[-1]
if v_new is None:
v_new = psolve(matvec(z))
else:
# Note: v_new is modified in-place below. Must make a
# copy to ensure that the outer_v vectors are not
# clobbered.
v_new = v_new.copy()
# ++ orthogonalize
v_new_norm = nrm2(v_new)
hcur = np.zeros(j+1, dtype=Q.dtype)
for i, v in enumerate(vs):
alpha = dot(v, v_new)
hcur[i] = alpha
v_new = axpy(v, v_new, v.shape[0], -alpha) # v_new -= alpha*v
hcur[-1] = nrm2(v_new)
with np.errstate(over='ignore', divide='ignore'):
# Careful with denormals
alpha = 1/hcur[-1]
if np.isfinite(alpha):
v_new = scal(alpha, v_new)
if not (hcur[-1] > eps * v_new_norm):
# v_new essentially in the span of previous vectors,
# or we have nans. Bail out after updating the QR
# solution.
breakdown = True
vs.append(v_new)
ws.append(z)
# -- GMRES optimization problem
# Add new column to H=Q*R, padding other columns with zeros
Q2 = np.zeros((j+1, j+1), dtype=Q.dtype, order='F')
Q2[:j,:j] = Q
Q2[j,j] = 1
R2 = np.zeros((j+1, j-1), dtype=R.dtype, order='F')
R2[:j,:] = R
Q, R = qr_insert(Q2, R2, hcur, j-1, which='col',
overwrite_qru=True, check_finite=False)
# Transformed least squares problem
# || Q R y - inner_res_0 * e_1 ||_2 = min!
# Since R = [R'; 0], solution is y = inner_res_0 (R')^{-1} (Q^H)[:j,0]
# Residual is immediately known
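            # Why: Q is unitary, so ||Q R y - inner_res_0 e_1||
            # = ||R y - inner_res_0 Q^H e_1||; the last row of R is zero,
            # so the minimizer cancels the first j entries and leaves
            # inner_res_0 * |(Q^H e_1)[j]| = inner_res_0 * |Q[0,-1]|.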
inner_res = abs(Q[0,-1]) * inner_res_0
# -- check for termination
if inner_res <= tol * inner_res_0 or breakdown:
break
if not np.isfinite(R[j-1,j-1]):
# nans encountered, bail out
return postprocess(x), k_outer + 1
# -- Get the LSQ problem solution
#
# The problem is triangular, but the condition number may be
# bad (or in case of breakdown the last diagonal entry may be
# zero), so use lstsq instead of trtrs.
        y, _, _, _ = lstsq(R[:j,:j], Q[0,:j].conj())
y *= inner_res_0
if not np.isfinite(y).all():
# Floating point over/underflow, non-finite result from
# matmul etc. -- report failure.
return postprocess(x), k_outer + 1
# -- GMRES terminated: eval solution
dx = ws[0]*y[0]
for w, yc in zip(ws[1:], y[1:]):
dx = axpy(w, dx, dx.shape[0], yc) # dx += w*yc
# -- Store LGMRES augmentation vectors
nx = nrm2(dx)
if nx > 0:
if store_outer_Av:
q = Q.dot(R.dot(y))
ax = vs[0]*q[0]
for v, qc in zip(vs[1:], q[1:]):
ax = axpy(v, ax, ax.shape[0], qc)
outer_v.append((dx/nx, ax/nx))
else:
outer_v.append((dx/nx, None))
# -- Retain only a finite number of augmentation vectors
while len(outer_v) > outer_k:
del outer_v[0]
# -- Apply step
x += dx
else:
# didn't converge ...
return postprocess(x), maxiter
return postprocess(x), 0