1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
|
// Copyright ©2021 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"math"
"gonum.org/v1/gonum/blas/blas64"
"gonum.org/v1/gonum/lapack"
)
// Dlatdf computes a contribution to the reciprocal Dif-estimate by solving
//
// Z * x = h - f
//
// and choosing the vector h such that the norm of x is as large as possible.
//
// The n×n matrix Z is represented by its LU factorization as computed by Dgetc2
// and has the form
//
// Z = P * L * U * Q
//
// where P and Q are permutation matrices, L is lower triangular with unit
// diagonal elements and U is upper triangular.
//
// job specifies the heuristic method for computing the contribution.
//
// If job is lapack.LocalLookAhead, all entries of h are chosen as either +1 or
// -1.
//
// If job is lapack.NormalizedNullVector, an approximate null-vector e of Z is
// computed using Dgecon and normalized. h is chosen as ±e with the sign giving
// the greater value of 2-norm(x). This strategy is about 5 times as expensive
// as LocalLookAhead.
//
// On entry, rhs holds the contribution f from earlier solved sub-systems. On
// return, rhs holds the solution x.
//
// ipiv and jpiv contain the pivot indices as returned by Dgetc2: row i of the
// matrix has been interchanged with row ipiv[i] and column j of the matrix has
// been interchanged with column jpiv[j].
//
// n must be at most 8, ipiv and jpiv must have length n, and rhs must have
// length at least n, otherwise Dlatdf will panic.
//
// rdsum and rdscal represent the sum of squares of computed contributions to
// the Dif-estimate from earlier solved sub-systems. rdscal is the scaling
// factor used to prevent overflow in rdsum. Dlatdf returns this sum of squares
// updated with the contributions from the current sub-system.
//
// Dlatdf is an internal routine. It is exported for testing purposes.
func (impl Implementation) Dlatdf(job lapack.MaximizeNormXJob, n int, z []float64, ldz int, rhs []float64, rdsum, rdscal float64, ipiv, jpiv []int) (scale, sum float64) {
switch {
case job != lapack.LocalLookAhead && job != lapack.NormalizedNullVector:
panic(badMaximizeNormXJob)
case n < 0:
panic(nLT0)
case n > 8:
panic("lapack: n > 8")
case ldz < max(1, n):
panic(badLdZ)
}
// Quick return if possible.
if n == 0 {
return
}
switch {
case len(z) < (n-1)*ldz+n:
panic(shortZ)
case len(rhs) < n:
panic(shortRHS)
case len(ipiv) != n:
panic(badLenIpiv)
case len(jpiv) != n:
panic(badLenJpiv)
}
const maxdim = 8
var (
xps [maxdim]float64
xms [maxdim]float64
work [4 * maxdim]float64
iwork [maxdim]int
)
bi := blas64.Implementation()
xp := xps[:n]
xm := xms[:n]
if job == lapack.NormalizedNullVector {
// Compute approximate nullvector xm of Z.
_ = impl.Dgecon(lapack.MaxRowSum, n, z, ldz, 1, work[:], iwork[:])
// This relies on undocumented content in work[n:2*n] stored by Dgecon.
bi.Dcopy(n, work[n:], 1, xm, 1)
// Compute rhs.
impl.Dlaswp(1, xm, 1, 0, n-2, ipiv[:n-1], -1)
tmp := 1 / bi.Dnrm2(n, xm, 1)
bi.Dscal(n, tmp, xm, 1)
bi.Dcopy(n, xm, 1, xp, 1)
bi.Daxpy(n, 1, rhs, 1, xp, 1)
bi.Daxpy(n, -1.0, xm, 1, rhs, 1)
_ = impl.Dgesc2(n, z, ldz, rhs, ipiv, jpiv)
_ = impl.Dgesc2(n, z, ldz, xp, ipiv, jpiv)
if bi.Dasum(n, xp, 1) > bi.Dasum(n, rhs, 1) {
bi.Dcopy(n, xp, 1, rhs, 1)
}
// Compute and return the updated sum of squares.
return impl.Dlassq(n, rhs, 1, rdscal, rdsum)
}
// Apply permutations ipiv to rhs
impl.Dlaswp(1, rhs, 1, 0, n-2, ipiv[:n-1], 1)
// Solve for L-part choosing rhs either to +1 or -1.
pmone := -1.0
for j := 0; j < n-2; j++ {
bp := rhs[j] + 1
bm := rhs[j] - 1
// Look-ahead for L-part rhs[0:n-2] = +1 or -1, splus and sminu computed
// more efficiently than in https://doi.org/10.1109/9.29404.
splus := 1 + bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, z[(j+1)*ldz+j:], ldz)
sminu := bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1)
splus *= rhs[j]
switch {
case splus > sminu:
rhs[j] = bp
case sminu > splus:
rhs[j] = bm
default:
// In this case the updating sums are equal and we can choose rsh[j]
// +1 or -1. The first time this happens we choose -1, thereafter
// +1. This is a simple way to get good estimates of matrices like
// Byers well-known example (see https://doi.org/10.1109/9.29404).
rhs[j] += pmone
pmone = 1
}
// Compute remaining rhs.
bi.Daxpy(n-j-1, -rhs[j], z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1)
}
// Solve for U-part, look-ahead for rhs[n-1] = ±1. This is not done in
// Bsolve and will hopefully give us a better estimate because any
// ill-conditioning of the original matrix is transferred to U and not to L.
// U[n-1,n-1] is an approximation to sigma_min(LU).
bi.Dcopy(n-1, rhs, 1, xp, 1)
xp[n-1] = rhs[n-1] + 1
rhs[n-1] -= 1
var splus, sminu float64
for i := n - 1; i >= 0; i-- {
tmp := 1 / z[i*ldz+i]
xp[i] *= tmp
rhs[i] *= tmp
for k := i + 1; k < n; k++ {
xp[i] -= xp[k] * (z[i*ldz+k] * tmp)
rhs[i] -= rhs[k] * (z[i*ldz+k] * tmp)
}
splus += math.Abs(xp[i])
sminu += math.Abs(rhs[i])
}
if splus > sminu {
bi.Dcopy(n, xp, 1, rhs, 1)
}
// Apply the permutations jpiv to the computed solution (rhs).
impl.Dlaswp(1, rhs, 1, 0, n-2, jpiv[:n-1], -1)
// Compute and return the updated sum of squares.
return impl.Dlassq(n, rhs, 1, rdscal, rdsum)
}
|