File: dlaqr23.go

package info (click to toggle)
golang-gonum-v1-gonum 0.15.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 18,792 kB
  • sloc: asm: 6,252; fortran: 5,271; sh: 377; ruby: 211; makefile: 98
file content (423 lines) | stat: -rw-r--r-- 12,505 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package gonum

import (
	"math"

	"gonum.org/v1/gonum/blas"
	"gonum.org/v1/gonum/blas/blas64"
	"gonum.org/v1/gonum/lapack"
)

// Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
// Hessenberg matrix to detect and deflate fully converged eigenvalues from a
// trailing principal submatrix using aggressive early deflation [1].
//
// On return, H will be overwritten by a new Hessenberg matrix that is a
// perturbation of an orthogonal similarity transformation of H. It is hoped
// that on output H will have many zero subdiagonal entries.
//
// If wantt is true, the matrix H will be fully updated so that the
// quasi-triangular Schur factor can be computed. If wantt is false, then only
// enough of H will be updated to preserve the eigenvalues.
//
// If wantz is true, the orthogonal similarity transformation will be
// accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
//
// ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
// of H. It must hold that
//
//	0 <= ilo <= ihi < n     if n > 0,
//	ilo == 0 and ihi == -1  if n == 0,
//
// and the block must be isolated, that is, it must hold that
//
//	ktop == 0   or H[ktop,ktop-1] == 0,
//	kbot == n-1 or H[kbot+1,kbot] == 0,
//
// otherwise Dlaqr23 will panic.
//
// nw is the deflation window size. It must hold that
//
//	0 <= nw <= kbot-ktop+1,
//
// otherwise Dlaqr23 will panic.
//
// iloz and ihiz specify the rows of the n×n matrix Z to which transformations
// will be applied if wantz is true. It must hold that
//
//	0 <= iloz <= ktop,  and  kbot <= ihiz < n,
//
// otherwise Dlaqr23 will panic.
//
// sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
//
// v and ldv represent an nw×nw work matrix.
// t and ldt represent an nw×nh work matrix, and nh must be at least nw.
// wv and ldwv represent an nv×nw work matrix.
//
// work must have length at least lwork and lwork must be at least max(1,2*nw),
// otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
// efficiency. On return, work[0] will contain the optimal value of lwork.
//
// If lwork is -1, instead of performing Dlaqr23, the function only estimates the
// optimal workspace size and stores it into work[0]. Neither h nor z are
// accessed.
//
// recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
// as DLAQR3, for recur == 0 it behaves as DLAQR2.
//
// On return, ns and nd will contain respectively the number of unconverged
// (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
// sr and si.
//
// On return, the real and imaginary parts of approximate eigenvalues that may
// be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
// and si[kbot-nd-ns+1:kbot-nd+1].
//
// On return, the real and imaginary parts of converged eigenvalues will be
// stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
//
// References:
//
//	[1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
//	    Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
//	    URL: http://dx.doi.org/10.1137/S0895479801384585
func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
	switch {
	case n < 0:
		panic(nLT0)
	case ktop < 0 || max(0, n-1) < ktop:
		panic(badKtop)
	case kbot < min(ktop, n-1) || n <= kbot:
		panic(badKbot)
	case nw < 0 || kbot-ktop+1+1 < nw:
		panic(badNw)
	case ldh < max(1, n):
		panic(badLdH)
	case wantz && (iloz < 0 || ktop < iloz):
		panic(badIloz)
	case wantz && (ihiz < kbot || n <= ihiz):
		panic(badIhiz)
	case ldz < 1, wantz && ldz < n:
		panic(badLdZ)
	case ldv < max(1, nw):
		panic(badLdV)
	case nh < nw:
		panic(badNh)
	case ldt < max(1, nh):
		panic(badLdT)
	case nv < 0:
		panic(nvLT0)
	case ldwv < max(1, nw):
		panic(badLdWV)
	case lwork < max(1, 2*nw) && lwork != -1:
		panic(badLWork)
	case len(work) < max(1, lwork):
		panic(shortWork)
	case recur < 0:
		panic(recurLT0)
	}

	// Quick return for zero window size.
	if nw == 0 {
		work[0] = 1
		return 0, 0
	}

	// LAPACK code does not enforce the documented behavior
	//  nw <= kbot-ktop+1
	// but we do (we panic above).
	jw := nw
	lwkopt := max(1, 2*nw)
	if jw > 2 {
		// Workspace query call to Dgehrd.
		impl.Dgehrd(jw, 0, jw-2, t, ldt, work, work, -1)
		lwk1 := int(work[0])
		// Workspace query call to Dormhr.
		impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1)
		lwk2 := int(work[0])
		if recur > 0 {
			// Workspace query call to Dlaqr04.
			impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr, si, 0, jw-1, v, ldv, work, -1, recur-1)
			lwk3 := int(work[0])
			// Optimal workspace.
			lwkopt = max(jw+max(lwk1, lwk2), lwk3)
		} else {
			// Optimal workspace.
			lwkopt = jw + max(lwk1, lwk2)
		}
	}
	// Quick return in case of workspace query.
	if lwork == -1 {
		work[0] = float64(lwkopt)
		return 0, 0
	}

	// Check input slices only if not doing workspace query.
	switch {
	case len(h) < (n-1)*ldh+n:
		panic(shortH)
	case len(v) < (nw-1)*ldv+nw:
		panic(shortV)
	case len(t) < (nw-1)*ldt+nh:
		panic(shortT)
	case len(wv) < (nv-1)*ldwv+nw:
		panic(shortWV)
	case wantz && len(z) < (n-1)*ldz+n:
		panic(shortZ)
	case len(sr) != kbot+1:
		panic(badLenSr)
	case len(si) != kbot+1:
		panic(badLenSi)
	case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
		panic(notIsolated)
	case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
		panic(notIsolated)
	}

	// Machine constants.
	ulp := dlamchP
	smlnum := float64(n) / ulp * dlamchS

	// Setup deflation window.
	var s float64
	kwtop := kbot - jw + 1
	if kwtop != ktop {
		s = h[kwtop*ldh+kwtop-1]
	}
	if kwtop == kbot {
		// 1×1 deflation window.
		sr[kwtop] = h[kwtop*ldh+kwtop]
		si[kwtop] = 0
		ns = 1
		nd = 0
		if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
			ns = 0
			nd = 1
			if kwtop > ktop {
				h[kwtop*ldh+kwtop-1] = 0
			}
		}
		work[0] = 1
		return ns, nd
	}

	// Convert to spike-triangular form. In case of a rare QR failure, this
	// routine continues to do aggressive early deflation using that part of
	// the deflation window that converged using infqr here and there to
	// keep track.
	impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
	bi := blas64.Implementation()
	bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
	impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
	nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
	var infqr int
	if recur > 0 && jw > nmin {
		infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1)
	} else {
		infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
	}
	// Note that ilo == 0 which conveniently coincides with the success
	// value of infqr, that is, infqr as an index always points to the first
	// converged eigenvalue.

	// Dtrexc needs a clean margin near the diagonal.
	for j := 0; j < jw-3; j++ {
		t[(j+2)*ldt+j] = 0
		t[(j+3)*ldt+j] = 0
	}
	if jw >= 3 {
		t[(jw-1)*ldt+jw-3] = 0
	}

	ns = jw
	ilst := infqr
	// Deflation detection loop.
	for ilst < ns {
		bulge := false
		if ns >= 2 {
			bulge = t[(ns-1)*ldt+ns-2] != 0
		}
		if !bulge {
			// Real eigenvalue.
			abst := math.Abs(t[(ns-1)*ldt+ns-1])
			if abst == 0 {
				abst = math.Abs(s)
			}
			if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
				// Deflatable.
				ns--
			} else {
				// Undeflatable, move it up out of the way.
				// Dtrexc can not fail in this case.
				_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
				ilst++
			}
			continue
		}
		// Complex conjugate pair.
		abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
		if abst == 0 {
			abst = math.Abs(s)
		}
		if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
			// Deflatable.
			ns -= 2
		} else {
			// Undeflatable, move them up out of the way.
			// Dtrexc does the right thing with ilst in case of a
			// rare exchange failure.
			_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
			ilst += 2
		}
	}

	// Return to Hessenberg form.
	if ns == 0 {
		s = 0
	}
	if ns < jw {
		// Sorting diagonal blocks of T improves accuracy for graded
		// matrices. Bubble sort deals well with exchange failures.
		sorted := false
		i := ns
		for !sorted {
			sorted = true
			kend := i - 1
			i = infqr
			var k int
			if i == ns-1 || t[(i+1)*ldt+i] == 0 {
				k = i + 1
			} else {
				k = i + 2
			}
			for k <= kend {
				var evi float64
				if k == i+1 {
					evi = math.Abs(t[i*ldt+i])
				} else {
					evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
				}

				var evk float64
				if k == kend || t[(k+1)*ldt+k] == 0 {
					evk = math.Abs(t[k*ldt+k])
				} else {
					evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
				}

				if evi >= evk {
					i = k
				} else {
					sorted = false
					_, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work)
					if ok {
						i = ilst
					} else {
						i = k
					}
				}
				if i == kend || t[(i+1)*ldt+i] == 0 {
					k = i + 1
				} else {
					k = i + 2
				}
			}
		}
	}

	// Restore shift/eigenvalue array from T.
	for i := jw - 1; i >= infqr; {
		if i == infqr || t[i*ldt+i-1] == 0 {
			sr[kwtop+i] = t[i*ldt+i]
			si[kwtop+i] = 0
			i--
			continue
		}
		aa := t[(i-1)*ldt+i-1]
		bb := t[(i-1)*ldt+i]
		cc := t[i*ldt+i-1]
		dd := t[i*ldt+i]
		_, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
		i -= 2
	}

	if ns < jw || s == 0 {
		if ns > 1 && s != 0 {
			// Reflect spike back into lower triangle.
			bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
			_, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
			work[0] = 1
			impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
			impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
			impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
			impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
			impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
		}

		// Copy updated reduced window into place.
		if kwtop > 0 {
			h[kwtop*ldh+kwtop-1] = s * v[0]
		}
		impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
		bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)

		// Accumulate orthogonal matrix in order to update H and Z, if
		// requested.
		if ns > 1 && s != 0 {
			// work[:ns-1] contains the elementary reflectors stored
			// by a call to Dgehrd above.
			impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
				t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
		}

		// Update vertical slab in H.
		var ltop int
		if !wantt {
			ltop = ktop
		}
		for krow := ltop; krow < kwtop; krow += nv {
			kln := min(nv, kwtop-krow)
			bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
				1, h[krow*ldh+kwtop:], ldh, v, ldv,
				0, wv, ldwv)
			impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
		}

		// Update horizontal slab in H.
		if wantt {
			for kcol := kbot + 1; kcol < n; kcol += nh {
				kln := min(nh, n-kcol)
				bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
					1, v, ldv, h[kwtop*ldh+kcol:], ldh,
					0, t, ldt)
				impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
			}
		}

		// Update vertical slab in Z.
		if wantz {
			for krow := iloz; krow <= ihiz; krow += nv {
				kln := min(nv, ihiz-krow+1)
				bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
					1, z[krow*ldz+kwtop:], ldz, v, ldv,
					0, wv, ldwv)
				impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
			}
		}
	}

	// The number of deflations.
	nd = jw - ns
	// Shifts are converged eigenvalues that could not be deflated.
	// Subtracting infqr from the spike length takes care of the case of a
	// rare QR failure while calculating eigenvalues of the deflation
	// window.
	ns -= infqr
	work[0] = float64(lwkopt)
	return ns, nd
}