File: dpbtrf.go

package info (click to toggle)
golang-gonum-v1-gonum 0.15.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 18,792 kB
  • sloc: asm: 6,252; fortran: 5,271; sh: 377; ruby: 211; makefile: 98
file content (216 lines) | stat: -rw-r--r-- 6,403 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
// Copyright ©2019 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package gonum

import (
	"gonum.org/v1/gonum/blas"
	"gonum.org/v1/gonum/blas/blas64"
)

// Dpbtrf computes the Cholesky factorization of an n×n symmetric positive
// definite band matrix
//
//	A = Uᵀ * U  if uplo == blas.Upper
//	A = L * Lᵀ  if uplo == blas.Lower
//
// where U is an upper triangular band matrix and L is lower triangular. kd is
// the number of super- or sub-diagonals of A.
//
// The band storage scheme is illustrated below when n = 6 and kd = 2. Elements
// marked * are not used by the function.
//
//	uplo == blas.Upper
//	On entry:         On return:
//	 a00  a01  a02     u00  u01  u02
//	 a11  a12  a13     u11  u12  u13
//	 a22  a23  a24     u22  u23  u24
//	 a33  a34  a35     u33  u34  u35
//	 a44  a45   *      u44  u45   *
//	 a55   *    *      u55   *    *
//
//	uplo == blas.Lower
//	On entry:         On return:
//	  *    *   a00       *    *   l00
//	  *   a10  a11       *   l10  l11
//	 a20  a21  a22      l20  l21  l22
//	 a31  a32  a33      l31  l32  l33
//	 a42  a43  a44      l42  l43  l44
//	 a53  a54  a55      l53  l54  l55
func (impl Implementation) Dpbtrf(uplo blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) {
	const nbmax = 32

	switch {
	case uplo != blas.Upper && uplo != blas.Lower:
		panic(badUplo)
	case n < 0:
		panic(nLT0)
	case kd < 0:
		panic(kdLT0)
	case ldab < kd+1:
		panic(badLdA)
	}

	// Quick return if possible.
	if n == 0 {
		return true
	}

	if len(ab) < (n-1)*ldab+kd+1 {
		panic(shortAB)
	}

	opts := string(blas.Upper)
	if uplo == blas.Lower {
		opts = string(blas.Lower)
	}
	nb := impl.Ilaenv(1, "DPBTRF", opts, n, kd, -1, -1)
	// The block size must not exceed the semi-bandwidth kd, and must not
	// exceed the limit set by the size of the local array work.
	nb = min(nb, nbmax)

	if nb <= 1 || kd < nb {
		// Use unblocked code.
		return impl.Dpbtf2(uplo, n, kd, ab, ldab)
	}

	// Use blocked code.
	ldwork := nb
	work := make([]float64, nb*ldwork)
	bi := blas64.Implementation()
	if uplo == blas.Upper {
		// Compute the Cholesky factorization of a symmetric band
		// matrix, given the upper triangle of the matrix in band
		// storage.

		// Process the band matrix one diagonal block at a time.
		for i := 0; i < n; i += nb {
			ib := min(nb, n-i)
			// Factorize the diagonal block.
			ok := impl.Dpotf2(uplo, ib, ab[i*ldab:], ldab-1)
			if !ok {
				return false
			}
			if i+ib >= n {
				continue
			}
			// Update the relevant part of the trailing submatrix.
			// If A11 denotes the diagonal block which has just been
			// factorized, then we need to update the remaining
			// blocks in the diagram:
			//
			//  A11   A12   A13
			//        A22   A23
			//              A33
			//
			// The numbers of rows and columns in the partitioning
			// are ib, i2, i3 respectively. The blocks A12, A22 and
			// A23 are empty if ib = kd. The upper triangle of A13
			// lies outside the band.
			i2 := min(kd-ib, n-i-ib)
			if i2 > 0 {
				// Update A12.
				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i2,
					1, ab[i*ldab:], ldab-1, ab[i*ldab+ib:], ldab-1)
				// Update A22.
				bi.Dsyrk(blas.Upper, blas.Trans, i2, ib,
					-1, ab[i*ldab+ib:], ldab-1, 1, ab[(i+ib)*ldab:], ldab-1)
			}
			i3 := min(ib, n-i-kd)
			if i3 > 0 {
				// Copy the lower triangle of A13 into the work array.
				for ii := 0; ii < ib; ii++ {
					for jj := 0; jj <= min(ii, i3-1); jj++ {
						work[ii*ldwork+jj] = ab[(i+ii)*ldab+kd-ii+jj]
					}
				}
				// Update A13 (in the work array).
				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i3,
					1, ab[i*ldab:], ldab-1, work, ldwork)
				// Update A23.
				if i2 > 0 {
					bi.Dgemm(blas.Trans, blas.NoTrans, i2, i3, ib,
						-1, ab[i*ldab+ib:], ldab-1, work, ldwork,
						1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
				}
				// Update A33.
				bi.Dsyrk(blas.Upper, blas.Trans, i3, ib,
					-1, work, ldwork, 1, ab[(i+kd)*ldab:], ldab-1)
				// Copy the lower triangle of A13 back into place.
				for ii := 0; ii < ib; ii++ {
					for jj := 0; jj <= min(ii, i3-1); jj++ {
						ab[(i+ii)*ldab+kd-ii+jj] = work[ii*ldwork+jj]
					}
				}
			}
		}
	} else {
		// Compute the Cholesky factorization of a symmetric band
		// matrix, given the lower triangle of the matrix in band
		// storage.

		// Process the band matrix one diagonal block at a time.
		for i := 0; i < n; i += nb {
			ib := min(nb, n-i)
			// Factorize the diagonal block.
			ok := impl.Dpotf2(uplo, ib, ab[i*ldab+kd:], ldab-1)
			if !ok {
				return false
			}
			if i+ib >= n {
				continue
			}
			// Update the relevant part of the trailing submatrix.
			// If A11 denotes the diagonal block which has just been
			// factorized, then we need to update the remaining
			// blocks in the diagram:
			//
			//  A11
			//  A21   A22
			//  A31   A32   A33
			//
			// The numbers of rows and columns in the partitioning
			// are ib, i2, i3 respectively. The blocks A21, A22 and
			// A32 are empty if ib = kd. The lowr triangle of A31
			// lies outside the band.
			i2 := min(kd-ib, n-i-ib)
			if i2 > 0 {
				// Update A21.
				bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i2, ib,
					1, ab[i*ldab+kd:], ldab-1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
				// Update A22.
				bi.Dsyrk(blas.Lower, blas.NoTrans, i2, ib,
					-1, ab[(i+ib)*ldab+kd-ib:], ldab-1, 1, ab[(i+ib)*ldab+kd:], ldab-1)
			}
			i3 := min(ib, n-i-kd)
			if i3 > 0 {
				// Copy the upper triangle of A31 into the work array.
				for ii := 0; ii < i3; ii++ {
					for jj := ii; jj < ib; jj++ {
						work[ii*ldwork+jj] = ab[(ii+i+kd)*ldab+jj-ii]
					}
				}
				// Update A31 (in the work array).
				bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i3, ib,
					1, ab[i*ldab+kd:], ldab-1, work, ldwork)
				// Update A32.
				if i2 > 0 {
					bi.Dgemm(blas.NoTrans, blas.Trans, i3, i2, ib,
						-1, work, ldwork, ab[(i+ib)*ldab+kd-ib:], ldab-1,
						1, ab[(i+kd)*ldab+ib:], ldab-1)
				}
				// Update A33.
				bi.Dsyrk(blas.Lower, blas.NoTrans, i3, ib,
					-1, work, ldwork, 1, ab[(i+kd)*ldab+kd:], ldab-1)
				// Copy the upper triangle of A31 back into place.
				for ii := 0; ii < i3; ii++ {
					for jj := ii; jj < ib; jj++ {
						ab[(ii+i+kd)*ldab+jj-ii] = work[ii*ldwork+jj]
					}
				}
			}
		}
	}
	return true
}