// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/floats"
	"gonum.org/v1/gonum/floats/scalar"
)

const (
	initialStepFactor = 1

	quadraticMinimumStepSize = 1e-3
	quadraticMaximumStepSize = 1
	quadraticThreshold       = 1e-12

	firstOrderMinimumStepSize = quadraticMinimumStepSize
	firstOrderMaximumStepSize = quadraticMaximumStepSize
)

var (
	_ StepSizer = ConstantStepSize{}
	_ StepSizer = (*QuadraticStepSize)(nil)
	_ StepSizer = (*FirstOrderStepSize)(nil)
)

// ConstantStepSize is a StepSizer that returns the same step size for
// every iteration.
type ConstantStepSize struct {
	Size float64
}

func (c ConstantStepSize) Init(_ *Location, _ []float64) float64 {
	return c.Size
}

func (c ConstantStepSize) StepSize(_ *Location, _ []float64) float64 {
	return c.Size
}
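
// exampleConstantStepSizeUsage is a hypothetical sketch and not part of the
// original file: it shows how a caller drives a StepSizer, calling Init at the
// starting Location and StepSize at later ones. For ConstantStepSize both
// calls simply return Size.
func exampleConstantStepSizeUsage() {
	sizer := ConstantStepSize{Size: 0.1}
	loc := &Location{X: []float64{1}, F: 1, Gradient: []float64{2}}
	dir := []float64{-2} // steepest descent direction, -∇f(x)

	_ = sizer.Init(loc, dir)     // always 0.1
	_ = sizer.StepSize(loc, dir) // always 0.1
}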

// QuadraticStepSize estimates the initial line search step size as the minimum
// of a quadratic that interpolates f(x_{k-1}), f(x_k) and ∇f_{k-1}⋅p_{k-1}.
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
// The step size is bounded away from zero.
type QuadraticStepSize struct {
	// Threshold determines whether the initial step size is estimated by
	// quadratic interpolation: interpolation is used only when the relative
	// change in the objective function is larger than Threshold, otherwise
	// the initial step size is set to 2 times the previous step size.
	// If Threshold is zero, it will be set to 1e-12.
	Threshold float64
	// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
	// If InitialStepFactor is zero, it will be set to one.
	InitialStepFactor float64
	// MinStepSize is the lower bound on the estimated step size.
	// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
	// If MinStepSize is zero, it will be set to 1e-3.
	MinStepSize float64
	// MaxStepSize is the upper bound on the estimated step size.
	// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	fPrev        float64
	dirPrevNorm  float64
	projGradPrev float64
	xPrev        []float64
}

func (q *QuadraticStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
	// Fill in default values for any parameters that were left unset.
	if q.Threshold == 0 {
		q.Threshold = quadraticThreshold
	}
	if q.InitialStepFactor == 0 {
		q.InitialStepFactor = initialStepFactor
	}
	if q.MinStepSize == 0 {
		q.MinStepSize = quadraticMinimumStepSize
	}
	if q.MaxStepSize == 0 {
		q.MaxStepSize = quadraticMaximumStepSize
	}
	if q.MaxStepSize <= q.MinStepSize {
		panic("optimize: MinStepSize not smaller than MaxStepSize")
	}

	// Take InitialStepFactor/|g|_∞ as the first step size, bounded to
	// [MinStepSize, MaxStepSize].
	gNorm := floats.Norm(loc.Gradient, math.Inf(1))
	stepSize = math.Max(q.MinStepSize, math.Min(q.InitialStepFactor/gNorm, q.MaxStepSize))

	// Store the data needed to estimate the next step size.
	q.fPrev = loc.F
	q.dirPrevNorm = floats.Norm(dir, 2)
	q.projGradPrev = floats.Dot(loc.Gradient, dir)
	q.xPrev = resize(q.xPrev, len(loc.X))
	copy(q.xPrev, loc.X)
	return stepSize
}

func (q *QuadraticStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
	stepSizePrev := floats.Distance(loc.X, q.xPrev, 2) / q.dirPrevNorm
	projGrad := floats.Dot(loc.Gradient, dir)

	stepSize = 2 * stepSizePrev
	if !scalar.EqualWithinRel(q.fPrev, loc.F, q.Threshold) {
		// Two consecutive function values are not relatively equal, so
		// computing the minimum of a quadratic interpolant might make sense.
		df := (loc.F - q.fPrev) / stepSizePrev
		quadTest := df - q.projGradPrev
		if quadTest > 0 {
			// There is a chance of approximating the function well by a
			// quadratic only if the finite difference (f_k-f_{k-1})/stepSizePrev
			// is larger than ∇f_{k-1}⋅p_{k-1}.
			// Set the step size to the minimizer of the quadratic function that
			// interpolates f_{k-1}, ∇f_{k-1}⋅p_{k-1} and f_k.
			stepSize = -q.projGradPrev * stepSizePrev / quadTest / 2
		}
	}
	// Bound the step size to lie in [MinStepSize, MaxStepSize].
	stepSize = math.Max(q.MinStepSize, math.Min(stepSize, q.MaxStepSize))

	q.fPrev = loc.F
	q.dirPrevNorm = floats.Norm(dir, 2)
	q.projGradPrev = projGrad
	copy(q.xPrev, loc.X)
	return stepSize
}
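
// exampleQuadraticStepSizeUsage is a hypothetical sketch and not part of the
// original file. It walks QuadraticStepSize through one steepest-descent step
// on f(x) = x², for which the quadratic interpolant is exact.
func exampleQuadraticStepSizeUsage() {
	var q QuadraticStepSize

	// Start at x = 1 where f = 1, ∇f = 2 and the descent direction is p = -∇f = -2.
	loc0 := &Location{X: []float64{1}, F: 1, Gradient: []float64{2}}
	dir0 := []float64{-2}
	s0 := q.Init(loc0, dir0) // InitialStepFactor/|∇f|_∞ clipped to [1e-3, 1]: 0.5

	// Suppose the line search accepted the shorter step 0.25 along dir0,
	// arriving at x = 0.5 where f = 0.25, ∇f = 1 and the new direction is -1.
	loc1 := &Location{X: []float64{0.5}, F: 0.25, Gradient: []float64{1}}
	dir1 := []float64{-1}
	s1 := q.StepSize(loc1, dir1)
	// The quadratic interpolating f_{k-1} = 1, ∇f_{k-1}⋅p_{k-1} = -4 and
	// f_k = 0.25 is minimized at -(-4)*0.25/(2*((0.25-1)/0.25-(-4))) = 0.5,
	// so s1 is 0.5.
	_, _ = s0, s1
}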

// FirstOrderStepSize estimates the initial line search step size based on the
// assumption that the first-order change in the function will be the same as
// that obtained at the previous iteration. That is, the initial step size s^0_k
// is chosen so that
//
//	s^0_k ∇f_k⋅p_k = s_{k-1} ∇f_{k-1}⋅p_{k-1}
//
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
type FirstOrderStepSize struct {
	// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
	// If InitialStepFactor is zero, it will be set to one.
	InitialStepFactor float64
	// MinStepSize is the lower bound on the estimated step size.
	// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
	// If MinStepSize is zero, it will be set to 1e-3.
	MinStepSize float64
	// MaxStepSize is the upper bound on the estimated step size.
	// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	dirPrevNorm  float64
	projGradPrev float64
	xPrev        []float64
}

func (fo *FirstOrderStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
	if fo.InitialStepFactor == 0 {
		fo.InitialStepFactor = initialStepFactor
	}
	if fo.MinStepSize == 0 {
		fo.MinStepSize = firstOrderMinimumStepSize
	}
	if fo.MaxStepSize == 0 {
		fo.MaxStepSize = firstOrderMaximumStepSize
	}
	if fo.MaxStepSize <= fo.MinStepSize {
		panic("optimize: MinStepSize not smaller than MaxStepSize")
	}

	gNorm := floats.Norm(loc.Gradient, math.Inf(1))
	stepSize = math.Max(fo.MinStepSize, math.Min(fo.InitialStepFactor/gNorm, fo.MaxStepSize))

	fo.dirPrevNorm = floats.Norm(dir, 2)
	fo.projGradPrev = floats.Dot(loc.Gradient, dir)
	fo.xPrev = resize(fo.xPrev, len(loc.X))
	copy(fo.xPrev, loc.X)
	return stepSize
}

func (fo *FirstOrderStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
	stepSizePrev := floats.Distance(loc.X, fo.xPrev, 2) / fo.dirPrevNorm
	projGrad := floats.Dot(loc.Gradient, dir)

	// Choose the step size so that the first-order change along the new
	// direction matches the change obtained at the previous iteration:
	// stepSize*∇f_k⋅p_k = stepSizePrev*∇f_{k-1}⋅p_{k-1}.
	stepSize = stepSizePrev * fo.projGradPrev / projGrad
	// Bound the step size to lie in [MinStepSize, MaxStepSize].
	stepSize = math.Max(fo.MinStepSize, math.Min(stepSize, fo.MaxStepSize))

	fo.dirPrevNorm = floats.Norm(dir, 2)
	fo.projGradPrev = projGrad
	copy(fo.xPrev, loc.X)
	return stepSize
}
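
// exampleFirstOrderStepSizeUsage is a hypothetical sketch and not part of the
// original file. It repeats the steepest-descent step on f(x) = x² from the
// sketch above, showing the first-order estimate
// s_k = s_{k-1} (∇f_{k-1}⋅p_{k-1}) / (∇f_k⋅p_k).
func exampleFirstOrderStepSizeUsage() {
	var fo FirstOrderStepSize

	loc0 := &Location{X: []float64{1}, F: 1, Gradient: []float64{2}}
	dir0 := []float64{-2}
	s0 := fo.Init(loc0, dir0) // 0.5, as for QuadraticStepSize

	// After an accepted step of 0.25 along dir0: s_{k-1} = 0.25,
	// ∇f_{k-1}⋅p_{k-1} = -4 and ∇f_k⋅p_k = -1, so the estimate is
	// 0.25*(-4)/(-1) = 1, which already lies at the upper bound MaxStepSize = 1.
	loc1 := &Location{X: []float64{0.5}, F: 0.25, Gradient: []float64{1}}
	dir1 := []float64{-1}
	s1 := fo.StepSize(loc1, dir1) // 1
	_, _ = s0, s1
}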