1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
|
// Copyright (C) 2016-2019 Yixuan Qiu <yixuan.qiu@cos.name>
// Under MIT license
#ifndef LBFGS_H
#define LBFGS_H
#include <Eigen/Core>
#include "LBFGS/Param.h"
#include "LBFGS/LineSearchBacktracking.h"
#include "LBFGS/LineSearchBracketing.h"
#include "LBFGS/LineSearchNocedalWright.h"
namespace LBFGSpp {
///
/// LBFGS solver for unconstrained numerical optimization
///
template < typename Scalar,
template<class> class LineSearch = LineSearchBacktracking >
class LBFGSSolver
{
private:
typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
typedef Eigen::Map<Vector> MapVec;
const LBFGSParam<Scalar>& m_param; // Parameters to control the LBFGS algorithm
Matrix m_s; // History of the s vectors
Matrix m_y; // History of the y vectors
Vector m_ys; // History of the s'y values
Vector m_alpha; // History of the step lengths
Vector m_fx; // History of the objective function values
Vector m_xp; // Old x
Vector m_grad; // New gradient
Vector m_gradp; // Old gradient
Vector m_drt; // Moving direction
inline void reset(int n)
{
const int m = m_param.m;
m_s.resize(n, m);
m_y.resize(n, m);
m_ys.resize(m);
m_alpha.resize(m);
m_xp.resize(n);
m_grad.resize(n);
m_gradp.resize(n);
m_drt.resize(n);
if(m_param.past > 0)
m_fx.resize(m_param.past);
}
public:
///
/// Constructor for LBFGS solver.
///
/// \param param An object of \ref LBFGSParam to store parameters for the
/// algorithm
///
LBFGSSolver(const LBFGSParam<Scalar>& param) :
m_param(param)
{
m_param.check_param();
}
///
/// Minimizing a multivariate function using LBFGS algorithm.
/// Exceptions will be thrown if error occurs.
///
/// \param f A function object such that `f(x, grad)` returns the
/// objective function value at `x`, and overwrites `grad` with
/// the gradient.
/// \param x In: An initial guess of the optimal point. Out: The best point
/// found.
/// \param fx Out: The objective function value at `x`.
///
/// \return Number of iterations used.
///
template <typename Foo>
inline int minimize(Foo& f, Vector& x, Scalar& fx)
{
const int n = x.size();
const int fpast = m_param.past;
reset(n);
// Evaluate function and compute gradient
fx = f(x, m_grad);
Scalar xnorm = x.norm();
Scalar gnorm = m_grad.norm();
if(fpast > 0)
m_fx[0] = fx;
// Early exit if the initial x is already a minimizer
if(gnorm <= m_param.epsilon * std::max(xnorm, Scalar(1.0)))
{
return 1;
}
// Initial direction
m_drt.noalias() = -m_grad;
// Initial step
Scalar step = Scalar(1.0) / m_drt.norm();
int k = 1;
int end = 0;
for( ; ; )
{
// Save the curent x and gradient
m_xp.noalias() = x;
m_gradp.noalias() = m_grad;
// Line search to update x, fx and gradient
LineSearch<Scalar>::LineSearch(f, fx, x, m_grad, step, m_drt, m_xp, m_param);
// New x norm and gradient norm
xnorm = x.norm();
gnorm = m_grad.norm();
// Convergence test -- gradient
if(gnorm <= m_param.epsilon * std::max(xnorm, Scalar(1.0)))
{
return k;
}
// Convergence test -- objective function value
if(fpast > 0)
{
if(k >= fpast && std::abs((m_fx[k % fpast] - fx) / fx) < m_param.delta)
return k;
m_fx[k % fpast] = fx;
}
// Maximum number of iterations
if(m_param.max_iterations != 0 && k >= m_param.max_iterations)
{
return k;
}
// Update s and y
// s_{k+1} = x_{k+1} - x_k
// y_{k+1} = g_{k+1} - g_k
MapVec svec(&m_s(0, end), n);
MapVec yvec(&m_y(0, end), n);
svec.noalias() = x - m_xp;
yvec.noalias() = m_grad - m_gradp;
// ys = y's = 1/rho
// yy = y'y
Scalar ys = yvec.dot(svec);
Scalar yy = yvec.squaredNorm();
m_ys[end] = ys;
// Recursive formula to compute d = -H * g
m_drt.noalias() = -m_grad;
int bound = std::min(m_param.m, k);
end = (end + 1) % m_param.m;
int j = end;
for(int i = 0; i < bound; i++)
{
j = (j + m_param.m - 1) % m_param.m;
MapVec sj(&m_s(0, j), n);
MapVec yj(&m_y(0, j), n);
m_alpha[j] = sj.dot(m_drt) / m_ys[j];
m_drt.noalias() -= m_alpha[j] * yj;
}
m_drt *= (ys / yy);
for(int i = 0; i < bound; i++)
{
MapVec sj(&m_s(0, j), n);
MapVec yj(&m_y(0, j), n);
Scalar beta = yj.dot(m_drt) / m_ys[j];
m_drt.noalias() += (m_alpha[j] - beta) * sj;
j = (j + 1) % m_param.m;
}
// step = 1.0 as initial guess
step = Scalar(1.0);
k++;
}
return k;
}
};
} // namespace LBFGSpp
#endif // LBFGS_H
|