1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
|
// Copyright (C) 2020-2025 Yixuan Qiu <yixuan.qiu@cos.name>
// Under MIT license
#ifndef LBFGSPP_SUBSPACE_MIN_H
#define LBFGSPP_SUBSPACE_MIN_H
#include <stdexcept>
#include <vector>
#include <Eigen/Core>
#include "BFGSMat.h"
/// \cond
namespace LBFGSpp {
//
// Subspace minimization procedure of the L-BFGS-B algorithm,
// mainly for internal use.
//
// The target of subspace minimization is to minimize the quadratic function m(x)
// over the free variables, subject to the bound condition.
// Free variables stand for coordinates that are not at the boundary in xcp,
// the generalized Cauchy point.
//
// In the classical implementation of L-BFGS-B [1], the minimization is done by first
// ignoring the box constraints, followed by a line search. Our implementation is
// an exact minimization subject to the bounds, based on the BOXCQP algorithm [2].
//
// Reference:
// [1] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound constrained optimization.
// [2] C. Voglis and I. E. Lagaris (2004). BOXCQP: An algorithm for bound constrained convex quadratic problems.
//
template <typename Scalar>
class SubspaceMin
{
private:
using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;
using Matrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
using IndexSet = std::vector<int>;
// v[ind]
static Vector subvec(const Vector& v, const IndexSet& ind)
{
const int nsub = ind.size();
Vector res(nsub);
for (int i = 0; i < nsub; i++)
res[i] = v[ind[i]];
return res;
}
// v[ind] = rhs
static void subvec_assign(Vector& v, const IndexSet& ind, const Vector& rhs)
{
const int nsub = ind.size();
for (int i = 0; i < nsub; i++)
v[ind[i]] = rhs[i];
}
// Check whether the vector is within the bounds
static bool in_bounds(const Vector& x, const Vector& lb, const Vector& ub)
{
const int n = x.size();
for (int i = 0; i < n; i++)
{
if (x[i] < lb[i] || x[i] > ub[i])
return false;
}
return true;
}
// Test convergence of P set
static bool P_converged(const IndexSet& yP_set, const Vector& vecy, const Vector& vecl, const Vector& vecu)
{
const int nP = yP_set.size();
for (int i = 0; i < nP; i++)
{
const int coord = yP_set[i];
if (vecy[coord] < vecl[coord] || vecy[coord] > vecu[coord])
return false;
}
return true;
}
// Test convergence of L set
static bool L_converged(const IndexSet& yL_set, const Vector& lambda)
{
const int nL = yL_set.size();
for (int i = 0; i < nL; i++)
{
const int coord = yL_set[i];
if (lambda[coord] < Scalar(0))
return false;
}
return true;
}
// Test convergence of L set
static bool U_converged(const IndexSet& yU_set, const Vector& mu)
{
const int nU = yU_set.size();
for (int i = 0; i < nU; i++)
{
const int coord = yU_set[i];
if (mu[coord] < Scalar(0))
return false;
}
return true;
}
public:
// bfgs: An object that represents the BFGS approximation matrix.
// x0: Current parameter vector.
// xcp: Computed generalized Cauchy point.
// g: Gradient at x0.
// lb: Lower bounds for x.
// ub: Upper bounds for x.
// Wd: W'(xcp - x0)
// newact_set: Coordinates that newly become active during the GCP procedure.
// fv_set: Free variable set.
// maxit: Maximum number of iterations.
// drt: The output direction vector, drt = xsm - x0.
static void subspace_minimize(
const BFGSMat<Scalar, true>& bfgs, const Vector& x0, const Vector& xcp, const Vector& g,
const Vector& lb, const Vector& ub, const Vector& Wd, const IndexSet& newact_set, const IndexSet& fv_set, int maxit,
Vector& drt)
{
// std::cout << "========================= Entering subspace minimization =========================\n\n";
// d = xcp - x0
drt.noalias() = xcp - x0;
// Size of free variables
const int nfree = fv_set.size();
// If there is no free variable, simply return drt
if (nfree < 1)
{
// std::cout << "========================= (Early) leaving subspace minimization =========================\n\n";
return;
}
// std::cout << "New active set = [ "; for(std::size_t i = 0; i < newact_set.size(); i++) std::cout << newact_set[i] << " "; std::cout << "]\n";
// std::cout << "Free variable set = [ "; for(std::size_t i = 0; i < fv_set.size(); i++) std::cout << fv_set[i] << " "; std::cout << "]\n\n";
// Extract the rows of W in the free variable set
Matrix WF = bfgs.Wb(fv_set);
// Compute F'BAb = -F'WMW'AA'd
Vector vecc(nfree);
bfgs.compute_FtBAb(WF, fv_set, newact_set, Wd, drt, vecc);
// Set the vector c=F'BAb+F'g for linear term, and vectors l and u for the new bounds
Vector vecl(nfree), vecu(nfree);
for (int i = 0; i < nfree; i++)
{
const int coord = fv_set[i];
vecl[i] = lb[coord] - x0[coord];
vecu[i] = ub[coord] - x0[coord];
vecc[i] += g[coord];
}
// Solve y = -inv(B[F, F]) * c
Vector vecy(nfree);
bfgs.solve_PtBP(WF, -vecc, vecy);
// Test feasibility
// If yes, then the solution has been found
if (in_bounds(vecy, vecl, vecu))
{
subvec_assign(drt, fv_set, vecy);
return;
}
// Otherwise, enter the iterations
// Make a copy of y as a fallback solution
Vector yfallback = vecy;
// Dual variables
Vector lambda = Vector::Zero(nfree), mu = Vector::Zero(nfree);
// Iterations
IndexSet L_set, U_set, P_set, yL_set, yU_set, yP_set;
L_set.reserve(nfree / 3);
yL_set.reserve(nfree / 3);
U_set.reserve(nfree / 3);
yU_set.reserve(nfree / 3);
P_set.reserve(nfree);
yP_set.reserve(nfree);
int k;
for (k = 0; k < maxit; k++)
{
// Construct the L, U, and P sets, and then update values
// Indices in original drt vector
L_set.clear();
U_set.clear();
P_set.clear();
// Indices in y
yL_set.clear();
yU_set.clear();
yP_set.clear();
for (int i = 0; i < nfree; i++)
{
const int coord = fv_set[i];
const Scalar li = vecl[i], ui = vecu[i];
if ((vecy[i] < li) || (vecy[i] == li && lambda[i] >= Scalar(0)))
{
L_set.push_back(coord);
yL_set.push_back(i);
vecy[i] = li;
mu[i] = Scalar(0);
}
else if ((vecy[i] > ui) || (vecy[i] == ui && mu[i] >= Scalar(0)))
{
U_set.push_back(coord);
yU_set.push_back(i);
vecy[i] = ui;
lambda[i] = Scalar(0);
}
else
{
P_set.push_back(coord);
yP_set.push_back(i);
lambda[i] = Scalar(0);
mu[i] = Scalar(0);
}
}
/* std::cout << "** Iter " << k << " **\n";
std::cout << " L = [ "; for(std::size_t i = 0; i < L_set.size(); i++) std::cout << L_set[i] << " "; std::cout << "]\n";
std::cout << " U = [ "; for(std::size_t i = 0; i < U_set.size(); i++) std::cout << U_set[i] << " "; std::cout << "]\n";
std::cout << " P = [ "; for(std::size_t i = 0; i < P_set.size(); i++) std::cout << P_set[i] << " "; std::cout << "]\n\n"; */
// Extract the rows of W in the P set
Matrix WP = bfgs.Wb(P_set);
// Solve y[P] = -inv(B[P, P]) * (B[P, L] * l[L] + B[P, U] * u[U] + c[P])
const int nP = P_set.size();
if (nP > 0)
{
Vector rhs = subvec(vecc, yP_set);
Vector lL = subvec(vecl, yL_set);
Vector uU = subvec(vecu, yU_set);
Vector tmp(nP);
bool nonzero = bfgs.apply_PtBQv(WP, L_set, lL, tmp, true);
if (nonzero)
rhs.noalias() += tmp;
nonzero = bfgs.apply_PtBQv(WP, U_set, uU, tmp, true);
if (nonzero)
rhs.noalias() += tmp;
bfgs.solve_PtBP(WP, -rhs, tmp);
subvec_assign(vecy, yP_set, tmp);
}
// Solve lambda[L] = B[L, F] * y + c[L]
const int nL = L_set.size();
const int nU = U_set.size();
Vector Fy;
if (nL > 0 || nU > 0)
bfgs.apply_WtPv(fv_set, vecy, Fy);
if (nL > 0)
{
Vector res;
bfgs.apply_PtWMv(L_set, Fy, res, Scalar(-1));
res.noalias() += subvec(vecc, yL_set) + bfgs.theta() * subvec(vecy, yL_set);
subvec_assign(lambda, yL_set, res);
}
// Solve mu[U] = -B[U, F] * y - c[U]
if (nU > 0)
{
Vector negRes;
bfgs.apply_PtWMv(U_set, Fy, negRes, Scalar(-1));
negRes.noalias() += subvec(vecc, yU_set) + bfgs.theta() * subvec(vecy, yU_set);
subvec_assign(mu, yU_set, -negRes);
}
// Test convergence
if (L_converged(yL_set, lambda) && U_converged(yU_set, mu) && P_converged(yP_set, vecy, vecl, vecu))
break;
}
// If the iterations do not converge, try the projection
if (k >= maxit)
{
vecy.noalias() = vecy.cwiseMax(vecl).cwiseMin(vecu);
subvec_assign(drt, fv_set, vecy);
// Test whether drt is a descent direction
Scalar dg = drt.dot(g);
// If yes, return the result
if (dg <= -std::numeric_limits<Scalar>::epsilon())
return;
// If not, fall back to the projected unconstrained solution
vecy.noalias() = yfallback.cwiseMax(vecl).cwiseMin(vecu);
subvec_assign(drt, fv_set, vecy);
dg = drt.dot(g);
if (dg <= -std::numeric_limits<Scalar>::epsilon())
return;
// If still not, fall back to the unconstrained solution
subvec_assign(drt, fv_set, yfallback);
return;
}
// std::cout << "** Minimization finished in " << k + 1 << " iteration(s) **\n\n";
// std::cout << "========================= Leaving subspace minimization =========================\n\n";
subvec_assign(drt, fv_set, vecy);
}
};
} // namespace LBFGSpp
/// \endcond
#endif // LBFGSPP_SUBSPACE_MIN_H
|