File: SubspaceMin.h

package info (click to toggle)
lbfgspp 0.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 420 kB
  • sloc: cpp: 2,225; sh: 19; makefile: 4
file content (309 lines) | stat: -rw-r--r-- 11,204 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
// Copyright (C) 2020-2025 Yixuan Qiu <yixuan.qiu@cos.name>
// Under MIT license

#ifndef LBFGSPP_SUBSPACE_MIN_H
#define LBFGSPP_SUBSPACE_MIN_H

#include <stdexcept>
#include <vector>
#include <Eigen/Core>
#include "BFGSMat.h"

/// \cond

namespace LBFGSpp {

//
// Subspace minimization procedure of the L-BFGS-B algorithm,
// mainly for internal use.
//
// The target of subspace minimization is to minimize the quadratic function m(x)
// over the free variables, subject to the bound condition.
// Free variables stand for coordinates that are not at the boundary in xcp,
// the generalized Cauchy point.
//
// In the classical implementation of L-BFGS-B [1], the minimization is done by first
// ignoring the box constraints, followed by a line search. Our implementation is
// an exact minimization subject to the bounds, based on the BOXCQP algorithm [2].
//
// Reference:
// [1] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound constrained optimization.
// [2] C. Voglis and I. E. Lagaris (2004). BOXCQP: An algorithm for bound constrained convex quadratic problems.
//
template <typename Scalar>
class SubspaceMin
{
private:
    using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;
    using Matrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
    using IndexSet = std::vector<int>;

    // v[ind]
    static Vector subvec(const Vector& v, const IndexSet& ind)
    {
        const int nsub = ind.size();
        Vector res(nsub);
        for (int i = 0; i < nsub; i++)
            res[i] = v[ind[i]];
        return res;
    }

    // v[ind] = rhs
    static void subvec_assign(Vector& v, const IndexSet& ind, const Vector& rhs)
    {
        const int nsub = ind.size();
        for (int i = 0; i < nsub; i++)
            v[ind[i]] = rhs[i];
    }

    // Check whether the vector is within the bounds
    static bool in_bounds(const Vector& x, const Vector& lb, const Vector& ub)
    {
        const int n = x.size();
        for (int i = 0; i < n; i++)
        {
            if (x[i] < lb[i] || x[i] > ub[i])
                return false;
        }
        return true;
    }

    // Test convergence of P set
    static bool P_converged(const IndexSet& yP_set, const Vector& vecy, const Vector& vecl, const Vector& vecu)
    {
        const int nP = yP_set.size();
        for (int i = 0; i < nP; i++)
        {
            const int coord = yP_set[i];
            if (vecy[coord] < vecl[coord] || vecy[coord] > vecu[coord])
                return false;
        }
        return true;
    }

    // Test convergence of L set
    static bool L_converged(const IndexSet& yL_set, const Vector& lambda)
    {
        const int nL = yL_set.size();
        for (int i = 0; i < nL; i++)
        {
            const int coord = yL_set[i];
            if (lambda[coord] < Scalar(0))
                return false;
        }
        return true;
    }

    // Test convergence of L set
    static bool U_converged(const IndexSet& yU_set, const Vector& mu)
    {
        const int nU = yU_set.size();
        for (int i = 0; i < nU; i++)
        {
            const int coord = yU_set[i];
            if (mu[coord] < Scalar(0))
                return false;
        }
        return true;
    }

public:
    // bfgs:       An object that represents the BFGS approximation matrix.
    // x0:         Current parameter vector.
    // xcp:        Computed generalized Cauchy point.
    // g:          Gradient at x0.
    // lb:         Lower bounds for x.
    // ub:         Upper bounds for x.
    // Wd:         W'(xcp - x0)
    // newact_set: Coordinates that newly become active during the GCP procedure.
    // fv_set:     Free variable set.
    // maxit:      Maximum number of iterations.
    // drt:        The output direction vector, drt = xsm - x0.
    static void subspace_minimize(
        const BFGSMat<Scalar, true>& bfgs, const Vector& x0, const Vector& xcp, const Vector& g,
        const Vector& lb, const Vector& ub, const Vector& Wd, const IndexSet& newact_set, const IndexSet& fv_set, int maxit,
        Vector& drt)
    {
        // std::cout << "========================= Entering subspace minimization =========================\n\n";

        // d = xcp - x0
        drt.noalias() = xcp - x0;
        // Size of free variables
        const int nfree = fv_set.size();
        // If there is no free variable, simply return drt
        if (nfree < 1)
        {
            // std::cout << "========================= (Early) leaving subspace minimization =========================\n\n";
            return;
        }

        // std::cout << "New active set = [ "; for(std::size_t i = 0; i < newact_set.size(); i++)  std::cout << newact_set[i] << " "; std::cout << "]\n";
        // std::cout << "Free variable set = [ "; for(std::size_t i = 0; i < fv_set.size(); i++)  std::cout << fv_set[i] << " "; std::cout << "]\n\n";

        // Extract the rows of W in the free variable set
        Matrix WF = bfgs.Wb(fv_set);
        // Compute F'BAb = -F'WMW'AA'd
        Vector vecc(nfree);
        bfgs.compute_FtBAb(WF, fv_set, newact_set, Wd, drt, vecc);
        // Set the vector c=F'BAb+F'g for linear term, and vectors l and u for the new bounds
        Vector vecl(nfree), vecu(nfree);
        for (int i = 0; i < nfree; i++)
        {
            const int coord = fv_set[i];
            vecl[i] = lb[coord] - x0[coord];
            vecu[i] = ub[coord] - x0[coord];
            vecc[i] += g[coord];
        }
        // Solve y = -inv(B[F, F]) * c
        Vector vecy(nfree);
        bfgs.solve_PtBP(WF, -vecc, vecy);
        // Test feasibility
        // If yes, then the solution has been found
        if (in_bounds(vecy, vecl, vecu))
        {
            subvec_assign(drt, fv_set, vecy);
            return;
        }
        // Otherwise, enter the iterations

        // Make a copy of y as a fallback solution
        Vector yfallback = vecy;
        // Dual variables
        Vector lambda = Vector::Zero(nfree), mu = Vector::Zero(nfree);

        // Iterations
        IndexSet L_set, U_set, P_set, yL_set, yU_set, yP_set;
        L_set.reserve(nfree / 3);
        yL_set.reserve(nfree / 3);
        U_set.reserve(nfree / 3);
        yU_set.reserve(nfree / 3);
        P_set.reserve(nfree);
        yP_set.reserve(nfree);
        int k;
        for (k = 0; k < maxit; k++)
        {
            // Construct the L, U, and P sets, and then update values
            // Indices in original drt vector
            L_set.clear();
            U_set.clear();
            P_set.clear();
            // Indices in y
            yL_set.clear();
            yU_set.clear();
            yP_set.clear();
            for (int i = 0; i < nfree; i++)
            {
                const int coord = fv_set[i];
                const Scalar li = vecl[i], ui = vecu[i];
                if ((vecy[i] < li) || (vecy[i] == li && lambda[i] >= Scalar(0)))
                {
                    L_set.push_back(coord);
                    yL_set.push_back(i);
                    vecy[i] = li;
                    mu[i] = Scalar(0);
                }
                else if ((vecy[i] > ui) || (vecy[i] == ui && mu[i] >= Scalar(0)))
                {
                    U_set.push_back(coord);
                    yU_set.push_back(i);
                    vecy[i] = ui;
                    lambda[i] = Scalar(0);
                }
                else
                {
                    P_set.push_back(coord);
                    yP_set.push_back(i);
                    lambda[i] = Scalar(0);
                    mu[i] = Scalar(0);
                }
            }

            /* std::cout << "** Iter " << k << " **\n";
            std::cout << "   L = [ "; for(std::size_t i = 0; i < L_set.size(); i++)  std::cout << L_set[i] << " "; std::cout << "]\n";
            std::cout << "   U = [ "; for(std::size_t i = 0; i < U_set.size(); i++)  std::cout << U_set[i] << " "; std::cout << "]\n";
            std::cout << "   P = [ "; for(std::size_t i = 0; i < P_set.size(); i++)  std::cout << P_set[i] << " "; std::cout << "]\n\n"; */

            // Extract the rows of W in the P set
            Matrix WP = bfgs.Wb(P_set);
            // Solve y[P] = -inv(B[P, P]) * (B[P, L] * l[L] + B[P, U] * u[U] + c[P])
            const int nP = P_set.size();
            if (nP > 0)
            {
                Vector rhs = subvec(vecc, yP_set);
                Vector lL = subvec(vecl, yL_set);
                Vector uU = subvec(vecu, yU_set);
                Vector tmp(nP);
                bool nonzero = bfgs.apply_PtBQv(WP, L_set, lL, tmp, true);
                if (nonzero)
                    rhs.noalias() += tmp;
                nonzero = bfgs.apply_PtBQv(WP, U_set, uU, tmp, true);
                if (nonzero)
                    rhs.noalias() += tmp;

                bfgs.solve_PtBP(WP, -rhs, tmp);
                subvec_assign(vecy, yP_set, tmp);
            }

            // Solve lambda[L] = B[L, F] * y + c[L]
            const int nL = L_set.size();
            const int nU = U_set.size();
            Vector Fy;
            if (nL > 0 || nU > 0)
                bfgs.apply_WtPv(fv_set, vecy, Fy);
            if (nL > 0)
            {
                Vector res;
                bfgs.apply_PtWMv(L_set, Fy, res, Scalar(-1));
                res.noalias() += subvec(vecc, yL_set) + bfgs.theta() * subvec(vecy, yL_set);
                subvec_assign(lambda, yL_set, res);
            }

            // Solve mu[U] = -B[U, F] * y - c[U]
            if (nU > 0)
            {
                Vector negRes;
                bfgs.apply_PtWMv(U_set, Fy, negRes, Scalar(-1));
                negRes.noalias() += subvec(vecc, yU_set) + bfgs.theta() * subvec(vecy, yU_set);
                subvec_assign(mu, yU_set, -negRes);
            }

            // Test convergence
            if (L_converged(yL_set, lambda) && U_converged(yU_set, mu) && P_converged(yP_set, vecy, vecl, vecu))
                break;
        }

        // If the iterations do not converge, try the projection
        if (k >= maxit)
        {
            vecy.noalias() = vecy.cwiseMax(vecl).cwiseMin(vecu);
            subvec_assign(drt, fv_set, vecy);
            // Test whether drt is a descent direction
            Scalar dg = drt.dot(g);
            // If yes, return the result
            if (dg <= -std::numeric_limits<Scalar>::epsilon())
                return;

            // If not, fall back to the projected unconstrained solution
            vecy.noalias() = yfallback.cwiseMax(vecl).cwiseMin(vecu);
            subvec_assign(drt, fv_set, vecy);
            dg = drt.dot(g);
            if (dg <= -std::numeric_limits<Scalar>::epsilon())
                return;

            // If still not, fall back to the unconstrained solution
            subvec_assign(drt, fv_set, yfallback);
            return;
        }

        // std::cout << "** Minimization finished in " << k + 1 << " iteration(s) **\n\n";
        // std::cout << "========================= Leaving subspace minimization =========================\n\n";

        subvec_assign(drt, fv_set, vecy);
    }
};

}  // namespace LBFGSpp

/// \endcond

#endif  // LBFGSPP_SUBSPACE_MIN_H