File: lbfgs_test.cpp

package info (click to toggle)
ensmallen 3.11.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 5,952 kB
  • sloc: cpp: 44,963; sh: 186; makefile: 35
file content (152 lines) | stat: -rw-r--r-- 4,832 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/**
 * @file lbfgs_test.cpp
 * @author Ryan Curtin
 * @author Marcus Edel
 * @author Conrad Sanderson
 *
 * ensmallen is free software; you may redistribute it and/or modify it under
 * the terms of the 3-clause BSD license.  You should have received a copy of
 * the 3-clause BSD license along with ensmallen.  If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#if defined(ENS_USE_COOT)
  #include <armadillo>
  #include <bandicoot>
#endif
#include <ensmallen.hpp>
#include "catch.hpp"
#include "test_function_tools.hpp"

using namespace ens;
using namespace ens::test;

// NOTE: L-BFGS depends on the squared gradient norm and the squared norm of
// different of gradients between iterations, and this can be much too large to
// be represented by FP16.  So, we have only one FP16 test case.

TEMPLATE_TEST_CASE("LBFGS_RosenbrockFunction", "[LBFGS]",
    ENS_FULLPREC_TEST_TYPES, ENS_SPARSE_TEST_TYPES)
{
  L_BFGS lbfgs;
  lbfgs.MaxIterations() = 10000;

  FunctionTest<RosenbrockFunction, TestType>(lbfgs,
      Tolerances<TestType>::LargeObj,
      Tolerances<TestType>::LargeCoord);
}

/**
 * Test the L-BFGS optimizer using an arma::mat with the Rosenbrock function and
 * a sparse gradient.
 */
TEMPLATE_TEST_CASE("LBFGS_RosenbrockGradFunction", "[LBFGS]",
    ENS_SPARSE_TEST_TYPES)
{
  typedef typename TestType::elem_type ElemType;

  RosenbrockFunction f;
  L_BFGS lbfgs;
  lbfgs.MaxIterations() = 10000;

  arma::Mat<ElemType> coords = f.GetInitialPoint<arma::Col<ElemType> >();
  lbfgs.Optimize<RosenbrockFunction, arma::Mat<ElemType>, TestType>(f, coords);

  ElemType finalValue = f.Evaluate(coords);

  REQUIRE(finalValue == Approx(0.0).margin(Tolerances<TestType>::Obj));
  REQUIRE(coords(0) == Approx(1.0).epsilon(Tolerances<TestType>::Coord));
  REQUIRE(coords(1) == Approx(1.0).epsilon(Tolerances<TestType>::Coord));
}

TEMPLATE_TEST_CASE("LBFGS_ColvilleFunction", "[LBFGS]", ENS_TEST_TYPES)
{
  L_BFGS lbfgs;
  lbfgs.MaxIterations() = 10000;
  FunctionTest<ColvilleFunction, TestType>(lbfgs,
      Tolerances<TestType>::LargeObj,
      Tolerances<TestType>::LargeCoord);
}

TEMPLATE_TEST_CASE("LBFGS_WoodFunction", "[LBFGS]", ENS_FULLPREC_TEST_TYPES)
{
  typedef typename TestType::elem_type ElemType;

  L_BFGS lbfgs;
  // Special tolerances: L-BFGS with floats will converge too early.
  const double tol = std::is_same<ElemType, float>::value ? 20.0 : 1e-8;
  FunctionTest<WoodFunction, TestType>(lbfgs, ElemType(tol),
      ElemType(tol / 10));
}

/**
 * Tests the L-BFGS optimizer using the generalized Rosenbrock function.  This
 * is actually multiple tests, increasing the dimension by powers of 2, from 4
 * dimensions to 1024 dimensions.
 */
TEMPLATE_TEST_CASE("LBFGS_GeneralizedRosenbrockFunction", "[LBFGS]",
    ENS_FULLPREC_CPU_TEST_TYPES)
{
  typedef typename TestType::elem_type ElemType;

  for (size_t i = 2; i < 10; i++)
  {
    // Dimension: powers of 2
    size_t dim = std::pow(2.0, i);

    GeneralizedRosenbrockFunction f(dim);
    L_BFGS lbfgs(20);
    lbfgs.MaxIterations() = 10000;

    TestType coords = f.GetInitialPoint<TestType>();
    lbfgs.Optimize(f, coords);

    ElemType finalValue = f.Evaluate(coords);

    // Test the output to make sure it is correct.
    REQUIRE(finalValue == Approx(0.0).margin(Tolerances<TestType>::Obj));
    for (size_t j = 0; j < dim; j++)
      REQUIRE(coords(j) == Approx(1.0).epsilon(Tolerances<TestType>::Coord));
  }
}

// This test will work with all test types (including FP16), but we leave the
// tolerances quite loose.
TEMPLATE_TEST_CASE("LBFGS_GeneralizedRosenbrockFunctionLoose", "[LBFGS]",
    ENS_ALL_TEST_TYPES)
{
  typedef typename TestType::elem_type ElemType;

  GeneralizedRosenbrockFunction f(2);
  L_BFGS lbfgs(20);
  lbfgs.MaxIterations() = 1000;
  // For FP16, to keep the gradient different norm small enough, we must limit
  // the step size.
  if (sizeof(ElemType) < 4)
    lbfgs.MaxStep() = 0.15;

  TestType coords = f.GetInitialPoint<TestType>();
  lbfgs.Optimize(f, coords);

  ElemType finalValue = f.Evaluate(coords);

  // Test the output to make sure it is correct.
  REQUIRE(finalValue ==
      Approx(0.0).margin(50 * Tolerances<TestType>::LargeObj));
  REQUIRE(coords(0) ==
        Approx(1.0).margin(50 * Tolerances<TestType>::LargeCoord));
  REQUIRE(coords(1) ==
        Approx(1.0).margin(50 * Tolerances<TestType>::LargeCoord));
}

TEMPLATE_TEST_CASE("LBFGS_RosenbrockWoodFunction", "[LBFGS]",
    ENS_FULLPREC_TEST_TYPES)
{
  typedef typename TestType::elem_type ElemType;

  L_BFGS lbfgs;
  lbfgs.MaxIterations() = 10000;
  // Special tolerances: L-BFGS with floats will converge too early.
  const double tol = std::is_same<ElemType, float>::value ? 20.0 : 1e-8;
  FunctionTest<RosenbrockWoodFunction, TestType>(lbfgs, ElemType(tol),
      ElemType(tol / 10));
}