1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
/**
* @file lbfgs_test.cpp
* @author Ryan Curtin
* @author Marcus Edel
* @author Conrad Sanderson
*
* ensmallen is free software; you may redistribute it and/or modify it under
* the terms of the 3-clause BSD license. You should have received a copy of
* the 3-clause BSD license along with ensmallen. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/
#if defined(ENS_USE_COOT)
#include <armadillo>
#include <bandicoot>
#endif
#include <ensmallen.hpp>
#include "catch.hpp"
#include "test_function_tools.hpp"
using namespace ens;
using namespace ens::test;
// NOTE: L-BFGS depends on the squared gradient norm and the squared norm of
// different of gradients between iterations, and this can be much too large to
// be represented by FP16. So, we have only one FP16 test case.
TEMPLATE_TEST_CASE("LBFGS_RosenbrockFunction", "[LBFGS]",
ENS_FULLPREC_TEST_TYPES, ENS_SPARSE_TEST_TYPES)
{
L_BFGS lbfgs;
lbfgs.MaxIterations() = 10000;
FunctionTest<RosenbrockFunction, TestType>(lbfgs,
Tolerances<TestType>::LargeObj,
Tolerances<TestType>::LargeCoord);
}
/**
* Test the L-BFGS optimizer using an arma::mat with the Rosenbrock function and
* a sparse gradient.
*/
TEMPLATE_TEST_CASE("LBFGS_RosenbrockGradFunction", "[LBFGS]",
ENS_SPARSE_TEST_TYPES)
{
typedef typename TestType::elem_type ElemType;
RosenbrockFunction f;
L_BFGS lbfgs;
lbfgs.MaxIterations() = 10000;
arma::Mat<ElemType> coords = f.GetInitialPoint<arma::Col<ElemType> >();
lbfgs.Optimize<RosenbrockFunction, arma::Mat<ElemType>, TestType>(f, coords);
ElemType finalValue = f.Evaluate(coords);
REQUIRE(finalValue == Approx(0.0).margin(Tolerances<TestType>::Obj));
REQUIRE(coords(0) == Approx(1.0).epsilon(Tolerances<TestType>::Coord));
REQUIRE(coords(1) == Approx(1.0).epsilon(Tolerances<TestType>::Coord));
}
TEMPLATE_TEST_CASE("LBFGS_ColvilleFunction", "[LBFGS]", ENS_TEST_TYPES)
{
L_BFGS lbfgs;
lbfgs.MaxIterations() = 10000;
FunctionTest<ColvilleFunction, TestType>(lbfgs,
Tolerances<TestType>::LargeObj,
Tolerances<TestType>::LargeCoord);
}
TEMPLATE_TEST_CASE("LBFGS_WoodFunction", "[LBFGS]", ENS_FULLPREC_TEST_TYPES)
{
typedef typename TestType::elem_type ElemType;
L_BFGS lbfgs;
// Special tolerances: L-BFGS with floats will converge too early.
const double tol = std::is_same<ElemType, float>::value ? 20.0 : 1e-8;
FunctionTest<WoodFunction, TestType>(lbfgs, ElemType(tol),
ElemType(tol / 10));
}
/**
* Tests the L-BFGS optimizer using the generalized Rosenbrock function. This
* is actually multiple tests, increasing the dimension by powers of 2, from 4
* dimensions to 1024 dimensions.
*/
TEMPLATE_TEST_CASE("LBFGS_GeneralizedRosenbrockFunction", "[LBFGS]",
ENS_FULLPREC_CPU_TEST_TYPES)
{
typedef typename TestType::elem_type ElemType;
for (size_t i = 2; i < 10; i++)
{
// Dimension: powers of 2
size_t dim = std::pow(2.0, i);
GeneralizedRosenbrockFunction f(dim);
L_BFGS lbfgs(20);
lbfgs.MaxIterations() = 10000;
TestType coords = f.GetInitialPoint<TestType>();
lbfgs.Optimize(f, coords);
ElemType finalValue = f.Evaluate(coords);
// Test the output to make sure it is correct.
REQUIRE(finalValue == Approx(0.0).margin(Tolerances<TestType>::Obj));
for (size_t j = 0; j < dim; j++)
REQUIRE(coords(j) == Approx(1.0).epsilon(Tolerances<TestType>::Coord));
}
}
// This test will work with all test types (including FP16), but we leave the
// tolerances quite loose.
TEMPLATE_TEST_CASE("LBFGS_GeneralizedRosenbrockFunctionLoose", "[LBFGS]",
ENS_ALL_TEST_TYPES)
{
typedef typename TestType::elem_type ElemType;
GeneralizedRosenbrockFunction f(2);
L_BFGS lbfgs(20);
lbfgs.MaxIterations() = 1000;
// For FP16, to keep the gradient different norm small enough, we must limit
// the step size.
if (sizeof(ElemType) < 4)
lbfgs.MaxStep() = 0.15;
TestType coords = f.GetInitialPoint<TestType>();
lbfgs.Optimize(f, coords);
ElemType finalValue = f.Evaluate(coords);
// Test the output to make sure it is correct.
REQUIRE(finalValue ==
Approx(0.0).margin(50 * Tolerances<TestType>::LargeObj));
REQUIRE(coords(0) ==
Approx(1.0).margin(50 * Tolerances<TestType>::LargeCoord));
REQUIRE(coords(1) ==
Approx(1.0).margin(50 * Tolerances<TestType>::LargeCoord));
}
TEMPLATE_TEST_CASE("LBFGS_RosenbrockWoodFunction", "[LBFGS]",
ENS_FULLPREC_TEST_TYPES)
{
typedef typename TestType::elem_type ElemType;
L_BFGS lbfgs;
lbfgs.MaxIterations() = 10000;
// Special tolerances: L-BFGS with floats will converge too early.
const double tol = std::is_same<ElemType, float>::value ? 20.0 : 1e-8;
FunctionTest<RosenbrockWoodFunction, TestType>(lbfgs, ElemType(tol),
ElemType(tol / 10));
}
|