File: itkAdaptiveStepsizeOptimizer.h

Package: elastix 5.2.0-2
/*=========================================================================
 *
 *  Copyright UMC Utrecht and contributors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0.txt
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 *=========================================================================*/
#ifndef itkAdaptiveStepsizeOptimizer_h
#define itkAdaptiveStepsizeOptimizer_h

#include "../StandardGradientDescent/itkStandardGradientDescentOptimizer.h"

namespace itk
{

/**
 * \class AdaptiveStepsizeOptimizer
 * \brief This class implements a gradient descent optimizer with adaptive gain.
 *
 * If \f$C(x)\f$ is a cost function that has to be minimized, the following iterative
 * algorithm is used to find the optimal parameters \f$x\f$:
 *
 *   \f[ x(k+1) = x(k) - a(t_k) dC/dx \f]
 *
 * The gain \f$a(t_k)\f$ at each iteration \f$k\f$ is defined by:
 *
 *   \f[ a(t_k) = a / (A + t_k + 1)^{\alpha}. \f]
 *
 * And the time \f$t_k\f$ is updated according to:
 *
 *   \f[ t_{k+1} = [\, t_k + \mathrm{sigmoid}( -g_k^T g_{k-1} ) \,]^+ \f]
 *
 * where \f$g_k\f$ equals \f$dC/dx\f$ at iteration \f$k\f$.
 * For \f$t_0\f$ the InitialTime is used, which is defined in the
 * superclass (StandardGradientDescentOptimizer). While this parameter
 * is superfluous in the superclass, in this class it plays a meaningful role.
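 *
 * To make this concrete (with purely hypothetical values, for illustration
 * only): taking \f$a = 1\f$, \f$A = 20\f$, and \f$\alpha = 0.6\f$, the gain at
 * \f$t_0 = 0\f$ is \f$1 / 21^{0.6} \approx 0.16\f$. When successive gradients
 * point in similar directions, \f$-g_k^T g_{k-1}\f$ is negative, the sigmoid
 * returns a negative value, the time decreases, and the gain grows; with
 * oscillating gradients the time advances and the gain shrinks.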
 *
 * This method is described in the following references:
 *
 * [1] P. Cruz,
 * "Almost sure convergence and asymptotical normality of a generalization of Kesten's
 * stochastic approximation algorithm for multidimensional case."
 * Technical Report, 2005. http://hdl.handle.net/2052/74
 *
 * [2] S. Klein, J.P.W. Pluim, and M. Staring, M.A. Viergever,
 * "Adaptive stochastic gradient descent optimisation for image registration,"
 * International Journal of Computer Vision, vol. 81, no. 3, pp. 227-239, 2009.
 * http://dx.doi.org/10.1007/s11263-008-0168-y
 *
 * This method is well suited for use in combination with a stochastic estimate
 * of the gradient \f$dC/dx\f$. For example, in image registration problems it is
 * often advantageous to compute the metric derivative (\f$dC/dx\f$) on a new set
 * of randomly selected image samples in each iteration. You may set the parameter
 * \c NewSamplesEveryIteration to \c "true" to achieve this effect.
 * For more information on this strategy, see reference [2] above and the
 * related classes listed below.
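 *
 * A minimal usage sketch (illustrative only: \c costFunction and
 * \c initialParameters are assumed to be prepared elsewhere, and the
 * parameter values shown are hypothetical, not recommendations):
 *
 * \code
 * auto optimizer = itk::AdaptiveStepsizeOptimizer::New();
 * optimizer->SetCostFunction(costFunction);
 * optimizer->SetInitialPosition(initialParameters);
 * optimizer->SetNumberOfIterations(500);
 * optimizer->SetUseAdaptiveStepSizes(true);
 * optimizer->SetSigmoidMax(1.0);
 * optimizer->SetSigmoidMin(-0.8);
 * optimizer->SetSigmoidScale(1e-8);
 * optimizer->StartOptimization();
 * \endcode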
 *
 * \sa VoxelWiseASGD, StandardGradientDescentOptimizer
 * \ingroup Optimizers
 */

class AdaptiveStepsizeOptimizer : public StandardGradientDescentOptimizer
{
public:
  ITK_DISALLOW_COPY_AND_MOVE(AdaptiveStepsizeOptimizer);

  /** Standard ITK typedefs. */
  using Self = AdaptiveStepsizeOptimizer;
  using Superclass = StandardGradientDescentOptimizer;
  using Pointer = SmartPointer<Self>;
  using ConstPointer = SmartPointer<const Self>;

  /** Method for creation through the object factory. */
  itkNewMacro(Self);

  /** Run-time type information (and related methods). */
  itkTypeMacro(AdaptiveStepsizeOptimizer, StandardGradientDescentOptimizer);

  /** Typedefs inherited from the superclass. */
  using Superclass::MeasureType;
  using Superclass::ParametersType;
  using Superclass::DerivativeType;
  using Superclass::CostFunctionType;
  using Superclass::ScalesType;
  using Superclass::ScaledCostFunctionType;
  using Superclass::ScaledCostFunctionPointer;
  using Superclass::StopConditionType;

  /** Set/Get whether the adaptive step size mechanism is desired. Default: true */
  itkSetMacro(UseAdaptiveStepSizes, bool);
  itkGetConstMacro(UseAdaptiveStepSizes, bool);

  /** Set/Get the maximum of the sigmoid.
   * Should be >0. Default: 1.0 */
  itkSetMacro(SigmoidMax, double);
  itkGetConstMacro(SigmoidMax, double);

  /** Set/Get the minimum of the sigmoid.
   * Should be <0. Default: -0.8 */
  itkSetMacro(SigmoidMin, double);
  itkGetConstMacro(SigmoidMin, double);

  /** Set/Get the scaling of the sigmoid width. Larger values
   * make the sigmoid wider. Default: 1e-8. Should be >0. */
  itkSetMacro(SigmoidScale, double);
  itkGetConstMacro(SigmoidScale, double);
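
  /* Taken together, these three parameters shape the sigmoid used for the
   * adaptive time update: SigmoidMax bounds how far the time can advance in
   * one iteration (shrinking the gain), SigmoidMin bounds how far it can
   * decrease (growing the gain), and SigmoidScale controls how sensitive the
   * sigmoid is to the size of the gradient inner product. */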

  /** Get the precondition vector. */
  itkGetConstReferenceMacro(PreconditionVector, ParametersType);

protected:
  AdaptiveStepsizeOptimizer();
  ~AdaptiveStepsizeOptimizer() override = default;

  /** Function to update the current time.
   * If UseAdaptiveStepSizes is false, this function just increments
   * the CurrentTime by \f$E_0 = (\mathrm{SigmoidMax} + \mathrm{SigmoidMin})/2\f$.
   * Otherwise, the CurrentTime is updated according to:\n
   * time = max[ 0, time + sigmoid( -gradient * previousgradient ) ]\n
   * In that case, the m_PreviousGradient is also updated.
   */
  void
  UpdateCurrentTime() override;
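
  /* A sketch of the adaptive branch of UpdateCurrentTime(), assuming a
   * scaled logistic sigmoid (the exact sigmoid shape is not visible in this
   * header, so treat this as illustrative pseudocode rather than the actual
   * implementation):
   *
   *   const double inprod = inner_product(gradient, previousGradient);
   *   const double s = m_SigmoidMin + (m_SigmoidMax - m_SigmoidMin)
   *                    / (1.0 + std::exp(inprod / m_SigmoidScale));
   *   // time = [ time + sigmoid(-inprod) ]^+
   *   currentTime = std::max(0.0, currentTime + s);
   */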

  /** The previous search direction, needed for the Cruz acceleration. */
  DerivativeType m_PreviousSearchDirection{};

  /** The precondition vector and the name of the step size strategy. */
  ParametersType m_PreconditionVector{};
  std::string    m_StepSizeStrategy{};

private:
  /** Settings */
  bool   m_UseAdaptiveStepSizes{ true };
  double m_SigmoidMax{ 1.0 };
  double m_SigmoidMin{ -0.8 };
  double m_SigmoidScale{ 1e-8 };
};

} // end namespace itk

#endif