/* * Copyright (c) 2009-2014, Peter Abeles. All Rights Reserved. * * This file is part of Efficient Java Matrix Library (EJML). * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ejml.example; import org.ejml.data.DenseMatrix64F; import static org.ejml.ops.CommonOps.*; import static org.ejml.ops.SpecializedOps.diffNormF; /** *

* This is a straight forward implementation of the Levenberg-Marquardt (LM) algorithm. LM is used to minimize * non-linear cost functions:
*
* S(P) = Sum{ i=1:m , [y_i - f(x_i,P)]²}
*
* where P is the set of parameters being optimized. *

* *

* In each iteration the parameters are updated using the following equations:
*
* P_i+1 = (H + λ I)^-1 d
* d = (1/N) Sum{ i=1..N , (f(x_i;P_i) - y_i) * jacobian(:,i) }
* H = (1/N) Sum{ i=1..N , jacobian(:,i) * jacobian(:,i)^T } *

* Whenever possible the allocation of new memory is avoided. This is accomplished by reshaping matrices. * A matrix that is reshaped won't grow unless the new shape requires more memory than it has available. *

* @author Peter Abeles */ public class LevenbergMarquardt { // how much the numerical jacobian calculation perturbs the parameters by. // In better implementation there are better ways to compute this delta. See Numerical Recipes. private final static double DELTA = 1e-8; private double initialLambda; // the function that is optimized private Function func; // the optimized parameters and associated costs private DenseMatrix64F param; private double initialCost; private double finalCost; // used by matrix operations private DenseMatrix64F d; private DenseMatrix64F H; private DenseMatrix64F negDelta; private DenseMatrix64F tempParam; private DenseMatrix64F A; // variables used by the numerical jacobian algorithm private DenseMatrix64F temp0; private DenseMatrix64F temp1; // used when computing d and H variables private DenseMatrix64F tempDH; // Where the numerical Jacobian is stored. private DenseMatrix64F jacobian; /** * Creates a new instance that uses the provided cost function. * * @param funcCost Cost function that is being optimized. */ public LevenbergMarquardt( Function funcCost ) { this.initialLambda = 1; // declare data to some initial small size. It will grow later on as needed. int maxElements = 1; int numParam = 1; this.temp0 = new DenseMatrix64F(maxElements,1); this.temp1 = new DenseMatrix64F(maxElements,1); this.tempDH = new DenseMatrix64F(maxElements,1); this.jacobian = new DenseMatrix64F(numParam,maxElements); this.func = funcCost; this.param = new DenseMatrix64F(numParam,1); this.d = new DenseMatrix64F(numParam,1); this.H = new DenseMatrix64F(numParam,numParam); this.negDelta = new DenseMatrix64F(numParam,1); this.tempParam = new DenseMatrix64F(numParam,1); this.A = new DenseMatrix64F(numParam,numParam); } public double getInitialCost() { return initialCost; } public double getFinalCost() { return finalCost; } public DenseMatrix64F getParameters() { return param; } /** * Finds the best fit parameters. * * @param initParam The initial set of parameters for the function. * @param X The inputs to the function. * @param Y The "observed" output of the function * @return true if it succeeded and false if it did not. */ public boolean optimize( DenseMatrix64F initParam , DenseMatrix64F X , DenseMatrix64F Y ) { configure(initParam,X,Y); // save the cost of the initial parameters so that it knows if it improves or not initialCost = cost(param,X,Y); // iterate until the difference between the costs is insignificant // or it iterates too many times if( !adjustParam(X, Y, initialCost) ) { finalCost = Double.NaN; return false; } return true; } /** * Iterate until the difference between the costs is insignificant * or it iterates too many times */ private boolean adjustParam(DenseMatrix64F X, DenseMatrix64F Y, double prevCost) { // lambda adjusts how big of a step it takes double lambda = initialLambda; // the difference between the current and previous cost double difference = 1000; for( int iter = 0; iter < 20 || difference < 1e-6 ; iter++ ) { // compute some variables based on the gradient computeDandH(param,X,Y); // try various step sizes and see if any of them improve the // results over what has already been done boolean foundBetter = false; for( int i = 0; i < 5; i++ ) { computeA(A,H,lambda); if( !solve(A,d,negDelta) ) { return false; } // compute the candidate parameters subtract(param, negDelta, tempParam); double cost = cost(tempParam,X,Y); if( cost < prevCost ) { // the candidate parameters produced better results so use it foundBetter = true; param.set(tempParam); difference = prevCost - cost; prevCost = cost; lambda /= 10.0; } else { lambda *= 10.0; } } // it reached a point where it can't improve so exit if( !foundBetter ) break; } finalCost = prevCost; return true; } /** * Performs sanity checks on the input data and reshapes internal matrices. By reshaping * a matrix it will only declare new memory when needed. */ protected void configure( DenseMatrix64F initParam , DenseMatrix64F X , DenseMatrix64F Y ) { if( Y.getNumRows() != X.getNumRows() ) { throw new IllegalArgumentException("Different vector lengths"); } else if( Y.getNumCols() != 1 || X.getNumCols() != 1 ) { throw new IllegalArgumentException("Inputs must be a column vector"); } int numParam = initParam.getNumElements(); int numPoints = Y.getNumRows(); if( param.getNumElements() != initParam.getNumElements() ) { // reshaping a matrix means that new memory is only declared when needed this.param.reshape(numParam,1, false); this.d.reshape(numParam,1, false); this.H.reshape(numParam,numParam, false); this.negDelta.reshape(numParam,1, false); this.tempParam.reshape(numParam,1, false); this.A.reshape(numParam,numParam, false); } param.set(initParam); // reshaping a matrix means that new memory is only declared when needed temp0.reshape(numPoints,1, false); temp1.reshape(numPoints,1, false); tempDH.reshape(numPoints,1, false); jacobian.reshape(numParam,numPoints, false); } /** * Computes the d and H parameters. Where d is the average error gradient and * H is an approximation of the hessian. */ private void computeDandH( DenseMatrix64F param , DenseMatrix64F x , DenseMatrix64F y ) { func.compute(param,x, tempDH); subtractEquals(tempDH, y); computeNumericalJacobian(param,x,jacobian); int numParam = param.getNumElements(); int length = x.getNumElements(); // d = average{ (f(x_i;p) - y_i) * jacobian(:,i) } for( int i = 0; i < numParam; i++ ) { double total = 0; for( int j = 0; j < length; j++ ) { total += tempDH.get(j,0)*jacobian.get(i,j); } d.set(i,0,total/length); } // compute the approximation of the hessian multTransB(jacobian,jacobian,H); scale(1.0/length,H); } /** * A = H + lambda*I
*
* where I is an identity matrix. */ private void computeA( DenseMatrix64F A , DenseMatrix64F H , double lambda ) { final int numParam = param.getNumElements(); A.set(H); for( int i = 0; i < numParam; i++ ) { A.set(i,i, A.get(i,i) + lambda); } } /** * Computes the "cost" for the parameters given. * * cost = (1/N) Sum (f(x;p) - y)^2 */ private double cost( DenseMatrix64F param , DenseMatrix64F X , DenseMatrix64F Y) { func.compute(param,X, temp0); double error = diffNormF(temp0,Y); return error*error / (double)X.numRows; } /** * Computes a simple numerical Jacobian. * * @param param The set of parameters that the Jacobian is to be computed at. * @param pt The point around which the Jacobian is to be computed. * @param deriv Where the jacobian will be stored */ protected void computeNumericalJacobian( DenseMatrix64F param , DenseMatrix64F pt , DenseMatrix64F deriv ) { double invDelta = 1.0/DELTA; func.compute(param,pt, temp0); // compute the jacobian by perturbing the parameters slightly // then seeing how it effects the results. for( int i = 0; i < param.numRows; i++ ) { param.data[i] += DELTA; func.compute(param,pt, temp1); // compute the difference between the two parameters and divide by the delta add(invDelta,temp1,-invDelta,temp0,temp1); // copy the results into the jacobian matrix System.arraycopy(temp1.data,0,deriv.data,i*pt.numRows,pt.numRows); param.data[i] -= DELTA; } } /** * The function that is being optimized. */ public interface Function { /** * Computes the output for each value in matrix x given the set of parameters. * * @param param The parameter for the function. * @param x the input points. * @param y the resulting output. */ public void compute( DenseMatrix64F param , DenseMatrix64F x , DenseMatrix64F y ); } }