#ifndef CAFFE_HINGE_LOSS_LAYER_HPP_
#define CAFFE_HINGE_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

/**
* @brief Computes the hinge loss for a one-of-many classification task.
*
* @param bottom input Blob vector (length 2)
* -# @f$ (N \times C \times H \times W) @f$
* the predictions @f$ t @f$, a Blob with values in
* @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of
* the @f$ K = CHW @f$ classes (the SVM interpretation of these scores
* is given below)
* -# @f$ (N \times 1 \times 1 \times 1) @f$
* the labels @f$ l @f$, an integer-valued Blob with values
* @f$ l_n \in \{0, 1, 2, \ldots, K - 1\} @f$
* indicating the correct class label among the @f$ K @f$ classes
* @param top output Blob vector (length 1)
* -# @f$ (1 \times 1 \times 1 \times 1) @f$
* the computed hinge loss: @f$ E =
* \frac{1}{N} \sum\limits_{n=1}^N \sum\limits_{k=0}^{K-1}
* [\max(0, 1 - \delta\{l_n = k\} t_{nk})] ^ p
* @f$, for the @f$ L^p @f$ norm
* (defaults to @f$ p = 1 @f$, the L1 norm; L2 norm, as in L2-SVM,
* is also available), and @f$
* \delta\{\mathrm{condition}\} = \left\{
* \begin{array}{lr}
* 1 & \mbox{if condition} \\
* -1 & \mbox{otherwise}
* \end{array} \right.
* @f$
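*
* For example, with @f$ N = 1 @f$, @f$ K = 3 @f$, label @f$ l_1 = 0 @f$, and
* predictions @f$ t = (0.5, -0.3, 1.2) @f$, the L1 hinge loss is
* @f$ \max(0, 1 - 0.5) + \max(0, 1 - 0.3) + \max(0, 1 + 1.2)
* = 0.5 + 0.7 + 2.2 = 3.4 @f$: the true class is penalized for scoring
* below @f$ +1 @f$ and every other class for scoring above @f$ -1 @f$.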
*
* In an SVM, @f$ t \in \mathcal{R}^{N \times K} @f$ is the result of taking
* the inner product @f$ X^T W @f$ of the features
* @f$ X \in \mathcal{R}^{D \times N} @f$
* and the learned hyperplane parameters
* @f$ W \in \mathcal{R}^{D \times K} @f$. So, a Net with just an
* InnerProductLayer (with num_output = @f$ K @f$) providing predictions to a
* HingeLossLayer is equivalent to an SVM (assuming it has no learnable
* parameters outside the InnerProductLayer and no losses outside the
* HingeLossLayer).
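*
* A minimal sketch of the forward computation in the L1 case (illustrative
* names only; the actual implementation lives in
* src/caffe/layers/hinge_loss_layer.cpp):
* @code
* // Assumed setup: N samples, K classes, zero-based labels;
* // bottom_data holds N * K scores, label holds N class indices.
* Dtype loss = 0;
* for (int n = 0; n < N; ++n) {
*   for (int k = 0; k < K; ++k) {
*     // delta is +1 for the true class and -1 otherwise, as defined above.
*     const Dtype delta = (k == label[n]) ? Dtype(1) : Dtype(-1);
*     const Dtype m =
*         std::max(Dtype(0), Dtype(1) - delta * bottom_data[n * K + k]);
*     loss += m;  // the L2 variant accumulates m * m instead
*   }
* }
* loss /= N;
* @endcode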
*/
template <typename Dtype>
class HingeLossLayer : public LossLayer<Dtype> {
public:
explicit HingeLossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param) {}

virtual inline const char* type() const { return "HingeLoss"; }

protected:
/// @copydoc HingeLossLayer
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

/**
* @brief Computes the hinge loss error gradient w.r.t. the predictions.
*
* Gradients cannot be computed with respect to the label inputs (bottom[1]),
* so this method ignores bottom[1] and requires !propagate_down[1], crashing
* if propagate_down[1] is set.
*
* @param top output Blob vector (length 1), providing the error gradient with
* respect to the outputs
* -# @f$ (1 \times 1 \times 1 \times 1) @f$
* This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
* as @f$ \lambda @f$ is the coefficient of this layer's output
* @f$\ell_i@f$ in the overall Net loss
* @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
* @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
* (*Assuming that this top Blob is not used as a bottom (input) by any
* other layer of the Net.)
* @param propagate_down see Layer::Backward.
* propagate_down[1] must be false as we can't compute gradients with
* respect to the labels.
* @param bottom input Blob vector (length 2)
* -# @f$ (N \times C \times H \times W) @f$
* the predictions @f$t@f$; Backward computes diff
* @f$ \frac{\partial E}{\partial t} @f$
* -# @f$ (N \times 1 \times 1 \times 1) @f$
* the labels -- ignored as we can't compute their error gradients
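*
* For the default L1 norm, differentiating the loss above gives
* @f$ \frac{\partial E}{\partial t_{nk}} = -\frac{1}{N} \delta\{l_n = k\} @f$
* whenever the margin is violated (@f$ 1 - \delta\{l_n = k\} t_{nk} > 0 @f$)
* and 0 otherwise; the loss weight @f$ \lambda @f$ then scales this value.
* A minimal sketch of that update (illustrative names only):
* @code
* // bottom_diff has the same N x K layout as bottom_data.
* for (int n = 0; n < N; ++n) {
*   for (int k = 0; k < K; ++k) {
*     const Dtype delta = (k == label[n]) ? Dtype(1) : Dtype(-1);
*     const bool violated = Dtype(1) - delta * bottom_data[n * K + k] > 0;
*     bottom_diff[n * K + k] = violated ? -delta * loss_weight / N : Dtype(0);
*   }
* }
* @endcode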
*/
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
};

}  // namespace caffe

#endif  // CAFFE_HINGE_LOSS_LAYER_HPP_