#include "caffe2/operators/elu_op.h"
#include <algorithm>
#include <functional>
#include <string>
#include "caffe2/utils/eigen_utils.h"
namespace caffe2 {
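
// Forward ELU: Y = X if X >= 0, and Y = alpha * (exp(X) - 1) if X < 0.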
template <>
template <typename T>
bool EluFunctor<CPUContext>::
operator()(const int N, const T* X, T* Y, CPUContext* /* context */) const {
  ConstEigenVectorArrayMap<T> X_arr(X, N);
  EigenVectorMap<T>(Y, N) =
      (X_arr < 0).select(alpha * (X_arr.exp() - T(1)), X_arr);
  return true;
}
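
// Gradient: for X < 0, Y = alpha * (exp(X) - 1), so dY/dX = alpha * exp(X) =
// Y + alpha; for X >= 0, dY/dX = 1. The gradient is therefore computable from
// Y alone, which is why the functor takes Y rather than X.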
template <>
template <typename T>
bool EluGradientFunctor<CPUContext>::Forward(
    const std::vector<int>& Y_dims,
    const std::vector<int>& /* dY_dims */,
    const T* Y,
    const T* dY,
    T* dX,
    CPUContext* /* context */) const {
  const int size = std::accumulate(
      // NOLINTNEXTLINE(modernize-use-transparent-functors)
      Y_dims.cbegin(), Y_dims.cend(), 1, std::multiplies<int>());
  ConstEigenVectorArrayMap<T> Y_arr(Y, size);
  ConstEigenVectorArrayMap<T> dY_arr(dY, size);
  EigenVectorArrayMap<T>(dX, size) =
      (Y_arr < 0).select(dY_arr * (Y_arr + alpha), dY_arr);
  return true;
}
REGISTER_CPU_OPERATOR(
    Elu,
    UnaryElementwiseWithArgsOp<
        TensorTypes<float>,
        CPUContext,
        EluFunctor<CPUContext>>);

REGISTER_CPU_GRADIENT_OPERATOR(
    EluGradient,
    BinaryElementwiseWithArgsOp<
        TensorTypes<float>,
        CPUContext,
        EluGradientFunctor<CPUContext>>);
// Input: X, output: Y
OPERATOR_SCHEMA(Elu)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(
This op implements the exponential linear unit (ELU) activation function, as described in [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)](https://arxiv.org/abs/1511.07289). The op takes an input tensor $X$ of arbitrary shape, applies the ELU operation elementwise, and returns a tensor $Y$ of the same shape as output. The *alpha* parameter may be passed as an argument, but defaults to 1. The ELU operation is defined as
$$y = f(x) = \begin{cases}\alpha(e^x - 1) & x < 0 \\ x & \text{otherwise}\end{cases}$$
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elu_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elu_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "Elu",
    ["X"],
    ["Y"],
    alpha=1.1
)
workspace.FeedBlob("X", np.random.randn(3, 3).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"), "\n")
workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[ 0.35339102 1.1860217 -0.10710736]
[-3.1173866 -0.1889988 -0.20330353]
[ 1.8525308 -0.368949 0.506277 ]]
Y:
[[ 0.35339102 1.1860217 -0.11172786]
[-1.0513 -0.18943374 -0.20236646]
[ 1.8525308 -0.33939326 0.506277 ]]
```
</details>
)DOC")
.Input(0, "X", "1D input tensor of data to be operated on.")
.Output(0, "Y", "1D input tensor, calculated as described above.")
.Arg(
"alpha",
"*(type: float; default: 1.0)* Defines alpha parameter used in calculation.")
.InheritOnnxSchema();
// Input: Y, dY, output: dX
GRADIENT_OPERATOR_SCHEMA(EluGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{1, 0}})
    .SetDoc(R"DOC(
EluGradient takes both Y and dY and uses this to update dX according to the
chain rule and the derivative of the exponential linear function.
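
A minimal sketch of invoking the gradient op directly from Python (the blob
names "Y", "dY", and "dX" are illustrative, not required):

```
from caffe2.python import core, workspace
import numpy as np

# Y is the forward ELU output; with alpha = 1, Y = -0.5 corresponds to X < 0.
workspace.FeedBlob("Y", np.array([-0.5, 1.0], dtype=np.float32))
workspace.FeedBlob("dY", np.ones(2, dtype=np.float32))

op = core.CreateOperator("EluGradient", ["Y", "dY"], ["dX"], alpha=1.0)
workspace.RunOperatorOnce(op)
print(workspace.FetchBlob("dX"))  # [0.5 1. ], i.e. [dY * (Y + alpha), dY]
```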
)DOC");
namespace {
class GetEluGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  std::vector<OperatorDef> GetGradientDefs() override {
    // Pass the forward output Y (O(0)) and its gradient dY (GO(0)) to the
    // gradient op, which writes the gradient of the forward input X (GI(0)).
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        std::vector<std::string>{O(0), GO(0)},
        std::vector<std::string>{GI(0)});
  }
};
} // namespace
REGISTER_GRADIENT(Elu, GetEluGradient);
} // namespace caffe2