#pragma once
#include "caffe2/core/tensor_int8.h"
#include "caffe2/operators/elementwise_ops.h"
#include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"
#include "caffe2/quantization/server/dnnlowp_op.h"
#include "caffe2/quantization/server/sigmoid.h"
namespace caffe2 {
// DNNLowP (quantized) unary elementwise operator: applies Functor to an
// int8 input tensor element-by-element, producing an int8 output of the
// same shape, and publishes the functor's output quantization parameters
// so downstream quantized operators can consume the result.
template <typename T, class Functor>
class UnaryElementwiseWithArgsDNNLowPOp : public Operator<CPUContext> {
 public:
  USE_OPERATOR_FUNCTIONS(CPUContext);
  UnaryElementwiseWithArgsDNNLowPOp(
      const OperatorDef& operator_def,
      Workspace* ws)
      : Operator<CPUContext>(operator_def, ws), functor_() {}

  bool RunOnDevice() override {
    // Quantization arguments are parsed lazily, exactly once per instance.
    if (!arguments_parsed_) {
      dnnlowp::ParseDNNLowPOperatorArguments(this);
      dnnlowp::SetStaticQuantizationParams(
          this, 0, functor_.GetOutputQuantizationParams());
      arguments_parsed_ = true;
    }

    const auto& X = this->template Input<int8::Int8TensorCPU>(0).t;
    auto& Y = Outputs()[0]->template GetMutable<int8::Int8TensorCPU>()->t;
    Y.ResizeLike(X);
    functor_(X.size(), X.template data<T>(), Y.template mutable_data<T>());

    // Make the output's quantization parameters visible to consumers.
    dnnlowp::PropagateOutputTensorQuantizationParams(
        this, 0, functor_.GetOutputQuantizationParams());
    return true;
  }

 private:
  Functor functor_; // supplies both the elementwise kernel and output qparams
  bool arguments_parsed_{false};
};
// Common base class for DNNLowP (quantized) binary elementwise operators.
// The constructor resolves broadcasting arguments the same way as caffe2's
// float elementwise ops: the broadcast axis may be given numerically via
// "axis", or semantically via a one-character "axis_str" whose position is
// looked up in the "order" layout string (e.g. "C" in "NCHW" -> axis 1).
template <typename T, typename FP32_OP>
class BinaryElementwiseDNNLowPOp : public DNNLowPOp<T, FP32_OP> {
 public:
  USE_OPERATOR_FUNCTIONS(CPUContext);
  BinaryElementwiseDNNLowPOp(const OperatorDef& operator_def, Workspace* ws)
      : DNNLowPOp<T, FP32_OP>(operator_def, ws),
        OP_SINGLE_ARG(bool, "broadcast", enable_broadcast_, 0),
        OP_SINGLE_ARG(int, "axis", axis_, -1),
        OP_SINGLE_ARG(string, "axis_str", axis_str_, ""),
        OP_SINGLE_ARG(string, "order", order_, "NCHW") {
    // Figure out the correct axis to use.
    if (enable_broadcast_) {
      if (axis_ != -1) {
        // Get axis from an explicit axis argument.
        // "axis" and "axis_str" are mutually exclusive.
        CAFFE_ENFORCE_EQ(
            axis_str_.size(),
            0,
            "Args axis and axis_str cannot be used simultaneously.");
      } else if (axis_str_.size()) {
        // Get the axis index semantically: a single character resolved
        // against the layout string in order_.
        CAFFE_ENFORCE_EQ(
            axis_str_.size(), 1, "Unsupported axis string", axis_str_);
        size_t semantic_axis_ = order_.find(axis_str_);
        CAFFE_ENFORCE_NE(
            semantic_axis_,
            string::npos,
            "Unrecognizable axis string ",
            axis_str_,
            " from order string ",
            order_);
        axis_ = semantic_axis_;
      }
    } else {
      // Without broadcast, axis arguments are meaningless; reject them so a
      // silently-ignored argument doesn't mask a caller mistake.
      CAFFE_ENFORCE(
          axis_ == -1 && axis_str_.size() == 0,
          "Do not specify axis or axis_str if broadcast is not enabled.");
    }
  }

 protected:
  // Whether broadcasting of the second input is enabled (default off).
  bool enable_broadcast_;
  // Numeric broadcast axis; -1 when unspecified (may be filled in from
  // axis_str_ above).
  int axis_;
  // One-character semantic axis name (e.g. "C"); alternative to axis_.
  string axis_str_;
  // Layout string the semantic axis is resolved against (default "NCHW").
  string order_;
  // NOTE(review): not initialized here — presumably computed by derived
  // operators for their int8 requantization path; confirm in subclasses.
  dnnlowp::RequantizationParams requantization_params_;
}; // BinaryElementwiseDNNLowPOp
// For arithmetic operators, Eigen provides a good way to vectorize even
// when broadcasting.
#define DECLARE_EIGEN_FUNCTOR(name, eigen_op, input_type, output_type) \
struct Eigen##name##Functor { \
template <int b_is_scalar, typename T, typename R> \
inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \
if (b_is_scalar) { \
EigenVectorArrayMap<R>(out, n) = \
eigen_op((ConstEigenVectorArrayMap<T>(a, n)), (b[0])); \
} else { \
EigenVectorArrayMap<R>(out, n) = eigen_op( \
(ConstEigenVectorArrayMap<T>(a, n)), \
(ConstEigenVectorArrayMap<T>(b, n))); \
} \
} \
template <typename T, typename R> \
void RunWithBroadcast( \
const T* a, \
const T* b, \
R* out, \
size_t pre, \
size_t n, \
CPUContext*) { \
EigenArrayMap<R>(out, n, pre) = eigen_op( \
(ConstEigenArrayMap<T>(a, n, pre).colwise()), \
(ConstEigenVectorArrayMap<T>(b, n))); \
} \
template <typename T, typename R> \
void RunWithBroadcast2( \
const T* a, \
const T* b, \
R* out, \
size_t pre, \
size_t n, \
size_t post, \
CPUContext*) { \
for (const auto i : c10::irange(pre)) { \
EigenArrayMap<R>(out + i * n * post, post, n) = eigen_op( \
(ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()), \
(Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n))); \
} \
} \
};
} // namespace caffe2