1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
|
#include "quantize_dnnlowp_op.h"
#include "dnnlowp_op.h"
#include "caffe2/core/tensor_int8.h"
#include "caffe2/quantization/server/int8_gen_quant_params.h"
#include "caffe2_dnnlowp_utils.h"
#include "dnnlowp_partition.h"
namespace caffe2 {
using namespace std;
/**
 * Builds the operator from its definition and owning workspace.
 *
 * Both arguments are forwarded to the Operator<CPUContext> base, and the
 * quantization factory member is initialized from
 * dnnlowp::GetQuantizationFactoryOf(this).
 *
 * @param operator_def definition of this operator instance
 * @param ws           workspace passed through to the base operator
 */
template <typename T>
QuantizeDNNLowPOp<T>::QuantizeDNNLowPOp(
    const OperatorDef& operator_def,
    Workspace* ws)
    : Operator<CPUContext>(operator_def, ws),
      qfactory_(dnnlowp::GetQuantizationFactoryOf(this)) {
}
/**
 * Quantizes the float tensor at input 0 into the Int8TensorCPU at output 0.
 *
 * The quantization parameters are chosen as follows:
 *   - with two inputs, scale and zero point are read from the
 *     Int8QuantParamsBlob at input 1 and the precision is taken from the
 *     quantization factory's activation precision;
 *   - otherwise, if HasStaticQuantization(this) holds, they come from
 *     GetStaticQuantizationParamsOf(this, 0);
 *   - otherwise they come from
 *     GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get()).
 *
 * The chosen parameters are also propagated to output 0.
 *
 * @return always true; violated preconditions are reported through
 *         CAFFE_ENFORCE.
 */
template <typename T>
bool QuantizeDNNLowPOp<T>::RunOnDevice() {
  using namespace dnnlowp;

  // Operator arguments are parsed once, on the first run.
  if (!arguments_parsed_) {
    dnnlowp::ParseDNNLowPOperatorArguments(this);
    arguments_parsed_ = true;
  }

  CAFFE_ENFORCE(InputSize() <= 2);
  CAFFE_ENFORCE(Input(0).template IsType<float>());

  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
  TensorQuantizationParams in_qparams;
  if (InputSize() == 2) {
    // Explicit quantization parameters were supplied as the second input.
    const auto* qparam_blob =
        Input<caffe2::unique_ptr<Int8QuantParamsBlob>>(1).get();
    CAFFE_ENFORCE(qparam_blob);
    in_qparams.scale = qparam_blob->qparam.scale;
    in_qparams.zero_point = qparam_blob->qparam.zero_point;
    in_qparams.precision = qfactory_->GetActivationPrecision();
  } else if (HasStaticQuantization(this)) {
    in_qparams = GetStaticQuantizationParamsOf(this, 0);
  } else {
    in_qparams = GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
  }

  auto* output = Outputs()[0]->template GetMutable<int8::Int8TensorCPU>();

  // The input is looked up only after the output blob has been obtained,
  // matching the order in which the tensors are accessed below.
  const auto& src = Input(0);
  output->t.ResizeLike(src);

  const float* src_data = src.template data<float>();
  T* dst_data = output->t.template mutable_data<T>();
  fbgemm::Quantize<T>(src_data, dst_data, src.numel(), in_qparams);

  PropagateOutputTensorQuantizationParams(this, 0, in_qparams);
  return true;
}
// Schema for the Quantize operator: one or two inputs and exactly one output.
// RunOnDevice treats input 0 as the float tensor to quantize and, when
// present, input 1 as a blob carrying explicit quantization parameters.
// NOTE(review): IdenticalTypeAndShapeOfInput(0) presumably makes schema
// inference report output 0 with the same type and shape as input 0, while
// the operator actually writes quantized T values -- confirm this is intended.
OPERATOR_SCHEMA(Quantize)
.NumInputs(1, 2)
.NumOutputs(1)
.IdenticalTypeAndShapeOfInput(0);
// CPU engine registrations for QuantizeDNNLowPOp.
//
// Quantize: the DNNLOWP and DNNLOWP_ROWWISE engines use the uint8_t
// instantiation; the *_16 engines use the uint16_t instantiation. The
// row-wise engine names map to the same implementation as their
// non-row-wise counterparts.
REGISTER_CPU_OPERATOR_WITH_ENGINE(
Quantize,
DNNLOWP,
QuantizeDNNLowPOp<uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
Quantize,
DNNLOWP_ROWWISE,
QuantizeDNNLowPOp<uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
Quantize,
DNNLOWP_16,
QuantizeDNNLowPOp<uint16_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
Quantize,
DNNLOWP_ROWWISE_16,
QuantizeDNNLowPOp<uint16_t>);
// Int8Quantize: only the 8-bit engines are registered here, both backed by
// the uint8_t instantiation.
REGISTER_CPU_OPERATOR_WITH_ENGINE(
Int8Quantize,
DNNLOWP,
QuantizeDNNLowPOp<uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
Int8Quantize,
DNNLOWP_ROWWISE,
QuantizeDNNLowPOp<uint8_t>);
} // namespace caffe2
|