1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
|
#include "caffe2/operators/quantized/int8_fc_op.h"
#include <functional>
#include "caffe2/operators/fc_inference.h"
namespace caffe2 {

// Registers int8::Int8FCOp as the CPU implementation of the "Int8FC" operator.
REGISTER_CPU_OPERATOR(Int8FC, int8::Int8FCOp);

// Brings _1 / _2 into scope for the std::bind expressions in the schema below.
using namespace std::placeholders;

// Schema for Int8FC, the quantized fully connected operator.
//
// Accepts 3 to 5 inputs (X, W, b, plus the optional Qparam and in_Qparam
// blobs) and 1 to 4 outputs.
// NOTE(review): only output 0 ("Y") is documented here although up to four
// outputs are allowed -- confirm what the remaining outputs carry.
OPERATOR_SCHEMA(Int8FC)
    .NumInputs(3, 5)
    .NumOutputs(1, 4)
    // Shape and cost inference are delegated to the shared FC helpers. The
    // trailing `false` is presumably the "pretransposed weight" flag declared
    // in fc_inference.h -- TODO confirm against that header.
    // NOLINTNEXTLINE(modernize-avoid-bind)
    .TensorInferenceFunction(std::bind(FCShapeInference, _1, _2, false))
    // NOLINTNEXTLINE(modernize-avoid-bind)
    .CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2, false))
    .SetDoc(R"DOC(
Computes the result of passing an input vector X into a fully
connected layer with 2D weight matrix W and 1D bias vector b. That is,
the layer computes Y = X * W^T + b, where X has size (M x K),
W has size (N x K), b has size (N), and Y has size (M x N),
where M is often the batch size.
NOTE: X does not need to explicitly be a 2D vector; rather, it will be
coerced into one. For an arbitrary n-dimensional tensor
X \in [a_0, a_1 * ... * a_{n-1}]. Only this case is supported!
Lastly, even though b is a 1D vector of size N, it is copied/resized to
be size (M x N) implicitly and added to each vector in the batch.
Each of these dimensions must be matched correctly, or else the operator
will throw errors.
)DOC")
    .Arg("Y_scale", "Output tensor quantization scale")
    .Arg("Y_zero_point", "Output tensor quantization offset")
    .Input(
        0,
        "X",
        "input tensor that's coerced into a 2D matrix of size (MxK) "
        "as described above")
    // W is (N x K), matching the operator doc above (Y = X * W^T + b); the
    // description previously said (KxN), contradicting that doc.
    .Input(
        1,
        "W",
        "A tensor that is coerced into a 2D blob of size (NxK) "
        "containing fully connected weight matrix")
    .Input(2, "b", "1D blob containing bias vector")
    // Adjacent string literals are concatenated verbatim, so the first piece
    // must end with ". " to keep the two sentences from running together.
    .Input(
        3,
        "Qparam",
        "Optional Qparam blob that contains quant param computed on activation histogram data. "
        "Will overwrite Y_scale and Y_zero_point argument if specified")
    .Input(
        4,
        "in_Qparam",
        "Optional Qparam blob that contains quant param computed on activation histogram data. "
        "Will overwrite X_scale and X_zero_point argument if specified")
    .Output(0, "Y", "2D output tensor");

} // namespace caffe2
|