1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
#ifndef CAFFE2_OPERATORS_INT8_AVERAGE_POOL_OP_H_
#define CAFFE2_OPERATORS_INT8_AVERAGE_POOL_OP_H_
#include <qnnpack.h>
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor_int8.h"
#include "caffe2/operators/conv_pool_op_base.h"
#include "caffe2/operators/quantized/int8_utils.h"
namespace caffe2 {
namespace int8 {
/**
 * Quantized (uint8) average pooling over NHWC tensors, executed through
 * QNNPACK.
 *
 * Two QNNPACK operators are kept, each created lazily on the first run that
 * needs it and destroyed with this object:
 *   - a windowed 2D average pooling operator, and
 *   - a global average pooling operator, used when the kernel matches the
 *     input's dims 1 and 2 exactly, there is no padding and no stride > 1.
 *
 * The template parameter `Ac` is forwarded to `activationLimits` to obtain the
 * pair of bounds handed to the QNNPACK create calls.
 *
 * NOTE(review): a cached operator keeps the scale / zero point values that
 * were in effect when it was created; later runs reuse it without re-checking
 * them.
 */
template <Activation Ac>
class Int8AveragePoolOp final : public ConvPoolOpBase<CPUContext> {
 public:
  /// Forwards all arguments to ConvPoolOpBase and requires NHWC storage order.
  template <class... Args>
  explicit Int8AveragePoolOp(Args&&... args)
      : ConvPoolOpBase<CPUContext>(std::forward<Args>(args)...) {
    OPERATOR_NEEDS_FEATURE(
        this->order_ == StorageOrder::NHWC, "Int8 only supports NHWC order.");
  }

  /// Releases whichever QNNPACK operators were created.
  ~Int8AveragePoolOp() {
    destroyQnnpack(qnnpackOperator_);
    destroyQnnpack(qnnpackGlobalOperator_);
  }

  /**
   * Pools input 0 into output 0.
   *
   * The output's scale and zero point are taken from the "Y_scale"
   * (default 1) and "Y_zero_point" (default 0) arguments. The input must be
   * 4-dimensional; dim 3 is used as the channel count.
   *
   * @return true; failures are reported through CAFFE_ENFORCE / TORCH_CHECK.
   */
  bool RunOnDeviceWithOrderNHWC() override {
    const auto& input = Inputs()[0]->template Get<Int8TensorCPU>();
    auto* output = Outputs()[0]->template GetMutable<Int8TensorCPU>();

    const int32_t outputZeroPoint =
        this->template GetSingleArgument<int>("Y_zero_point", 0);
    const auto outputScale =
        this->template GetSingleArgument<float>("Y_scale", 1);
    output->scale = outputScale;
    output->zero_point = outputZeroPoint;

    TORCH_CHECK_EQ(input.t.dim(), 4);
    const int channels = input.t.dim32(3);
    ConvPoolOpBase<CPUContext>::SetOutputSize(input.t, &(output->t), channels);

    initQNNPACK();

    const bool noPadding =
        pad_t() == 0 && pad_r() == 0 && pad_b() == 0 && pad_l() == 0;
    const bool noStride = stride_h() <= 1 && stride_w() <= 1;
    // Global pooling: the kernel spans dims 1 and 2 of the input exactly.
    const bool globalPooling = noPadding && noStride &&
        input.t.dim32(1) == kernel_h() && input.t.dim32(2) == kernel_w();

    if (!globalPooling) {
      poolWindowed(input, output, channels);
      return true;
    }
    poolGlobal(input, output, channels);
    return true;
  }

 private:
  /// Deletes a QNNPACK operator handle if it is set and resets it to nullptr.
  static void destroyQnnpack(qnnp_operator_t& handle) {
    if (handle == nullptr) {
      return;
    }
    qnnp_delete_operator(handle);
    handle = nullptr;
  }

  /// Runs `handle`, passing either no thread pool or the workspace's thread
  /// pool depending on the build configuration, and returns the status.
  qnnp_status runQnnpack(qnnp_operator_t handle) {
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
    return qnnp_run_operator(handle, nullptr /* thread pool */);
#else
    pthreadpool_t workers =
        reinterpret_cast<pthreadpool_t>(ws_->GetThreadPool());
    return qnnp_run_operator(handle, workers);
#endif
  }

  /// Creates (once), sets up and runs the global average pooling operator.
  void poolGlobal(const Int8TensorCPU& in, Int8TensorCPU* out, int channels) {
    if (qnnpackGlobalOperator_ == nullptr) {
      const qnnp_status created = qnnp_create_global_average_pooling_nwc_q8(
          channels,
          in.zero_point,
          in.scale,
          out->zero_point,
          out->scale,
          activationLimits(out->scale, out->zero_point, Ac).first,
          activationLimits(out->scale, out->zero_point, Ac).second,
          0 /* flags */,
          &qnnpackGlobalOperator_);
      CAFFE_ENFORCE(
          created == qnnp_status_success,
          "failed to create QNNPACK Global Average Pooling operator");
      CAFFE_ENFORCE(qnnpackGlobalOperator_ != nullptr);
    }

    // Dims 1 and 2 are collapsed into a single extent for the global operator.
    const qnnp_status configured = qnnp_setup_global_average_pooling_nwc_q8(
        qnnpackGlobalOperator_,
        in.t.dim32(0),
        in.t.dim32(1) * in.t.dim32(2),
        in.t.template data<uint8_t>(),
        channels,
        out->t.template mutable_data<uint8_t>(),
        channels);
    CAFFE_ENFORCE(
        configured == qnnp_status_success,
        "failed to setup QNNPACK Global Average Pooling operator");

    const qnnp_status executed = runQnnpack(qnnpackGlobalOperator_);
    CAFFE_ENFORCE(
        executed == qnnp_status_success,
        "failed to run QNNPACK Global Average Pooling operator");
  }

  /// Creates (once), sets up and runs the windowed average pooling operator.
  void poolWindowed(const Int8TensorCPU& in, Int8TensorCPU* out, int channels) {
    if (qnnpackOperator_ == nullptr) {
      const qnnp_status created = qnnp_create_average_pooling2d_nhwc_q8(
          pad_t(),
          pad_r(),
          pad_b(),
          pad_l(),
          kernel_h(),
          kernel_w(),
          stride_h(),
          stride_w(),
          channels,
          in.zero_point,
          in.scale,
          out->zero_point,
          out->scale,
          activationLimits(out->scale, out->zero_point, Ac).first,
          activationLimits(out->scale, out->zero_point, Ac).second,
          0 /* flags */,
          &qnnpackOperator_);
      CAFFE_ENFORCE(
          created == qnnp_status_success,
          "failed to create QNNPACK Average Pooling operator");
      CAFFE_ENFORCE(qnnpackOperator_ != nullptr);
    }

    const qnnp_status configured = qnnp_setup_average_pooling2d_nhwc_q8(
        qnnpackOperator_,
        in.t.dim32(0),
        in.t.dim32(1),
        in.t.dim32(2),
        in.t.template data<uint8_t>(),
        channels,
        out->t.template mutable_data<uint8_t>(),
        channels,
        nullptr /* thread pool */);
    CAFFE_ENFORCE(
        configured == qnnp_status_success,
        "failed to setup QNNPACK Average Pooling operator");

    const qnnp_status executed = runQnnpack(qnnpackOperator_);
    CAFFE_ENFORCE(
        executed == qnnp_status_success,
        "failed to run QNNPACK Average Pooling operator");
  }

  // QNNPACK Average Pooling operator (lazily created, owned by this object)
  qnnp_operator_t qnnpackOperator_{nullptr};
  // QNNPACK Global Average Pooling operator (lazily created, owned by this
  // object)
  qnnp_operator_t qnnpackGlobalOperator_{nullptr};
};
} // namespace int8
} // namespace caffe2
#endif // CAFFE2_OPERATORS_INT8_AVERAGE_POOL_OP_H_
|