File: dequantize_dnnlowp_op.cc

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (80 lines) | stat: -rw-r--r-- 2,174 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#include "dequantize_dnnlowp_op.h"

#include "caffe2/core/tensor_int8.h"
#include "caffe2_dnnlowp_utils.h"

namespace caffe2 {

/// Constructs the dequantize operator.
///
/// Forwards `operator_def` and `ws` to the `Operator<CPUContext>` base and
/// initializes `qfactory_` from `dnnlowp::GetQuantizationFactoryOf(this)`.
/// When the operator's engine is one of the 16-bit variants, a warning is
/// logged because that path is experimental.
///
/// @param operator_def Operator definition passed through to the base class.
/// @param ws           Workspace passed through to the base class.
template <typename T>
DequantizeDNNLowPOp<T>::DequantizeDNNLowPOp(
    const OperatorDef& operator_def,
    Workspace* ws)
    : Operator<CPUContext>(operator_def, ws),
      qfactory_(dnnlowp::GetQuantizationFactoryOf(this)) {
  // Read the engine name once; it is used both for the check and the message.
  const auto& engine_name = this->debug_def().engine();
  const bool uses_16bit_engine =
      engine_name == "DNNLOWP_16" || engine_name == "DNNLOWP_ROWWISE_16";

  if (uses_16bit_engine) {
    LOG(WARNING)
        << engine_name
        << " is an experimental feature mostly for testing accuracy with "
           "fixed-point precision higher than 8 and performance is very slow";
  }
}

/// Dequantizes input 0 into float output 0.
///
/// The quantization parameters of input 0 are obtained through
/// `GetInputTensorQuantizationParamsOf`. The input may arrive either wrapped
/// in an `int8::Int8TensorCPU` (in which case its `t` member is used) or as a
/// plain tensor. The element type of the input is enforced to be `T`, the
/// output is resized like the input, and `fbgemm::Dequantize<T>` writes
/// `input.numel()` float values into it.
///
/// @return Always `true`; a type mismatch is reported via `CAFFE_ENFORCE`.
template <typename T>
bool DequantizeDNNLowPOp<T>::RunOnDevice() {
  using namespace dnnlowp;

  // Quantization parameters are resolved first, before touching the tensors.
  const TensorQuantizationParams input_qparams =
      GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());

  // Select the underlying tensor: unwrap Int8TensorCPU when that is what the
  // blob holds, otherwise use the input tensor directly.
  const bool is_int8_wrapper = InputIsType<int8::Int8TensorCPU>(0);
  const TensorCPU& quantized = is_int8_wrapper
      ? this->template Input<int8::Int8TensorCPU>(0).t
      : Input(0);

  // The stored element type must match this instantiation's T.
  CAFFE_ENFORCE(quantized.template IsType<T>());

  auto* dequantized = Output(0);
  dequantized->ResizeLike(quantized);

  const T* src = quantized.template data<T>();
  float* dst = dequantized->template mutable_data<float>();
  fbgemm::Dequantize<T>(src, dst, quantized.numel(), input_qparams);

  return true;
}

// Schema for the Dequantize operator: one input, one output, with output
// type/shape inference tied to input 0 (per IdenticalTypeAndShapeOfInput).
// NOTE(review): RunOnDevice in this file writes the output via
// mutable_data<float>() while enforcing the input element type is T, so the
// "identical type" inference may not describe the actual output dtype --
// confirm whether this is intentional.
OPERATOR_SCHEMA(Dequantize)
    .NumInputs(1)
    .NumOutputs(1)
    .IdenticalTypeAndShapeOfInput(0);

// Register the std::uint8_t instantiation as the CPU implementation of
// Dequantize for the DNNLOWP and DNNLOWP_ROWWISE engines.
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Dequantize,
    DNNLOWP,
    DequantizeDNNLowPOp<std::uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Dequantize,
    DNNLOWP_ROWWISE,
    DequantizeDNNLowPOp<std::uint8_t>);

// Register the std::uint16_t instantiation for the 16-bit engines. These are
// the same engine names for which the constructor logs an
// "experimental feature" warning.
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Dequantize,
    DNNLOWP_16,
    DequantizeDNNLowPOp<std::uint16_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Dequantize,
    DNNLOWP_ROWWISE_16,
    DequantizeDNNLowPOp<std::uint16_t>);

// Register the same std::uint8_t implementation under the Int8Dequantize
// (DNNLOWP and DNNLOWP_ROWWISE engines) and Int8DequantizeRowWise (DNNLOWP
// engine) operator names.
// NOTE(review): no OPERATOR_SCHEMA for Int8Dequantize / Int8DequantizeRowWise
// appears in this file; presumably they are declared elsewhere -- confirm.
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8Dequantize,
    DNNLOWP,
    DequantizeDNNLowPOp<std::uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8Dequantize,
    DNNLOWP_ROWWISE,
    DequantizeDNNLowPOp<std::uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8DequantizeRowWise,
    DNNLOWP,
    DequantizeDNNLowPOp<std::uint8_t>);

} // namespace caffe2