File: tanh.h

package info (click to toggle)
pytorch 1.13.1+dfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (63 lines) | stat: -rw-r--r-- 1,751 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#pragma once

#include "caffe2/quantization/server/dnnlowp.h"

#include <cmath>
#include <vector>

namespace dnnlowp {

/**
 * Approximation of tanh over quantized values of type T.
 *
 * We use the 3-region approach described in "Efficient VLSI Implementation of
 * Neural Networks with Hyperbolic Tangent Activation Function", IEEE
 * Transactions on Very Large Scale Integration Systems, Zamanlooy and
 * Mirhassani.
 * The pass region (x < x_pq) is approximated as x.
 * The saturation region (x >= x_sq) is approximated as 1.
 * The processing region (x_pq <= x < x_sq) is divided into sub-ranges and the
 * average value of tanh(x) is used per sub-range.
 *
 * The constructor and Compute() are declared here without a body; their
 * definitions are not part of this header.
 */
template <typename T>
class Tanh {
 public:
  /**
   * @param max_abs_err stored in max_abs_err_; presumably the largest
   *     absolute approximation error to tolerate (confirm against the
   *     constructor definition). Defaults to DEFAULT_MAX_ABS_ERR.
   */
  Tanh(double max_abs_err = DEFAULT_MAX_ABS_ERR);

  /**
   * Evaluates the approximation for x.
   * NOTE(review): x and the result are presumably quantized with the input
   * and output quantization parameters exposed below -- confirm against the
   * definition.
   */
  T Compute(T x) const;

  /** @return a copy of the quantization parameters held in in_qparams_. */
  TensorQuantizationParams GetInputQuantizationParams() const {
    return in_qparams_;
  }

  /** @return a copy of the quantization parameters held in out_qparams_. */
  TensorQuantizationParams GetOutputQuantizationParams() const {
    return out_qparams_;
  }

  /**
   * @return x_pq_index_, the end of the pass region. It is an offset from the
   *     input zero point: GetPassRegionEndDequantized() adds
   *     in_qparams_.zero_point to it before dequantizing.
   */
  int GetPassRegionEnd() const {
    return x_pq_index_;
  }

  /**
   * @return the end of the pass region dequantized with in_qparams_.
   */
  float GetPassRegionEndDequantized() const {
    // Cast to T, exactly as GetSaturationRegionBegin() does. A hard-coded
    // 8-bit cast gives the same result while the sum fits in 8 bits but would
    // silently truncate it for a wider T.
    return fbgemm::Dequantize<T>(
        static_cast<T>(x_pq_index_ + in_qparams_.zero_point), in_qparams_);
  }

  /**
   * @return the largest value representable in num_in_bits_ bits,
   *     (1 << num_in_bits_) - 1, dequantized with in_qparams_.
   */
  float GetSaturationRegionBegin() const {
    return fbgemm::Dequantize<T>(
        static_cast<T>((1 << num_in_bits_) - 1), in_qparams_);
  }

  // Default for the constructor's max_abs_err argument.
  static constexpr double DEFAULT_MAX_ABS_ERR = 0.02;
  // Initial values of num_in_bits_ and num_out_bits_ respectively.
  static constexpr int DEFAULT_NUM_IN_BITS = 8;
  static constexpr int DEFAULT_NUM_OUT_BITS = 8;

 private:
  // NOTE: the declaration order below is also the member initialization
  // order used by the out-of-line constructor; keep it stable.
  const double max_abs_err_;
  const int num_in_bits_ = DEFAULT_NUM_IN_BITS;
  const int num_out_bits_ = DEFAULT_NUM_OUT_BITS;

  // End of the pass region, relative to in_qparams_.zero_point.
  int x_pq_index_;
  // Presumably one precomputed output per processing-region sub-range
  // (see the class comment) -- confirm against the constructor definition.
  std::vector<T> processing_region_lut_;
  TensorQuantizationParams in_qparams_, out_qparams_;
}; // class Tanh

} // namespace dnnlowp