File: caffe2_dnnlowp_utils.h

package info (click to toggle)
pytorch 1.13.1+dfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (110 lines) | stat: -rw-r--r-- 3,209 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#pragma once

#include "caffe2/core/operator.h"
#include "caffe2/quantization/server/dnnlowp.h"
#include "caffe2/utils/eigen_utils.h"

namespace dnnlowp {

/**
 * Let consumers of op know that qparams is the quantization parameter
 * used for the output_index'th output of op.
 */
void PropagateOutputTensorQuantizationParams(
    caffe2::OperatorBase* op,
    int output_index,
    const TensorQuantizationParams& qparams);

/**
 * If the input_index'th input is already quantized, return the quantization
 * parameter used for the input tensor (should've been set by
 * PropagateOutputTensorQuantizationParams when the producer was invoked).
 * If the input tensor is not quantized, return the quantization parameter
 * chosen by qfactory based on the distribution of the input tensor.
 *
 * @param is_weight if true, qfactory picks parameters with its
 *                  weight-specific policy rather than the activation one
 */
TensorQuantizationParams GetInputTensorQuantizationParamsOf(
    caffe2::OperatorBase* op,
    int input_index,
    const QuantizationFactory* qfactory,
    bool is_weight = false);

/**
 * Attach qparams to op as the static quantization parameters of its
 * output_index'th output (i.e. the Y_scale / Y_zero_point arguments
 * that HasStaticQuantization checks for).
 */
void SetStaticQuantizationParams(
    caffe2::OperatorBase* op,
    int output_index,
    const TensorQuantizationParams& qparams);

/**
 * @return true if op's output_index'th output should use static quantization
 *              (i.e. op has a Y_scale and optionally a Y_zero_offset
 *              argument).
 */
bool HasStaticQuantization(
    const caffe2::OperatorBase* op,
    int output_index = 0);

/**
 * Get the static quantization parameter of the output_index'th output.
 * Should be used only when HasStaticQuantization returns true.
 */
TensorQuantizationParams GetStaticQuantizationParamsOf(
    const caffe2::OperatorBase* op,
    int output_index);

/**
 * Quantize the input_index'th input of op with qparams if it's not already
 * quantized. A vector temp should be passed to hold the quantized results;
 * the returned pointer may alias temp's storage, so temp must outlive any
 * use of the returned array.
 *
 * @return array of quantized values
 */
template <typename T>
const T* QuantizeInputIfNeeded(
    caffe2::OperatorBase* op,
    int input_index,
    const TensorQuantizationParams& qparams,
    std::vector<T>& temp);

/**
 * Row-wise variant of QuantizeInputIfNeeded: quantize the input_index'th
 * input of op if it's not already quantized, using one quantization
 * parameter per row (qparams). temp holds the quantized results and must
 * outlive any use of the returned array.
 *
 * @return array of quantized values
 */
template <typename T>
const T* RowWiseQuantizeInputIfNeeded(
    caffe2::OperatorBase* op,
    int input_index,
    const std::vector<TensorQuantizationParams>& qparams,
    std::vector<T>& temp);

/**
 * Accumulated statistics comparing quantized ("actual") values against
 * their float reference, filled in by MeasureQuantizationError and
 * consumed by ReportQuantizationError.
 */
struct QuantizationErrorStats {
  // Sum of squared reference values.
  float sum_sq{0};
  // Sum of squared (actual - reference) errors.
  float sum_err_sq{0};
  // Largest absolute error observed so far.
  float max_abs_err{0};
  // The actual and reference values that produced max_abs_err.
  float max_err_actual{0};
  float max_err_ref{0};
  // Number of values measured so far.
  int measure_cnt{0};
};

/**
 * Compare len actual values against their float reference ref and
 * accumulate the error statistics into stat.
 */
void MeasureQuantizationError(
    const float* actual,
    const float* ref,
    size_t len,
    QuantizationErrorStats* stat);

/**
 * Report the quantization error statistics accumulated in stat for op
 * (counterpart of MeasureQuantizationError).
 */
void ReportQuantizationError(
    const caffe2::OperatorBase* op,
    const QuantizationErrorStats& stat);

/**
 * Get a QuantizationFactory configured based on the arguments of op.
 */
std::unique_ptr<QuantizationFactory> GetQuantizationFactoryOf(
    const caffe2::OperatorBase* op);

/**
 * Adjust op's output quantization parameters based on the operator that
 * follows it, identified by name in followed_by (e.g. so the output range
 * can account for the consumer — exact policy defined in the .cc).
 */
void AdjustOutputTensorQuantizationParamsWithFollowedBy(
    caffe2::OperatorBase* op,
    const std::string& followed_by);

/**
 * Parse the DNNLOWP-specific arguments of op.
 * Each out-parameter is optional and only written when non-null:
 *
 * @param dequantize_output whether the op should dequantize its output
 * @param measure_quantization_error whether to measure quantization error
 * @param followed_by name of the operator known to consume op's output
 */
void ParseDNNLowPOperatorArguments(
    caffe2::OperatorBase* op,
    bool* dequantize_output = nullptr,
    bool* measure_quantization_error = nullptr,
    std::string* followed_by = nullptr);

/**
 * Return a copy of net_def with scale/zero-offset quantization arguments
 * added, derived from the activation histograms read from
 * histogram_file_name.
 */
caffe2::NetDef AddScaleZeroOffsetArgumentsWithHistogram(
    caffe2::NetDef net_def,
    const std::string& histogram_file_name);

} // namespace dnnlowp