File: fbgemm_pack_op.h

Package: pytorch 1.13.1+dfsg-4 (Debian bookworm, main)

#pragma once

#include "caffe2/core/operator.h"
#include "caffe2/operators/conv_op.h"
#include "caffe2/quantization/server/conv_pool_dnnlowp_op_base.h"
#include "caffe2/quantization/server/fbgemm_pack_blob.h"
#include "caffe2/quantization/server/fully_connected_dnnlowp_op.h"

namespace caffe2 {

using FCFp32Op = FullyConnectedOp<CPUContext>;

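/**
 * Quantize a convolution bias blob into 32-bit integers for the M output
 * channels.
 *
 * The following summary is inferred from the signature and the usual DNNLOWP
 * bias-quantization scheme (see fbgemm_pack_op.cc for the authoritative
 * implementation): each bias value is quantized with zero point 0 and scale
 * in_qparams.scale * filter_qparams[g].scale, where g is the quantization
 * group of the corresponding output channel, i.e. roughly
 *   b_quantized[i] = round(b[i] / (in_qparams.scale * filter_qparams[g].scale));
 * use_fp16 and round_nearest_even control the intermediate precision and the
 * rounding mode.
 */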
void QuantizeConvBias(
    const Blob& blob,
    int M,
    const dnnlowp::TensorQuantizationParams& in_qparams,
    const vector<dnnlowp::TensorQuantizationParams>& filter_qparams,
    std::vector<int32_t>& b_quantized,
    bool use_fp16 = false,
    bool round_nearest_even = true);

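/**
 * Pack a weight matrix that can be used by DNNLOWP Int8FC operators.
 * Like ConvDNNLowPPackWeightOp below, this operator is intended to run once
 * in the init net so that the packed blob can be shared by Int8FC DNNLOWP
 * operators in the predictor net instead of each operator packing the weight
 * on demand.
 */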
class FullyConnectedDNNLowPPackWeightOp final
    : public DNNLowPOp<std::uint8_t, FCFp32Op> {
 public:
  FullyConnectedDNNLowPPackWeightOp(
      const OperatorDef& operator_def,
      Workspace* ws);
  USE_OPERATOR_FUNCTIONS(CPUContext);

  bool RunOnDevice() override;

 private:
  int axis_w_;
  bool quantize_channelwise_;
  int nbits_in_non_outlier_; // only for DNNLOWP_ACC16
  bool save_unpacked_weights_;

  INPUT_TAGS(FILTER, BIAS);
};

using ConvFp32Op = ConvOp<float, CPUContext>;

/**
 * Pack a weight matrix that can be used by DNNLOWP Int8Conv operators.
 * DNNLOWP operators can pack the matrix on demand during their first
 * invocation, but pre-packing with this operator has benefits such as saving
 * memory when multiple operators share the same weight.
 * This operator should be part of the init net so that it runs once and
 * populates the packed blob used by Int8Conv DNNLOWP operators in the
 * predictor net.
 *
 * This operator can optionally pre-quantize the bias as well. In that case,
 * the scale of the input activation tensor must be provided as the in_scale
 * argument.
 */
class ConvDNNLowPPackWeightOp final
    : public ConvPoolDNNLowPOpBase<std::uint8_t, ConvFp32Op> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS(CPUContext);
  USE_CONV_POOL_DNNLOWP_OPERATOR_BASE_FUNCTIONS(std::uint8_t, ConvFp32Op);
  ConvDNNLowPPackWeightOp(const OperatorDef& operator_def, Workspace* ws);

  bool RunOnDevice() override;

 private:
  bool TakeDepthWise3x3FastPath_();
  bool TakeDepthWise3x3x3FastPath_();
  bool TakeGConvFastPath_();

  fbgemm::conv_param_t<> GetConvParam_();
  fbgemm::conv_param_t<3> GetConv3DParam_();

  // Save the quantized weights right after quantization, before layout
  // packing, for performance purposes
  bool save_unpacked_weights_;
  bool quantize_groupwise_;
  int nbits_in_non_outlier_; // only for DNNLOWP_ACC16

  INPUT_TAGS(FILTER, BIAS);
};
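
/*
 * Example: a minimal sketch of adding this pack operator to an init net.
 * The operator type and engine names below ("Int8ConvPackWeight", "DNNLOWP")
 * and the blob names are assumptions/placeholders; check the
 * REGISTER_CPU_OPERATOR_WITH_ENGINE calls in fbgemm_pack_op.cc for the
 * authoritative operator registrations.
 *
 *   OperatorDef def;
 *   def.set_type("Int8ConvPackWeight");   // assumed registered name
 *   def.set_engine("DNNLOWP");
 *   def.add_input("conv_w");              // FILTER
 *   def.add_input("conv_b");              // optional BIAS to pre-quantize
 *   def.add_output("conv_w_packed");
 *   auto* in_scale = def.add_arg();       // needed only when pre-quantizing
 *   in_scale->set_name("in_scale");       // the bias (see comment above)
 *   in_scale->set_f(0.05f);               // scale of the input activation
 *   init_net.add_op()->CopyFrom(def);
 */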

// Helper functions for packing weights that can be used by
// ConvDNNLowPAcc16PackWeightOp, ConvDNNLowPOp, and ConvDNNLowPAcc16Op

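/**
 * Quantize the filter blob into signed integer values, selecting one set of
 * quantization parameters per quantization group (the length of qparams
 * determines how many output channels/groups are quantized independently).
 * This summary is inferred from the signature; the filter is treated as M
 * output channels of kernel_dim elements each, see fbgemm_pack_op.cc for the
 * exact layout expectations.
 */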
template <typename T>
void QuantizeWeight(
    const Blob& blob,
    int kernel_dim,
    int M,
    vector<dnnlowp::TensorQuantizationParams>& qparams,
    vector<typename std::make_signed<T>::type>& w_quantized,
    dnnlowp::QuantizationFactory* qfactory);

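/**
 * Compute per-column offsets of the quantized weight matrix W, essentially
 * the sum of each column adjusted by that column's zero point. The offsets
 * are used at run time to compensate for the activation zero point during
 * requantization. This description follows fbgemm's usual requantization
 * scheme; see fbgemm_pack_op.cc for the exact formula.
 */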
template <typename T>
void ComputeColumnOffsets(
    int num_rows,
    int num_cols,
    const T* W,
    const vector<dnnlowp::TensorQuantizationParams>& qparams,
    vector<int32_t>& col_offsets);

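/**
 * Count the quantized weight values whose magnitude does not fit in
 * nbits_in_non_outlier bits (description inferred from the outlier-aware
 * quantization used by DNNLOWP_ACC16). The count determines the size of the
 * sparse matrix produced by ExtractOutlierMatrix below.
 */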
int CountOutliers(
    int groups,
    int kernel_dim,
    int M,
    int nbits_in_non_outlier,
    vector<std::int8_t>& W_quantized);

/**
 * Extract the outlier weight values (those that do not fit in
 * nbits_in_non_outlier bits) into a sparse matrix so that the remaining dense
 * part can use 16-bit accumulation (the DNNLOWP_ACC16 engine).
 * @param W_quantized input quantized weight that is not packed yet
 */
fbgemm::CompressedSparseColumn* ExtractOutlierMatrix(
    int groups,
    int kernel_dim,
    int M,
    int nbits_in_non_outlier,
    vector<std::int8_t>& W_quantized);

/*
 * ONNXIFI data type constants used below.
 * These values must always be kept in sync with onnxifi.h.
 */
constexpr uint64_t kONNXIFI_DATATYPE_UINT8 = 2;
constexpr uint64_t kONNXIFI_DATATYPE_INT32 = 6;
constexpr uint64_t kONNXIFI_DATATYPE_INT8 = 3;

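/*
 * ExternalTensorFunctionsBase implementations that let external backends
 * (e.g. the ONNXIFI lowering path) query shape and quantization information
 * from packed int8 conv/FC weight blobs.
 */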
class Int8ConvDNNLowpPackedWeightBlobShapeFunctions
    : public ExternalTensorFunctionsBase {
 public:
  explicit Int8ConvDNNLowpPackedWeightBlobShapeFunctions()
      : ExternalTensorFunctionsBase() {}
  ~Int8ConvDNNLowpPackedWeightBlobShapeFunctions() override {}
  bool isQuantized() const override {
    return true;
  }
  bool IsSameMetaType(TypeIdentifier id) override;
  void SetupExternalTensorDescriptor(
      const Blob* blob,
      std::vector<std::vector<uint64_t>>* shapes,
      std::vector<std::vector<float>>* all_scales,
      std::vector<std::vector<int32_t>>* all_offsets,
      ExternalTensorDescriptor* desc) override;
  void LoadInfoOfBlob(
      const Blob* blob,
      std::vector<float>* scale,
      std::vector<float>* offset,
      uint32_t* axis) override;
  TypeIdentifier GetTypeMetaId() override;
  TypeMeta GetExternalTensorType(const void* c) override;
  vector<int64_t> GetExternalTensorInfo(
      const void* c,
      size_t* capacity,
      DeviceOption* device) override;
};

class Int8FCDNNLowpPackedWeightBlobShapeFunctions
    : public ExternalTensorFunctionsBase {
 public:
  explicit Int8FCDNNLowpPackedWeightBlobShapeFunctions()
      : ExternalTensorFunctionsBase() {}
  ~Int8FCDNNLowpPackedWeightBlobShapeFunctions() override {}
  bool isQuantized() const override {
    return true;
  }
  bool IsSameMetaType(TypeIdentifier id) override;
  void SetupExternalTensorDescriptor(
      const Blob* blob,
      std::vector<std::vector<uint64_t>>* shapes,
      std::vector<std::vector<float>>* all_scales,
      std::vector<std::vector<int32_t>>* all_offsets,
      ExternalTensorDescriptor* desc) override;
  void LoadInfoOfBlob(
      const Blob* blob,
      std::vector<float>* scale,
      std::vector<float>* offset,
      uint32_t* axis) override;
  TypeIdentifier GetTypeMetaId() override;
  TypeMeta GetExternalTensorType(const void* c) override;
  vector<int64_t> GetExternalTensorInfo(
      const void* c,
      size_t* capacity,
      DeviceOption* device) override;
};

} // namespace caffe2