#pragma once
#include "caffe2/core/operator.h"
#include "caffe2/operators/conv_op.h"
#include "caffe2/quantization/server/conv_pool_dnnlowp_op_base.h"
#include "caffe2/quantization/server/fbgemm_pack_blob.h"
#include "caffe2/quantization/server/fully_connected_dnnlowp_op.h"
namespace caffe2 {
using FCFp32Op = FullyConnectedOp<CPUContext>;
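// Quantizes a conv bias blob to int32 so it can be stored in the packed
// weight blob. Following the usual DNNLOWP/fbgemm convention (an assumption;
// see the definition in the corresponding .cc file), each of the M bias
// values is scaled by 1 / (in_qparams.scale * filter_qparams[g].scale) with a
// zero point of 0, where g is the quantization group (channel) the value
// belongs to, and then rounded (to nearest even when round_nearest_even is
// true).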
void QuantizeConvBias(
    const Blob& blob,
    int M,
    const dnnlowp::TensorQuantizationParams& in_qparams,
    const vector<dnnlowp::TensorQuantizationParams>& filter_qparams,
    std::vector<int32_t>& b_quantized,
    bool use_fp16 = false,
    bool round_nearest_even = true);
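// Pre-packs the weight matrix (and optionally pre-quantizes the bias) of an
// Int8FC DNNLOWP operator so that packing does not have to happen on the
// first invocation in the predictor net; see the analogous Conv pack
// operator documented below.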
class FullyConnectedDNNLowPPackWeightOp final
    : public DNNLowPOp<std::uint8_t, FCFp32Op> {
 public:
  FullyConnectedDNNLowPPackWeightOp(
      const OperatorDef& operator_def,
      Workspace* ws);
  USE_OPERATOR_FUNCTIONS(CPUContext);

  bool RunOnDevice() override;

 private:
  int axis_w_;
  bool quantize_channelwise_;
  int nbits_in_non_outlier_; // only for DNNLOWP_ACC16
  bool save_unpacked_weights_;

  INPUT_TAGS(FILTER, BIAS);
};
using ConvFp32Op = ConvOp<float, CPUContext>;
/**
 * Pack a weight matrix that can be used by DNNLOWP Int8Conv operators.
 * DNNLOWP operators can pack matrices on demand during their first
 * invocation, but calling this operator to pre-pack has benefits such as
 * saving memory when multiple operators share the same weight.
 * This operator should be part of the init net so it is called once to
 * populate the packed blob used by Int8Conv DNNLOWP operators in the
 * predictor net.
 *
 * This operator can optionally pre-quantize the bias as well.
 * In that case, the scale of the input activation tensor must also be
 * provided via the in_scale argument.
 */
class ConvDNNLowPPackWeightOp final
    : public ConvPoolDNNLowPOpBase<std::uint8_t, ConvFp32Op> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS(CPUContext);
  USE_CONV_POOL_DNNLOWP_OPERATOR_BASE_FUNCTIONS(std::uint8_t, ConvFp32Op);
  ConvDNNLowPPackWeightOp(const OperatorDef& operator_def, Workspace* ws);

  bool RunOnDevice() override;

 private:
  bool TakeDepthWise3x3FastPath_();
  bool TakeDepthWise3x3x3FastPath_();
  bool TakeGConvFastPath_();

  fbgemm::conv_param_t<> GetConvParam_();
  fbgemm::conv_param_t<3> GetConv3DParam_();

  // Save the quantized weights right after quantization, before layout
  // packing, for performance reasons.
  bool save_unpacked_weights_;

  bool quantize_groupwise_;
  int nbits_in_non_outlier_; // only for DNNLOWP_ACC16

  INPUT_TAGS(FILTER, BIAS);
};
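// Example (illustrative sketch, not part of this header): adding a
// pre-packing operator to an init net from C++. The operator/engine names
// ("Int8ConvPackWeight", "DNNLOWP") and the blob names are assumptions based
// on the registrations in the corresponding .cc file; adjust to your setup.
//
//   #include "caffe2/proto/caffe2_pb.h"
//
//   void AddConvPackOp(caffe2::NetDef* init_net) {
//     caffe2::OperatorDef* op = init_net->add_op();
//     op->set_type("Int8ConvPackWeight");   // assumed registered name
//     op->set_engine("DNNLOWP");
//     op->add_input("conv_w");              // FILTER: fp32 weights
//     op->add_input("conv_b");              // BIAS (optional): fp32 bias
//     op->add_output("conv_w_packed");      // packed weight blob
//     // Needed only when pre-quantizing the bias:
//     caffe2::Argument* in_scale = op->add_arg();
//     in_scale->set_name("in_scale");
//     in_scale->set_f(0.0235f);             // scale of the input activation
//   }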
// Helper functions for packing weights that can be used by
// ConvDNNLowPAcc16PackWeightOp, ConvDNNLowPOp, and ConvDNNLowPAcc16Op
template <typename T>
void QuantizeWeight(
    const Blob& blob,
    int kernel_dim,
    int M,
    vector<dnnlowp::TensorQuantizationParams>& qparams,
    vector<typename std::make_signed<T>::type>& w_quantized,
    dnnlowp::QuantizationFactory* qfactory);
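// A sketch of what ComputeColumnOffsets is expected to produce (an assumption
// based on the usual fbgemm requantization scheme; the definition lives in
// the corresponding .cc file): for each column j of the num_cols x num_rows
// quantized weight matrix W,
//   col_offsets[j] = sum_k W[j * num_rows + k] - qparams[g].zero_point * num_rows,
// where g is the quantization group containing column j. These offsets are
// later consumed when requantizing the int32 accumulators.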
template <typename T>
void ComputeColumnOffsets(
    int num_rows,
    int num_cols,
    const T* W,
    const vector<dnnlowp::TensorQuantizationParams>& qparams,
    vector<int32_t>& col_offsets);
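// Outlier handling for the DNNLOWP_ACC16 engine (an assumption based on the
// nbits_in_non_outlier naming; see the corresponding .cc file): quantized
// weights that do not fit into nbits_in_non_outlier bits, i.e. values outside
// [-2^(nbits-1), 2^(nbits-1)), are treated as outliers. CountOutliers returns
// how many such values exist, and ExtractOutlierMatrix below moves them into
// a sparse matrix so the dense part stays small enough for 16-bit
// accumulation.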
int CountOutliers(
    int groups,
    int kernel_dim,
    int M,
    int nbits_in_non_outlier,
    vector<std::int8_t>& W_quantized);
/**
 * @param W_quantized input quantized weight that is not packed yet
 */
fbgemm::CompressedSparseColumn* ExtractOutlierMatrix(
    int groups,
    int kernel_dim,
    int M,
    int nbits_in_non_outlier,
    vector<std::int8_t>& W_quantized);
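// Rough sketch of how these helpers fit together when packing a conv weight
// for the ACC16 path (w_blob, M, kernel_dim, groups, and qfactory are
// illustrative names; the authoritative flow lives in the corresponding .cc
// file):
//
//   std::vector<dnnlowp::TensorQuantizationParams> qparams;
//   std::vector<std::int8_t> w_quantized;
//   QuantizeWeight<std::uint8_t>(
//       w_blob, kernel_dim, M, qparams, w_quantized, qfactory);
//
//   std::vector<std::int32_t> col_offsets;
//   ComputeColumnOffsets(kernel_dim, M, w_quantized.data(), qparams, col_offsets);
//
//   // Only for DNNLOWP_ACC16: split large-magnitude weights into a sparse matrix.
//   std::unique_ptr<fbgemm::CompressedSparseColumn> outliers(
//       ExtractOutlierMatrix(groups, kernel_dim, M, nbits_in_non_outlier, w_quantized));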
/*
 * ONNXIFI data type constants used below.
 * These must always be kept in sync with onnxifi.h.
 */
constexpr uint64_t kONNXIFI_DATATYPE_UINT8 = 2;
constexpr uint64_t kONNXIFI_DATATYPE_INT32 = 6;
constexpr uint64_t kONNXIFI_DATATYPE_INT8 = 3;
class Int8ConvDNNLowpPackedWeightBlobShapeFunctions
    : public ExternalTensorFunctionsBase {
 public:
  explicit Int8ConvDNNLowpPackedWeightBlobShapeFunctions()
      : ExternalTensorFunctionsBase() {}
  ~Int8ConvDNNLowpPackedWeightBlobShapeFunctions() override {}
  bool isQuantized() const override {
    return true;
  }
  bool IsSameMetaType(TypeIdentifier id) override;
  void SetupExternalTensorDescriptor(
      const Blob* blob,
      std::vector<std::vector<uint64_t>>* shapes,
      std::vector<std::vector<float>>* all_scales,
      std::vector<std::vector<int32_t>>* all_offsets,
      ExternalTensorDescriptor* desc) override;
  void LoadInfoOfBlob(
      const Blob* blob,
      std::vector<float>* scale,
      std::vector<float>* offset,
      uint32_t* axis) override;
  TypeIdentifier GetTypeMetaId() override;
  TypeMeta GetExternalTensorType(const void* c) override;
  vector<int64_t> GetExternalTensorInfo(
      const void* c,
      size_t* capacity,
      DeviceOption* device) override;
};
class Int8FCDNNLowpPackedWeightBlobShapeFunctions
    : public ExternalTensorFunctionsBase {
 public:
  explicit Int8FCDNNLowpPackedWeightBlobShapeFunctions()
      : ExternalTensorFunctionsBase() {}
  ~Int8FCDNNLowpPackedWeightBlobShapeFunctions() override {}
  bool isQuantized() const override {
    return true;
  }
  bool IsSameMetaType(TypeIdentifier id) override;
  void SetupExternalTensorDescriptor(
      const Blob* blob,
      std::vector<std::vector<uint64_t>>* shapes,
      std::vector<std::vector<float>>* all_scales,
      std::vector<std::vector<int32_t>>* all_offsets,
      ExternalTensorDescriptor* desc) override;
  void LoadInfoOfBlob(
      const Blob* blob,
      std::vector<float>* scale,
      std::vector<float>* offset,
      uint32_t* axis) override;
  TypeIdentifier GetTypeMetaId() override;
  TypeMeta GetExternalTensorType(const void* c) override;
  vector<int64_t> GetExternalTensorInfo(
      const void* c,
      size_t* capacity,
      DeviceOption* device) override;
};
} // namespace caffe2