#pragma once
#include <cstdint>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "caffe2/core/operator.h"
#include "caffe2/onnx/onnxifi_init.h"
#include "caffe2/opt/backend_cutting.h"
#include "caffe2/opt/backend_transformer_base.h"
#include "onnx/onnx_pb.h"
namespace caffe2 {
namespace onnx {
class OnnxExporter;
}
// Split SparseLengthsSumSparse into SparseLengthsSumSparseLookup +
// SparseLengthsSum
TORCH_API void splitSparseLengthsSumSparse(NetDef* net, const Workspace& ws);
// Options controlling how a predictor net is lowered onto an ONNXIFI backend.
// Extends the generic BackendTransformOptions with ONNXIFI-specific knobs.
struct OnnxifiTransformerOptions final : public BackendTransformOptions {
  explicit OnnxifiTransformerOptions() : BackendTransformOptions() {}

  // Pass serialized onnx model if true, otherwise pass serialized c2 model
  bool use_onnx{false};

  // Whether to adjust batch at the outputs or not
  bool adjust_batch{true};

  // Whether to lower model blob by blob
  bool load_model_by_blob{false};

  // Whether to enforce fp32 inputs into fp16.
  bool enforce_fp32_inputs_into_fp16{false};

  // Whether to combine fp32 batched inputs into one tensor and convert it to
  // fp16 or not
  bool merge_fp32_inputs_into_fp16{false};

  // Whether to verify that a single subnet was created
  bool verify_only_single_subnet{false};

  // Whether the net has been ssaRewritten
  bool predictor_net_ssa_rewritten{false};

  // Inference timeout (0 presumably means no timeout — confirm against
  // backend implementation)
  int timeout{0};

  // Mapping of batch sizes to shape infos
  std::unordered_map<int, ShapeInfoMap> shape_hints_per_bs;

  // Whether to read batch size from Onnxifi.
  bool use_onnxifi_batch_size{false};
};
// Thin helper for getting/setting string-valued options on the loaded
// ONNXIFI backend library.
class TORCH_API OnnxifiOptionHelper final {
 public:
  OnnxifiOptionHelper();

  // Set Onnxifi option; returns true on success.
  bool setOnnxifiOption(const std::string& option, const std::string& value);

  // Get Onnxifi option
  std::string getOnnxifiOption(const std::string& option);

 private:
  // Pointer to loaded onnxifi library
  onnxifi_library* lib_{nullptr};
};
// Transformer that lowers supported subgraphs of a Caffe2 predictor net onto
// an ONNXIFI backend: supported ops are cut out of the net and each resulting
// subgraph is replaced by a single Onnxifi op. Lowering can go through either
// the ONNX protobuf path or the native C2 protobuf path (see
// OnnxifiTransformerOptions::use_onnx).
class TORCH_API OnnxifiTransformer final : public BackendTransformerBase {
 public:
  explicit OnnxifiTransformer(const OnnxifiTransformerOptions& opts);
  ~OnnxifiTransformer() override;

  // Main entry point: rewrites `pred_net` in place, replacing backend-
  // supported subgraphs with Onnxifi ops. `weight_names` lists the weights
  // present in `ws`; `shape_hints` provides tensor shape info; ops whose
  // net positions appear in `blocklisted_ops` are kept on CPU.
  void transform(
      Workspace* ws,
      NetDef* pred_net,
      const std::vector<std::string>& weight_names,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<int>& blocklisted_ops) override;

  // Query whether an operator is supported by passing C2 protobuf
  bool supportOpC2(
      const caffe2::OperatorDef& op,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<std::string>& weights,
      const std::unordered_set<int>& blocklisted_ops,
      onnxBackendID backend_id) const;

  // Determine backend id
  std::vector<onnxBackendID> getBackendId();

 private:
  // Convert a cutoff subgraph net to an Onnxifi op via the ONNX route.
  // Since we create new tensors during the conversion process, we actually
  // need to inject them into the original workspace.
  // Since our onnx exporter uses std::unordered_map<std::string, TensorShape>
  // as lut, we need to include an extra copy of shape info and maintain them
  // together
  caffe2::NetDef SubnetToOnnxifiOpViaOnnx(
      const caffe2::NetDef& net,
      const std::unordered_set<std::string>& weights_in_ws,
      Workspace* ws,
      onnx::OnnxExporter* exporter,
      ShapeInfoMap* shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Convert a cutoff subgraph net to an Onnxifi op
  caffe2::NetDef SubnetToOnnxifiOpViaC2(
      const caffe2::NetDef& net,
      const std::unordered_set<std::string>& weights_in_ws,
      const ShapeInfoMap& shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Check that output shape hints are present to ensure we can pass them to
  // OnnxifiOp
  bool canPassOutputShapeHintsPerBs(
      const OperatorDef& op,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs) const;

  // Build the Onnxifi OperatorDef wrapping a lowered subgraph.
  // We already have all the ops and external inputs and outputs!
  OperatorDef buildOnnxifiOp(
      const std::string& onnx_model_str,
      const std::unordered_set<std::string>& initialization_list,
      const std::vector<std::string>& external_inputs,
      const std::vector<std::string>& external_outputs,
      const ShapeInfoMap& shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Transform by passing C2 proto to backend
  opt::CutResult TransformViaC2(
      NetDef* pred_net,
      const std::unordered_set<std::string>& weights,
      const std::unordered_set<int>& blocklisted_ops,
      const ShapeInfoMap& shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Transform by passing ONNX proto to backend
  opt::CutResult TransformViaOnnx(
      Workspace* ws,
      NetDef* pred_net,
      const std::unordered_set<std::string>& weights,
      const std::unordered_set<int>& blocklisted_ops,
      ShapeInfoMap* shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Query whether an operator is supported by passing ONNX protobuf
  bool supportOpOnnx(
      const caffe2::OperatorDef& op,
      onnx::OnnxExporter* exporter,
      const std::unordered_set<int>& blocklisted_ops,
      onnxBackendID backend_id) const;

  // Tie the output of Gather to the scalar weight input of the
  // SparseLengthsWeighted* and SparseLengthsSumSparseLookup (which is split
  // from the SparseLengthsWeighted*Sparse) ops. If the latter is disabled,
  // disable the former too.
  void tieGatherAndSparseLengthsWeightedSumOps(
      const NetDef& net,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<std::string>& weights,
      std::unordered_set<int>* blocklisted_ops) const;

  // For net with partitioning info, blocklist ops that are supposed to run on
  // CPU, whose partition info will contain empty device_id list.
  void blocklistCpuPartition(
      const NetDef& net,
      std::unordered_set<int>* blocklisted_ops) const;

  // Rule based filtering
  void applyFilteringRules(
      const NetDef& net,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<std::string>& weights,
      std::unordered_set<int>* blocklisted_ops) const;

  // Extract partition info from the original net
  void extractPartitionInfo(const NetDef& net);

  // Options
  OnnxifiTransformerOptions opts_;

  // Pointer to loaded onnxifi library
  onnxifi_library* lib_{nullptr};

  // Number of backends
  size_t num_backends_{0};

  // backend idx
  int idx_{0};

  // Number of Onnxifi Ops we build so far
  int onnxifi_op_id_{0};

  // Model id
  std::string model_id_;

  // Backend IDs
  std::vector<onnxBackendID> backend_ids_;

  // A cache for ONNX shape hints
  std::unordered_map<std::string, TensorShape> shape_hints_onnx_;

  // Partition info
  std::vector<PartitionInfo> partition_infos_;
};
} // namespace caffe2