File: onnxifi_transformer.h

package info (click to toggle)
pytorch 1.13.1+dfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (210 lines) | stat: -rw-r--r-- 6,859 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#pragma once

#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

#include "caffe2/opt/backend_cutting.h"
#include "onnx/onnx_pb.h"

#include "caffe2/core/operator.h"
#include "caffe2/onnx/onnxifi_init.h"
#include "caffe2/opt/backend_transformer_base.h"

namespace caffe2 {
namespace onnx {
class OnnxExporter;
}

// Rewrites the net in place: splits each SparseLengthsSumSparse op into a
// SparseLengthsSumSparseLookup op followed by a SparseLengthsSum op.
// `ws` provides the workspace whose blobs the split ops reference.
TORCH_API void splitSparseLengthsSumSparse(NetDef* net, const Workspace& ws);

// Configuration knobs for OnnxifiTransformer, extending the generic
// BackendTransformOptions with ONNXIFI-specific settings. All fields are
// plain data with in-class defaults; callers set only what they need.
struct OnnxifiTransformerOptions final : public BackendTransformOptions {
  explicit OnnxifiTransformerOptions() : BackendTransformOptions() {}

  // Pass serialized onnx model if true, otherwise pass serialized c2 model
  bool use_onnx{false};

  // Whether to adjust batch at the outputs or not
  bool adjust_batch{true};

  // Whether to lower model blob by blob
  bool load_model_by_blob{false};

  // Whether to enforce fp32 inputs into fp16.
  bool enforce_fp32_inputs_into_fp16{false};

  // Whether to combine fp32 batched inputs into one tensor and convert it to
  // fp16 or not
  bool merge_fp32_inputs_into_fp16{false};

  // Whether to verify that a single subnet was created
  bool verify_only_single_subnet{false};

  // Whether the net has been ssaRewritten
  bool predictor_net_ssa_rewritten{false};

  // Inference timeout (0 means no timeout; units determined by the backend —
  // TODO confirm against the ONNXIFI backend implementation)
  int timeout{0};

  // Mapping of batch sizes to shape infos
  std::unordered_map<int, ShapeInfoMap> shape_hints_per_bs;

  // Whether to read batch size from Onnxifi.
  bool use_onnxifi_batch_size{false};
};

// Thin wrapper around the loaded ONNXIFI library for getting/setting
// string-keyed backend options. Does not own the library handle.
class TORCH_API OnnxifiOptionHelper final {
 public:
  OnnxifiOptionHelper();

  // Set Onnxifi option `option` to `value`; returns true on success.
  bool setOnnxifiOption(const std::string& option, const std::string& value);

  // Get the current value of Onnxifi option `option`.
  std::string getOnnxifiOption(const std::string& option);

 private:
  // Pointer to loaded onnxifi library (non-owning; set by the constructor)
  onnxifi_library* lib_{nullptr};
};

// Transformer that cuts supported subgraphs out of a Caffe2 predictor net and
// replaces each with a single Onnxifi op that delegates execution to an
// ONNXIFI backend. Subgraphs can be handed to the backend either as serialized
// ONNX protos or as serialized Caffe2 protos (see
// OnnxifiTransformerOptions::use_onnx).
class TORCH_API OnnxifiTransformer final : public BackendTransformerBase {
 public:
  explicit OnnxifiTransformer(const OnnxifiTransformerOptions& opts);
  ~OnnxifiTransformer() override;

  // Entry point: rewrites `pred_net` in place, lowering supported subgraphs
  // to Onnxifi ops. `weight_names` lists weight blobs in `ws`, `shape_hints`
  // carries input/intermediate shape info, and `blocklisted_ops` are net
  // positions that must stay on CPU.
  void transform(
      Workspace* ws,
      NetDef* pred_net,
      const std::vector<std::string>& weight_names,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<int>& blocklisted_ops) override;

  // Query whether an operator is supported by passing C2 protobuf
  bool supportOpC2(
      const caffe2::OperatorDef& op,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<std::string>& weights,
      const std::unordered_set<int>& blocklisted_ops,
      onnxBackendID backend_id) const;

  // Determine backend id
  std::vector<onnxBackendID> getBackendId();

 private:
  // Convert a cutoff subgraph net to an Onnxifi op via the ONNX path.
  // Since we create new tensors during the conversion process, we actually
  // need to inject them into the original workspace.
  // Since our onnx exporter uses std::unordered_map<std::string, TensorShape>
  // as lut, we need to include an extra copy of shape info and maintain them
  // together
  caffe2::NetDef SubnetToOnnxifiOpViaOnnx(
      const caffe2::NetDef& net,
      const std::unordered_set<std::string>& weights_in_ws,
      Workspace* ws,
      onnx::OnnxExporter* exporter,
      ShapeInfoMap* shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Convert a cutoff subgraph net to an Onnxifi op (C2 proto path)
  caffe2::NetDef SubnetToOnnxifiOpViaC2(
      const caffe2::NetDef& net,
      const std::unordered_set<std::string>& weights_in_ws,
      const ShapeInfoMap& shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Check that output shape hints are present to ensure we can pass them to
  // OnnxifiOp
  bool canPassOutputShapeHintsPerBs(
      const OperatorDef& op,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs) const;

  // Build the OperatorDef for a single Onnxifi op. At this point we already
  // have all the ops and external inputs and outputs of the subgraph.
  OperatorDef buildOnnxifiOp(
      const std::string& onnx_model_str,
      const std::unordered_set<std::string>& initialization_list,
      const std::vector<std::string>& external_inputs,
      const std::vector<std::string>& external_outputs,
      const ShapeInfoMap& shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Transform by passing C2 proto to backend
  opt::CutResult TransformViaC2(
      NetDef* pred_net,
      const std::unordered_set<std::string>& weights,
      const std::unordered_set<int>& blocklisted_ops,
      const ShapeInfoMap& shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Transform by passing ONNX proto to backend
  opt::CutResult TransformViaOnnx(
      Workspace* ws,
      NetDef* pred_net,
      const std::unordered_set<std::string>& weights,
      const std::unordered_set<int>& blocklisted_ops,
      ShapeInfoMap* shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Query whether an operator is supported by passing ONNX protobuf
  bool supportOpOnnx(
      const caffe2::OperatorDef& op,
      onnx::OnnxExporter* exporter,
      const std::unordered_set<int>& blocklisted_ops,
      onnxBackendID backend_id) const;

  // Tie the output of Gather to the scalar weight input of the
  // SparseLengthsWeighted* and SparseLengthsSumSparseLookup (which is split
  // from the SparseLengthsWeighted*Sparse) ops. If the latter is disabled,
  // disable the former too.
  void tieGatherAndSparseLengthsWeightedSumOps(
      const NetDef& net,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<std::string>& weights,
      std::unordered_set<int>* blocklisted_ops) const;

  // For net with partitioning info, blocklist ops that are supposed to run on
  // CPU, whose partition info will contain empty device_id list.
  void blocklistCpuPartition(
      const NetDef& net,
      std::unordered_set<int>* blocklisted_ops) const;

  // Rule based filtering: applies the blocklisting heuristics above
  void applyFilteringRules(
      const NetDef& net,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<std::string>& weights,
      std::unordered_set<int>* blocklisted_ops) const;

  // Extract partition info from the original net into partition_infos_
  void extractPartitionInfo(const NetDef& net);

  // Options
  OnnxifiTransformerOptions opts_;

  // Pointer to loaded onnxifi library (non-owning)
  onnxifi_library* lib_{nullptr};

  // Number of backends
  size_t num_backends_{0};

  // backend idx
  int idx_{0};

  // Number of Onnxifi Ops we have built so far
  int onnxifi_op_id_{0};

  // Model id
  std::string model_id_;

  // Backend IDs
  std::vector<onnxBackendID> backend_ids_;

  // A cache for ONNX shape hints
  std::unordered_map<std::string, TensorShape> shape_hints_onnx_;

  // Partition info
  std::vector<PartitionInfo> partition_infos_;
};
} // namespace caffe2