// caffe2.proto — Caffe2 serialization schema, from pytorch 1.13.1+dfsg-4
// (Debian bookworm source tree).
syntax = "proto2";

package caffe2;

// A few notes about the Caffe2's protobuffer convention:
// (1) Most objects are registered by their types, such as operators and nets.
//     For these, we have a string-type field "type" for registration purposes.
// (2) We do not use extension because that used to create quite some conflicts
//     in Caffe's protobuf design.
// (3) We have not used any proto3 specific features, such as Any or Map. This
//     is mainly for backward compatibility purposes but we may consider using
//     those in the future.

// TensorProto stores serialized Tensor objects.
message TensorProto {
  // The dimensions in the tensor.
  repeated int64 dims = 1;

  // Data type of the elements stored in this proto.
  enum DataType {
    UNDEFINED = 0;

    // Basic types
    FLOAT = 1; // float
    INT32 = 2; // int
    BYTE = 3; // byte, when deserialized, is going to be restored as uint8
    STRING = 4; // string

    // Less-commonly used data types
    BOOL = 5; // bool
    UINT8 = 6; // uint8_t
    INT8 = 7; // int8_t
    UINT16 = 8; // uint16_t
    INT16 = 9; // int16_t
    INT64 = 10; // int64_t
    // Note: enum value 11 is intentionally skipped.
    FLOAT16 = 12; // at::Half
    DOUBLE = 13; // double

    ZERO_COLLISION_HASH = 14; // zero-collision hash state
    REBATCHING_BUFFER = 15; // rebatching buffer
  }
  // The type of the deserialized tensor data
  optional DataType data_type = 2 [ default = FLOAT ];

  // The format of the serialized data.
  enum SerializationFormat {
    // FMT_PROTOBUF is the existing serialization format from before the
    // data_format field was introduced. Most data types are serialized using
    // the protobuf typed fields, although in some cases raw little endian data
    // is stored in the byte_data field instead.
    FMT_PROTOBUF = 0;
    // bfloat16 data stored in the raw_data field.
    FMT_BFLOAT16 = 1;
  }
  // data_format is a SerializationFormat enum value.
  // However, we intentionally store it as an integer value so we can
  // distinguish between old messages that do not have a data_format value vs
  // new messages that have a SerializationFormat value that we don't
  // understand.  If we stored this as an enum then protobuf would deserialize
  // both of these cases the same way.
  optional uint32 data_format = 15 [ default = 0 ];

  // For float
  repeated float float_data = 3 [ packed = true ];
  // For int32, uint8, int8, uint16, int16, bool, and float16
  // Note about float16: in storage we will basically convert float16 byte-wise
  // to unsigned short and then store them in the int32_data field.
  // Note: storing int8 and uint8 values in this field unfortunately results in
  // larger serialized data than necessary, as protobuf's varint encoding
  // scheme requires 2 bytes to represent int8 and uint8 values that have the
  // MSB set.
  repeated int32 int32_data = 4 [ packed = true ];
  // For bytes
  optional bytes byte_data = 5;
  // For strings
  repeated bytes string_data = 6;
  // For double
  repeated double double_data = 9 [ packed = true ];
  // For int64
  repeated int64 int64_data = 10 [ packed = true ];
  // store the raw data, contents are serialized as little-endian
  optional bytes raw_data = 13;

  // Optionally, a name for the tensor.
  optional string name = 7;

  // Optionally, a TensorProto can contain the details about the device that
  // it was serialized from. This is useful in cases like snapshotting a whole
  // workspace in a multi-GPU environment.
  optional DeviceOption device_detail = 8;

  // When loading from chunks this is going to indicate where to put data in the
  // full array. When not used full data have to be present
  message Segment {
    required int64 begin = 1;
    required int64 end = 2;
  }
  optional Segment segment = 11;

  // Field numbers 12 and 14 were previously used for now-deprecated fields.
  // Reserving them prevents the numbers from being accidentally reused for a
  // new field, which would silently reinterpret old serialized data.
  // (Requires protoc >= 3.0, which also compiles proto2 syntax.)
  reserved 12, 14;
}

// QTensorProto stores a serialized quantized tensor: the integer element
// values together with the quantization parameters (precision, scale, bias)
// needed to interpret them.
message QTensorProto {
  // The dimensions of the quantized tensor.
  repeated int64 dims = 1;
  // Quantization precision — presumably the bit width per element;
  // NOTE(review): confirm against the caffe2 QTensor implementation.
  required int32 precision = 2;
  // Quantization scale (single-group case).
  required double scale = 3;
  // Quantization bias / offset (single-group case).
  required double bias = 4;
  // Whether the stored values are interpreted as signed integers.
  required bool is_signed = 5;
  // The quantized element values.
  repeated int32 data = 6 [ packed = true ];
  // Optionally, a name for the tensor.
  optional string name = 7;
  // Data type of the corresponding dequantized tensor.
  optional TensorProto.DataType data_type = 8 [ default = INT32 ];

  // Multi-group quantization params: one scale/bias pair per group.
  repeated double scales = 9;
  repeated double biases = 10;

  // Multi-group quantization needed, indicates in which dimension
  // we do the "group wise quantization"
  optional int32 axis = 11;

  // It should be true if it is a multi-group quantization proto
  optional bool is_multiparam = 12 [ default = false ];
}

// TensorProtos stores multiple TensorProto objects in one single proto. This
// is useful for small tensors; For anything big, consider using a DB for
// storage.
message TensorProtos {
  // The serialized tensors.
  repeated TensorProto protos = 1;
}

// TensorShape describes the shape and element type of a tensor without its
// data, e.g. as produced by shape inference.
message TensorShape {
  // The dimension sizes.
  repeated int64 dims = 1;
  // Element type of the tensor.
  optional TensorProto.DataType data_type = 2 [ default = FLOAT ];
  // Presumably the indices of dimensions whose sizes are unknown —
  // NOTE(review): confirm against the shape-inference code that fills this.
  repeated int32 unknown_dims = 3;
  // True when the shape as a whole could not be inferred.
  optional bool unknown_shape = 4 [ default = false ];
  // Optionally, the name of the tensor this shape describes.
  optional string name = 5;
}

// TensorShapes holds a list of TensorShape messages.
message TensorShapes {
  // One entry per tensor.
  repeated TensorShape shapes = 1;
}

// TensorBoundShape is used to save bound shape inference result for a tensor.
// TensorBoundShape.shape is inferred shape for this tensor.
// TensorBoundShape.dimType contains dim_type for every dimension.
// eg: for dimension i, shape.dims[i] is the inferred shape and
// dim_type[i] is corresponding dim_type.
message TensorBoundShape {
  // The inferred (bound) shape.
  optional TensorShape shape = 1;
  // How each dimension's bound was derived.
  // Note: values are not prefixed with the enum name; kept as-is for
  // backward compatibility with existing generated code.
  enum DimType {
    UNKNOWN = 0; // unknown
    CONSTANT = 1; // constant
    // batch, corresponding dimension is batch_size
    BATCH = 2;
    // batch_of_feature_max,
    // corresponding shape is inferred_feature_length * batch_size
    BATCH_OF_FEATURE_MAX = 3;
    // batch_of_feature_max_default
    // corresponding shape is default_feature_length * batch_size
    BATCH_OF_FEATURE_MAX_DEFAULT = 4;
    // feature_max, corresponding shape is inferred_feature_length
    FEATURE_MAX = 5;
    // feature_max_default, corresponding shape is default_feature_length
    FEATURE_MAX_DEFAULT = 6;
  }
  repeated DimType dim_type = 2; // dim_type.size() == shape.dims.size()
  // Name of the tensor this result applies to.
  optional string name = 3;
  // a flag to indicate whether the shape is final and cannot be changed
  // eg: input/output of in-place ops
  optional bool shape_is_final = 4;
}

// TensorBoundShapes bundles bound shape inference results for a set of
// tensors, along with the bounds the inference was run with.
message TensorBoundShapes {
  // One inference result per tensor.
  repeated TensorBoundShape shapes = 1;
  // Batch-size bound used during inference.
  optional int64 max_batch_size = 2;
  // Feature-length bound used during inference.
  optional int64 max_feature_len = 3;
}

// AOTConfig carries ahead-of-time compilation settings.
message AOTConfig {
  // Upper bound on the batch size.
  required int64 max_batch_size = 1;
  // Upper bound on the sequence size.
  required int64 max_seq_size = 2;
  // Whether to broadcast in the batch dimension — NOTE(review): semantics
  // inferred from the name; confirm against the consumer of this config.
  required bool in_batch_broadcast = 3;
  // Ops to exclude from ONNXIFI lowering — presumably a delimited list;
  // NOTE(review): confirm the expected format.
  optional string onnxifi_blacklist_ops = 4;
  // Minimum op count for ONNXIFI lowering — NOTE(review): confirm semantics.
  optional int32 onnxifi_min_ops = 5;
}

// A named argument containing either singular float, integer and string
// values, or repeated float, int and string arrays.
// Only the field(s) matching the argument's declared type are expected to be
// set for any given Argument.
message Argument {
  // The argument name, as declared in the operator schema.
  optional string name = 1;

  // Singular values.
  optional float f = 2;
  optional int64 i = 3;
  optional bytes s = 4;
  optional TensorProto t = 10;
  optional NetDef n = 8;

  // Repeated values.
  repeated float floats = 5;
  repeated int64 ints = 6;
  repeated bytes strings = 7;
  repeated TensorProto tensors = 11;
  repeated NetDef nets = 9;
  repeated QTensorProto qtensors = 12;
}

// DeviceType that Caffe2 currently supports.
// Note: if you add a device type, make sure you add the corresponding device
// line in the DeviceTypeName() function in caffe2/utils/proto_utils.cc
// and update c10/core/DeviceType.h
// Values carry a PROTO_ prefix — presumably to avoid name collisions with
// the corresponding c10 DeviceType identifiers.
enum DeviceTypeProto {
  PROTO_CPU = 0; // In default, we will use CPU.
  PROTO_CUDA = 1; // CUDA.
  PROTO_MKLDNN = 2; // Reserved for explicit MKLDNN
  PROTO_OPENGL = 3; // OpenGL
  PROTO_OPENCL = 4; // OpenCL
  PROTO_IDEEP = 5; // IDEEP.
  PROTO_HIP = 6; // AMD HIP
  PROTO_FPGA = 7; // FPGA
  PROTO_ORT = 8; // ONNX Runtime
  PROTO_XLA = 9; // XLA / TPU
  PROTO_MPS = 10; // MPS
  // Change the following number if you add more devices in the code.
  PROTO_COMPILE_TIME_MAX_DEVICE_TYPES = 11;
}

// Device-specific options. We do not distinguish DeviceOption protos for
// different DeviceTypes, so currently all devices share the same DeviceOption
// proto. Fields that are specific to a device type are ignored if the type
// does not match.
// Note: if you add fields to the DeviceOption, make sure you add the
// corresponding changes to IsSameDevice() function in utils/proto_utils.{h,cc}.
message DeviceOption {
  // [general] Options that need to be carried out before running the execution.
  // Stored as an int rather than the DeviceTypeProto enum; values follow
  // DeviceTypeProto (0 is CPU).
  // optional DeviceType device_type = 1 [ default = CPU ];
  optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU.
  // [general] Used together with device_type to identify the exact device
  optional int32 device_id = 2;
  // [general] The random seed to start the device random number generator with.
  optional uint32 random_seed = 3;
  // [general] What node this op should execute on.
  // Used for net transformation purposes. Must be empty at execution time.
  optional string node_name = 4;
  // [CPU and Linux specific] NUMA node id
  optional int32 numa_node_id = 5;
  // [general] Extra information passed, not used at execution time currently.
  repeated string extra_info = 6;
}

// Operator Definition.
message OperatorDef {
  repeated string input = 1; // the name of the input blobs
  repeated string output = 2; // the name of output top blobs
  optional string name = 3; // the operator name. This is optional.
  // the operator type. This is needed to create the object from the operator
  // registry.
  optional string type = 4;
  // arg is for the argument defined in operator schema
  repeated Argument arg = 5;

  // The device option that the operator should run under.
  optional DeviceOption device_option = 6;

  // Optionally, one can specify an engine when there are multiple
  // implementations available simultaneously for one device type.
  // If one specifies an engine but that engine does not exist in the compiled
  // Caffe2 binary, Caffe2 will fall back to the default engine of that device
  // type.
  optional string engine = 7;

  // Additional 'fake' inputs used for expressing control dependencies
  // in the operator graph. This can be used to ensure that an
  // operator does not run until another operator is ready, for e.g.
  // scheduling control. These are not passed as actual inputs to the
  // Operator implementation, and are only used by the Net class for
  // scheduling purposes.
  repeated string control_input = 8;

  // is_gradient_op argument is only used as a hint in shape inference
  // and has no runtime significance
  optional bool is_gradient_op = 9 [ default = false ];

  // debug information associated with the construction of the operator.
  // This is an optional string with no assumed characteristics as
  // operators can be constructed in any language.
  optional string debug_info = 10;

  // the domain of the operator to help runtime distinguish which operator
  // library this OperatorDef refers to. For example, both caffe2 and aten
  // have an `Add` operator; with domain, we can easily decide which operator
  // to execute. to support multiple operator libs, we use domain to
  // distinguish which operator lib we refer to:
  //   - "caffe2" means this uses Caffe2 operator library
  //   - "aten" means this uses ATen operator library
  //   - "c10" is for the fused library
  //   - if the domain is missing or empty, we use "caffe2", this is for
  //     legacy models, new serializer should always export an OperatorDef
  //     with domain and op_version
  optional string domain = 11;
  // each operator has its own version number.
  // operator version information
  // each time, we change the API or semantics of the operator,
  // we bump the version for the operator.
  // the runtime system should check the op_version of each OperatorDef
  // and decide whether it should reject or accept the model
  optional int64 op_version = 12;
}

// MapFieldEntry follows the pattern for cross-proto-version maps.
// See https://developers.google.com/protocol-buffers/docs/proto3#maps
message MapFieldEntry {
  // The map key.
  required string key = 1;
  // The value associated with `key`.
  required string val = 2;
}

// Used to hold backend-specific options.
message BackendOptions {
  // Name of the backend that the specified options apply to.
  required string backend_name = 1;
  // Flexible map for passing in the options.
  repeated MapFieldEntry option = 2;
}

// Partition definition.
message PartitionInfo {
  // Name of the partition.
  required string name = 1;

  // A list of logic device ID, indicating which devices this partition
  // can be executed on. If empty, it means the partition won't run on
  // device but on host CPU instead.
  repeated int32 device_id = 2;

  // Extra debug info.
  optional string extra_info = 3;

  // Flexible map for passing options specific to a backend.
  repeated BackendOptions backend_options = 4;
}

// Network definition.
message NetDef {
  optional string name = 1; // the network's name
  // Operators that the network contains.
  // Note: this is not named "operator" because that is a reserved word in C++.
  repeated OperatorDef op = 2;

  // The type of network that the net should be run with. This routes the
  // network instantiation to different execution modes. The default mode,
  // "simple", runs the operators in a sequential way as the original Caffe
  // implementation does.
  optional string type = 3;

  // the number of workers, if the operators in the network are to be carried
  // out in parallel.
  // Note: This is to be deprecated. Use the arg field with "num_workers" as
  // key instead.
  // Note 2: The old uses of this were never actually cleaned up
  optional int32 num_workers = 4;

  // The device option for the network. If a network has a specific device
  // option and one of its operators does not have it set, we will copy over the
  // device option to the operator. This allows us to basically avoid putting
  // device options at every operator.
  optional DeviceOption device_option = 5;

  // Net-level arguments.
  repeated Argument arg = 6;

  // Two optional fields to declare external input and output of a net.
  // If these two are set, when a net is created, we will sanity check for
  // every op whether its input is declared (either as an external input,
  // or as an intermediate blob created by one of the ops), and sanity check
  // if all blobs in external_output are produced.
  //
  // In cases of memory optimization, declaring external_input and
  // external_output also ensures that storage of these blobs are persistent:
  // for any blob in external_input and external_output, after a network run
  // finishes, their content are actually the right content. Any intermediate
  // blobs' contents may be overwritten.
  repeated string external_input = 7;
  repeated string external_output = 8;

  // Partitioning info, indexed by partition names.
  repeated PartitionInfo partition_info = 9;
}

// ExecutionStep is actually a sort-of-hacky way we simulate iteration right
// now.
message ExecutionStep {
  // ExecutionStep should either contain a set of substeps, or a set of
  // network names to run in this execution step. They should NOT both be set
  // at the same time.
  optional string name = 1;
  // An execution step could be recursive, in which it involves a set of
  // substeps.
  repeated ExecutionStep substep = 2;
  // Alternatively, an execution step could involve one or more networks.
  // Note that you cannot have both substeps and networks. Choose one.
  // Note that an execution step refers networks by their name. The actual
  // network definition of the same name should be included in the network field
  // of the plan. The reason is that a network object might hold internal states
  // (think of a data layer), so we want to have the same network object that
  // multiple steps could ask to run.
  repeated string network = 3;
  // Number of iterations to run this step. The substeps or the networks
  // specified will be run sequentially, and one sequential run is considered
  // one iteration. If this is not set, the number of iterations is assumed to
  // be 1.
  optional int64 num_iter = 4;

  // Criteria network specifies a single output (TensorCPU<bool>) of
  // size (1), is run on every iteration by the executor, and
  // execution terminates when the output[0] is `false`.
  optional string criteria_network = 5 [ deprecated = true ];

  // DEPRECATED. Use `run_every_ms`.
  optional string report_net = 7;
  optional int32 report_interval = 8;

  // If provided, execute this step at every time interval (in millisecs)
  // while its sibling execution steps execute in parallel. This step is
  // guaranteed to run at least once after all non-interval siblings finished.
  optional int64 run_every_ms = 11;

  // If false or not set, execute sub-steps serially.
  // If true, execute all substeps concurrently, each one in a separate thread.
  optional bool concurrent_substeps = 6;

  // Name of a scalar boolean tensor.
  // ES checks this blob AFTER every substeps/subnets.
  // If specified, and the value is true, then ES will skip the rest and return
  // immediately.
  // This means that the report_net and the first step will always be called.
  // Use cases:
  // 1) the first substep stops the rest if data condition not met
  // 2) the first substep decide which of the rest of the steps should be run.
  // 3) external control
  //
  // ** It is the user's responsibility to not to put this blob in race
  // conditions.
  // ** For example when setting this blob in concurrent substeps
  optional string should_stop_blob = 9;

  // if only_once is true, this step will only be executed once. this ONLY takes
  // effect when using should_stop_blob
  optional bool only_once = 10;

  // Whether to create a child workspace for this step.
  // If yes, the workflow and nets are re-created every time this step is run.
  optional bool create_workspace = 12;

  // How many copies of the children execution steps to run concurrently.
  optional int32 num_concurrent_instances = 13;
}

// PlanDef is the top-level execution plan: the networks to instantiate and
// the execution steps that drive them.
message PlanDef {
  // A name for the plan.
  optional string name = 1;
  // All the networks that are used in this execution. Note that networks should
  // be ordered in the way they are executed, i.e. for a layer in a network, all
  // its input blobs should already have been initialized by the layers or
  // networks defined before it.
  repeated NetDef network = 2;
  // The execution steps of the plan, run in order.
  repeated ExecutionStep execution_step = 3;
}

// Protobuf format for blobs that are not Tensors. We use a key to store the
// type of the blob. For example for a serialized DBProto, the type should
// be "DBReader" and the content should be a serialized DBProto object.
message BlobProto {
  // The name of the blob in the workspace.
  optional string name = 1;
  // The registered type key of the blob (e.g. "DBReader").
  optional string type = 2;
  // Set when the blob is a Tensor.
  optional TensorProto tensor = 3;
  // Opaque serialized content for non-Tensor blob types.
  optional bytes content = 4;
  // Set when the blob is a quantized tensor.
  optional QTensorProto qtensor = 5;
  // If blob is not Tensor and is divided into chunks, content_num_chunks
  // contains number of chunks, into which blob was divided.
  optional int32 content_num_chunks = 6;
  // Index of this chunk within the chunked blob.
  optional int32 content_chunk_id = 7;
}

// Protobuf format to serialize DBReader.
message DBReaderProto {
  // The name for the DB object in the workspace.
  optional string name = 1;
  // The source of the DB
  optional string source = 2;
  // The type of the DB
  optional string db_type = 3;
  // The current key of the DB if the DB supports seeking.
  optional string key = 4;
}

// Per-blob serialization settings, selected by matching the blob name
// against a regular expression.
message BlobSerializationOptions {
  // This set of options will only apply to blobs whose name matches this
  // regular expression.  If blob_name_regex is empty then it will be treated
  // as matching all blobs.
  optional string blob_name_regex = 1;

  // Note:
  // - a chunk_size of 0 means "use the default chunk size".  The default chunk
  //   size is controlled by the --caffe2_tensor_chunk_size command line flag.
  // - a chunk size of -1 means to disable chunking, and serialize the blob in
  //   a single chunk.
  optional int64 chunk_size = 2;

  enum FloatFormat {
    // Use the current default serialization format, as chosen by the
    // current version of the code.  (At the time of writing this is PROTOBUF)
    FLOAT_DEFAULT = 0;
    // Store the data in the TensorProto's float_data field
    FLOAT_PROTOBUF = 1;
    // Serialize float values as bfloat16.  Note that this conversion is lossy.
    FLOAT_BFLOAT16 = 2;
  }

  // Settings for how to serialize tensors containing float values
  optional FloatFormat float_format = 3;
}

// Top-level container for blob serialization settings.
message SerializationOptions {
  // A set of options to use when serializing blobs.
  // This is a list, sorted from highest to lowest precedence.  When
  // serializing a blob, the first entry whose blob_name_regex matches the
  // blob name will be used.
  repeated BlobSerializationOptions options = 1;
}