// Protocol buffer definitions for the caffe2 package.
syntax = "proto2";
package caffe2;
// A few notes about Caffe2's protobuf conventions:
// (1) Most objects, such as operators and nets, are registered by their
// types. For these, we have a string-typed field "type" for registration
// purposes.
// (2) We do not use extensions, because they used to create quite a few
// conflicts in Caffe's protobuf design.
// (3) We have not used any proto3-specific features, such as Any or Map. This
// is mainly for backward compatibility, but we may consider using them in
// the future.
// TensorProto stores serialized Tensor objects.
message TensorProto {
  // Size of the tensor along each dimension.
  repeated int64 dims = 1;

  // Element types a tensor may hold.
  enum DataType {
    UNDEFINED = 0;

    // Basic types.
    FLOAT = 1;   // float
    INT32 = 2;   // int
    BYTE = 3;    // byte; restored as uint8 when deserialized
    STRING = 4;  // string

    // Less commonly used data types.
    BOOL = 5;      // bool
    UINT8 = 6;     // uint8_t
    INT8 = 7;      // int8_t
    UINT16 = 8;    // uint16_t
    INT16 = 9;     // int16_t
    INT64 = 10;    // int64_t
    // NOTE(review): value 11 is not assigned in this enum; confirm it was
    // never used before handing it out.
    FLOAT16 = 12;  // at::Half
    DOUBLE = 13;   // double

    ZERO_COLLISION_HASH = 14;  // zero-collision hash state
    REBATCHING_BUFFER = 15;    // rebatching buffer
  }

  // Element type of the deserialized tensor data.
  optional DataType data_type = 2 [default = FLOAT];

  // How the tensor payload is laid out inside this message.
  enum SerializationFormat {
    // The serialization format that predates the data_format field. Most
    // data types are serialized through the typed protobuf fields, although
    // in some cases raw little-endian data is stored in byte_data instead.
    FMT_PROTOBUF = 0;

    // bfloat16 data stored in the raw_data field.
    FMT_BFLOAT16 = 1;
  }

  // Holds a SerializationFormat value. It is intentionally declared as an
  // integer instead of the enum type so that readers can tell apart
  //   - old messages that carry no data_format at all, and
  //   - new messages carrying a SerializationFormat value the reader does
  //     not understand.
  // Declared as an enum, protobuf would deserialize both cases identically.
  optional uint32 data_format = 15 [default = 0];

  // Payload for float tensors.
  repeated float float_data = 3 [packed = true];

  // Payload for int32, uint8, int8, uint16, int16, bool, and float16 tensors.
  // float16: values are converted byte-wise to unsigned short and then
  // stored in this field.
  // int8/uint8: storing them here unfortunately yields larger serialized
  // data than necessary, because protobuf's varint encoding needs 2 bytes for
  // int8 and uint8 values that have the MSB set.
  repeated int32 int32_data = 4 [packed = true];

  // Payload for byte tensors.
  optional bytes byte_data = 5;

  // Payload for string tensors.
  repeated bytes string_data = 6;

  // Payload for double tensors.
  repeated double double_data = 9 [packed = true];

  // Payload for int64 tensors.
  repeated int64 int64_data = 10 [packed = true];

  // Raw data; contents are serialized as little-endian.
  optional bytes raw_data = 13;

  // Optional name for the tensor.
  optional string name = 7;

  // Optional details about the device the tensor was serialized from. This
  // is useful in cases like snapshotting a whole workspace in a multi-GPU
  // environment.
  optional DeviceOption device_detail = 8;

  // When loading from chunks, indicates where this chunk's data goes in the
  // full array. When it is not used, the full data has to be present.
  message Segment {
    required int64 begin = 1;
    required int64 end = 2;
  }
  optional Segment segment = 11;

  // Field numbers 12 and 14 were previously used for now-deprecated fields.
  // Reserving them makes protoc reject any new field that tries to reuse
  // them, which would otherwise silently reinterpret old serialized data.
  reserved 12, 14;
}
// Tensor message that carries quantization parameters (a single scale/bias,
// or per-group scales/biases) alongside its integer data.
message QTensorProto {
  repeated int64 dims = 1;

  // NOTE(review): precision, scale, bias and is_signed are undocumented in
  // this file; presumably the single-group quantization parameters — confirm.
  required int32 precision = 2;
  required double scale = 3;
  required double bias = 4;
  required bool is_signed = 5;

  repeated int32 data = 6 [packed = true];
  optional string name = 7;
  optional TensorProto.DataType data_type = 8 [default = INT32];

  // Parameters for multi-group quantization.
  repeated double scales = 9;
  repeated double biases = 10;

  // Needed for multi-group quantization: the dimension along which the
  // group-wise quantization is done.
  optional int32 axis = 11;

  // Should be true when this is a multi-group quantization proto.
  optional bool is_multiparam = 12 [default = false];
}
// TensorProtos stores multiple TensorProto objects in one single proto. This
// is useful for small tensors; For anything big, consider using a DB for
// storage.
message TensorProtos {
  // The TensorProto objects bundled into this single proto.
  repeated TensorProto protos = 1;
}
// Dimensions and element type of a tensor, without any tensor data.
message TensorShape {
  repeated int64 dims = 1;
  optional TensorProto.DataType data_type = 2 [default = FLOAT];

  // NOTE(review): presumably the indices into dims whose sizes are not
  // known — confirm against the shape inference code.
  repeated int32 unknown_dims = 3;

  // NOTE(review): presumably set when nothing about the shape is known —
  // confirm. Defaults to false.
  optional bool unknown_shape = 4 [default = false];

  optional string name = 5;
}
// A list of TensorShape messages.
message TensorShapes {
  repeated TensorShape shapes = 1;
}
// TensorBoundShape is used to save the bound shape inference result for a
// tensor.
// TensorBoundShape.shape is the inferred shape for this tensor.
// TensorBoundShape.dim_type contains the DimType of every dimension.
// E.g. for dimension i, shape.dims[i] is the inferred size and dim_type[i] is
// the corresponding DimType.
message TensorBoundShape {
  // The inferred shape.
  optional TensorShape shape = 1;

  // Classification of a single dimension of the inferred shape.
  enum DimType {
    // Unknown.
    UNKNOWN = 0;

    // Constant.
    CONSTANT = 1;

    // Batch: the corresponding dimension is batch_size.
    BATCH = 2;

    // Batch of feature max: the corresponding shape is
    // inferred_feature_length * batch_size.
    BATCH_OF_FEATURE_MAX = 3;

    // Batch of feature max (default): the corresponding shape is
    // default_feature_length * batch_size.
    BATCH_OF_FEATURE_MAX_DEFAULT = 4;

    // Feature max: the corresponding shape is inferred_feature_length.
    FEATURE_MAX = 5;

    // Feature max (default): the corresponding shape is
    // default_feature_length.
    FEATURE_MAX_DEFAULT = 6;
  }

  // One entry per dimension: dim_type.size() == shape.dims.size().
  repeated DimType dim_type = 2;

  optional string name = 3;

  // Flags that the shape is final and cannot be changed, e.g. for the
  // input/output of in-place ops.
  optional bool shape_is_final = 4;
}
// A list of TensorBoundShape messages together with two size limits.
message TensorBoundShapes {
  repeated TensorBoundShape shapes = 1;

  // NOTE(review): presumably the batch size and feature length bounds used
  // when the shapes were inferred — confirm.
  optional int64 max_batch_size = 2;
  optional int64 max_feature_len = 3;
}
// NOTE(review): this message is undocumented in this file; presumably
// settings for ahead-of-time compilation — confirm with the owners.
message AOTConfig {
  required int64 max_batch_size = 1;
  required int64 max_seq_size = 2;
  required bool in_batch_broadcast = 3;

  // NOTE(review): the encoding of the op list inside this string (separator,
  // op naming) is not visible here — confirm against the reader.
  optional string onnxifi_blacklist_ops = 4;
  optional int32 onnxifi_min_ops = 5;
}
// A named argument containing either singular float, integer and string
// values, or repeated float, int and string arrays.
message Argument {
  // Name of the argument.
  optional string name = 1;

  // Singular values.
  optional float f = 2;
  optional int64 i = 3;
  optional bytes s = 4;
  optional TensorProto t = 10;
  optional NetDef n = 8;

  // Repeated values.
  repeated float floats = 5;
  repeated int64 ints = 6;
  repeated bytes strings = 7;
  repeated TensorProto tensors = 11;
  repeated NetDef nets = 9;
  repeated QTensorProto qtensors = 12;
}
// DeviceType that Caffe2 currently supports.
// Note: if you add a device type, make sure you add the corresponding device
// line in the DeviceTypeName() function in caffe2/utils/proto_utils.cc
// and update c10/core/DeviceType.h
enum DeviceTypeProto {
  // CPU; this is what is used by default.
  PROTO_CPU = 0;
  // CUDA.
  PROTO_CUDA = 1;
  // Reserved for explicit MKLDNN.
  PROTO_MKLDNN = 2;
  // OpenGL.
  PROTO_OPENGL = 3;
  // OpenCL.
  PROTO_OPENCL = 4;
  // IDEEP.
  PROTO_IDEEP = 5;
  // AMD HIP.
  PROTO_HIP = 6;
  // FPGA.
  PROTO_FPGA = 7;
  // ONNX Runtime.
  PROTO_ORT = 8;
  // XLA / TPU.
  PROTO_XLA = 9;
  // MPS.
  PROTO_MPS = 10;

  // Change the following number if you add more devices in the code.
  PROTO_COMPILE_TIME_MAX_DEVICE_TYPES = 11;
}
// Device-specific options. We do not distinguish DeviceOption protos for
// different DeviceTypes, so currently all devices share the same DeviceOption
// proto. Fields that are specific to a device type are ignored if the type
// does not match.
// Note: if you add fields to DeviceOption, make sure you make the
// corresponding changes to the IsSameDevice() function in
// utils/proto_utils.{h,cc}.
message DeviceOption {
  // [general] Options that need to be carried out before running the
  // execution. Stored as a plain integer; 0 is CPU. The enum-typed form of
  // this field is kept below for reference only:
  // optional DeviceType device_type = 1 [ default = CPU ];
  optional int32 device_type = 1 [default = 0];

  // [general] Together with device_type, identifies the exact device.
  optional int32 device_id = 2;

  // [general] Seed to start the device random number generator with.
  optional uint32 random_seed = 3;

  // [general] The node this op should execute on. Used for net
  // transformation purposes; must be empty at execution time.
  optional string node_name = 4;

  // [CPU and Linux specific] NUMA node id.
  optional int32 numa_node_id = 5;

  // [general] Extra information passed along; currently not used at
  // execution time.
  repeated string extra_info = 6;
}
// Operator Definition.
message OperatorDef {
  // Names of the input blobs.
  repeated string input = 1;

  // Names of the output top blobs.
  repeated string output = 2;

  // The operator name. This is optional.
  optional string name = 3;

  // The operator type, needed to create the object from the operator
  // registry.
  optional string type = 4;

  // Arguments defined in the operator schema.
  repeated Argument arg = 5;

  // The device option the operator should run under.
  optional DeviceOption device_option = 6;

  // Optionally selects an engine when multiple implementations are available
  // simultaneously for one device type. If the named engine does not exist
  // in the compiled Caffe2 binary, Caffe2 falls back to the default engine of
  // that device type.
  optional string engine = 7;

  // Additional 'fake' inputs for expressing control dependencies in the
  // operator graph. They can be used to ensure that an operator does not run
  // until another operator is ready, e.g. for scheduling control. They are
  // not passed as actual inputs to the Operator implementation and are only
  // used by the Net class for scheduling purposes.
  repeated string control_input = 8;

  // Only used as a hint in shape inference; has no runtime significance.
  optional bool is_gradient_op = 9 [default = false];

  // Debug information associated with the construction of the operator.
  // This is an optional string with no assumed characteristics, since
  // operators can be constructed in any language.
  optional string debug_info = 10;

  // The domain of the operator, which helps the runtime decide which
  // operator library this OperatorDef refers to. For example, both caffe2
  // and aten have an `Add` operator; with the domain we can easily decide
  // which one to execute. Values:
  //   - "caffe2" means this uses the Caffe2 operator library
  //   - "aten" means this uses the ATen operator library
  //   - "c10" is for the fused library
  //   - if the domain is missing or empty, "caffe2" is used. This is for
  //     legacy models; a new serializer should always export an OperatorDef
  //     with domain and op_version.
  optional string domain = 11;

  // Operator version information. Each operator has its own version number,
  // which is bumped each time the API or semantics of the operator change.
  // The runtime system should check the op_version of each OperatorDef and
  // decide whether to reject or accept the model.
  optional int64 op_version = 12;
}
// MapFieldEntry follows the pattern for cross-proto-version maps.
// See https://developers.google.com/protocol-buffers/docs/proto3#maps
message MapFieldEntry {
  // Key of the map entry.
  required string key = 1;
  // Value stored under the key.
  required string val = 2;
}
// Used to hold backend-specific options.
message BackendOptions {
  // Name of the backend that the specified options apply to.
  required string backend_name = 1;

  // Flexible map for passing in the options, expressed as a list of
  // key/value entries.
  repeated MapFieldEntry option = 2;
}
// Partition definition.
message PartitionInfo {
  // The partition's name.
  required string name = 1;

  // Logical device IDs this partition can be executed on. An empty list
  // means the partition does not run on a device but on the host CPU instead.
  repeated int32 device_id = 2;

  // Extra debug information.
  optional string extra_info = 3;

  // Flexible map for passing backend-specific options.
  repeated BackendOptions backend_options = 4;
}
// Network definition.
message NetDef {
  // The network's name.
  optional string name = 1;

  // Operators that the network contains.
  // Note: this is not named "operator" because that is a reserved word in
  // C++.
  repeated OperatorDef op = 2;

  // The type of network the net should be run with. This routes network
  // instantiation to different execution modes. The default mode, "simple",
  // runs the operators sequentially, as the original Caffe implementation
  // does.
  optional string type = 3;

  // Number of workers, if the operators in the network are to be carried out
  // in parallel.
  // Note: this is to be deprecated in favor of the arg field with
  // "num_workers" as the key.
  // Note 2: the old uses of this were never actually cleaned up.
  optional int32 num_workers = 4;

  // The device option for the network. If the network has a specific device
  // option and one of its operators does not have one set, the network's
  // device option is copied over to the operator. This basically avoids
  // putting device options on every operator.
  optional DeviceOption device_option = 5;

  repeated Argument arg = 6;

  // Two optional fields that declare the external inputs and outputs of a
  // net. If both are set, then when the net is created we sanity-check, for
  // every op, that its input is declared (either as an external input or as
  // an intermediate blob created by one of the ops), and we sanity-check that
  // all blobs in external_output are produced.
  //
  // In cases of memory optimization, declaring external_input and
  // external_output also ensures that the storage of these blobs is
  // persistent: for any blob in external_input and external_output, its
  // content is actually the right content after a network run finishes. The
  // contents of any intermediate blobs may be overwritten.
  repeated string external_input = 7;
  repeated string external_output = 8;

  // Partitioning info, indexed by partition names.
  repeated PartitionInfo partition_info = 9;
}
// ExecutionStep is actually a sort-of-hacky way we simulate iteration right
// now.
message ExecutionStep {
  // An ExecutionStep contains either a set of substeps or a set of network
  // names to run in this step. The two should NOT both be set at the same
  // time.

  optional string name = 1;

  // An execution step can be recursive, in which case it involves a set of
  // substeps.
  repeated ExecutionStep substep = 2;

  // Alternatively, an execution step can involve one or more networks. You
  // cannot have both substeps and networks; choose one.
  // A step refers to networks by name. The actual network definition of the
  // same name should be included in the network field of the plan. The
  // reason is that a network object might hold internal state (think of a
  // data layer), so we want multiple steps to be able to ask the same
  // network object to run.
  repeated string network = 3;

  // Number of iterations to run this step. The specified substeps or
  // networks are run sequentially, and one sequential run counts as one
  // iteration. If this is not set, the number of iterations is assumed to
  // be 1.
  optional int64 num_iter = 4;

  // The criteria network specifies a single output (TensorCPU<bool>) of
  // size (1), is run on every iteration by the executor, and execution
  // terminates when the output[0] is `false`.
  optional string criteria_network = 5 [deprecated = true];

  // DEPRECATED. Use `run_every_ms`.
  optional string report_net = 7;
  optional int32 report_interval = 8;

  // If provided, execute this step at every time interval (in milliseconds)
  // while its sibling execution steps execute in parallel. This step is
  // guaranteed to run at least once after all non-interval siblings have
  // finished.
  optional int64 run_every_ms = 11;

  // If false or not set, sub-steps are executed serially.
  // If true, all substeps are executed concurrently, each one in a separate
  // thread.
  optional bool concurrent_substeps = 6;

  // Name of a scalar boolean tensor.
  // ES checks this blob AFTER every substeps/subnets. If it is specified and
  // its value is true, ES skips the rest and returns immediately. This means
  // that the report_net and the first step will always be called.
  // Use cases:
  //   1) the first substep stops the rest if a data condition is not met
  //   2) the first substep decides which of the remaining steps should run
  //   3) external control
  //
  // ** It is the user's responsibility not to put this blob in race
  //    conditions, for example when setting this blob in concurrent
  //    substeps.
  optional string should_stop_blob = 9;

  // If true, this step is executed only once. This ONLY takes effect when
  // should_stop_blob is used.
  optional bool only_once = 10;

  // Whether to create a child workspace for this step. If yes, the workflow
  // and nets are re-created every time this step is run.
  optional bool create_workspace = 12;

  // How many copies of the children execution steps to run concurrently.
  optional int32 num_concurrent_instances = 13;
}
// A plan: a set of networks plus the execution steps that run them.
message PlanDef {
  optional string name = 1;

  // All the networks that are used in this plan's execution. Networks should
  // be ordered in the way they are executed, i.e. for a layer in a network,
  // all of its input blobs should already have been initialized by the
  // layers or networks defined before it.
  repeated NetDef network = 2;

  repeated ExecutionStep execution_step = 3;
}
// Protobuf format for blobs that are not Tensors. We use a key to store the
// type of the blob. For example for a serialized DBProto, the type should
// be "DBReader" and the content should be a serialized DBProto object.
message BlobProto {
  optional string name = 1;

  // Key naming the type of the blob.
  optional string type = 2;

  optional TensorProto tensor = 3;
  optional bytes content = 4;
  optional QTensorProto qtensor = 5;

  // If the blob is not a Tensor and is divided into chunks,
  // content_num_chunks holds the number of chunks the blob was divided into.
  optional int32 content_num_chunks = 6;

  // NOTE(review): presumably the index of this chunk among
  // content_num_chunks — confirm against the (de)serializer.
  optional int32 content_chunk_id = 7;
}
// Protobuf format to serialize DBReader.
message DBReaderProto {
  // Name of the DB object in the workspace.
  optional string name = 1;

  // Source of the DB.
  optional string source = 2;

  // Type of the DB.
  optional string db_type = 3;

  // Current key of the DB, if the DB supports seeking.
  optional string key = 4;
}
// Serialization settings that apply to the blobs selected by
// blob_name_regex.
message BlobSerializationOptions {
  // These options only apply to blobs whose name matches this pattern. An
  // empty blob_name_regex is treated as matching all blobs.
  optional string blob_name_regex = 1;

  // Chunk size to use when serializing. Special values:
  //   - 0 means "use the default chunk size". The default chunk size is
  //     controlled by the --caffe2_tensor_chunk_size command line flag.
  //   - -1 means chunking is disabled and the blob is serialized in a single
  //     chunk.
  optional int64 chunk_size = 2;

  // Available encodings for float tensor data.
  enum FloatFormat {
    // Use the current default serialization format, as chosen by the current
    // version of the code. (At the time of writing this is PROTOBUF.)
    FLOAT_DEFAULT = 0;

    // Store the data in the TensorProto's float_data field.
    FLOAT_PROTOBUF = 1;

    // Serialize float values as bfloat16. Note that this conversion is
    // lossy.
    FLOAT_BFLOAT16 = 2;
  }

  // How to serialize tensors containing float values.
  optional FloatFormat float_format = 3;
}
// Top-level container for blob serialization settings.
message SerializationOptions {
  // The options to use when serializing blobs. This list is sorted from
  // highest to lowest precedence: when serializing a blob, the first entry
  // whose blob_name_regex matches the blob name is used.
  repeated BlobSerializationOptions options = 1;
}
|