File: VisionFeaturePrint.proto

package info (click to toggle)
chromium 138.0.7204.183-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,908 kB
  • sloc: cpp: 34,937,088; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (65 lines) | stat: -rw-r--r-- 2,150 bytes parent folder | download | duplicates (9)
// Copyright (c) 2018, Apple Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-3-clause license that can be
// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause

syntax = "proto3";
option optimize_for = LITE_RUNTIME;

package CoreML.Specification.CoreMLModels;

/*
 * A model which takes an input image and outputs array(s) of features
 * according to the specified feature types
 */
message VisionFeaturePrint {
  // Specific vision feature print types

  // Scene extracts features useful for identifying contents of natural images
  // in both indoor and outdoor environments
  message Scene {
    // Version of the scene feature extractor. The version determines the
    // required input image size and the length of the emitted feature vector.
    enum SceneVersion {
      // Default/unset value; a valid model must select a concrete version.
      SCENE_VERSION_INVALID = 0;
      // VERSION_1 is available on iOS,tvOS 12.0+, macOS 10.14+
      // It uses a 299x299 input image and yields a 2048 float feature vector
      SCENE_VERSION_1 = 1;

      // VERSION_2 is available on iOS,tvOS 17.0+, macOS 14.0+
      // It uses a 360x360 input image and yields a 768 float feature vector
      SCENE_VERSION_2 = 2;
    }

    // Selected scene feature extractor version (see SceneVersion above).
    SceneVersion version = 1;
  }

  // Objects extracts features useful for identifying and localizing
  // objects in natural images
  message Objects {
    // Version of the objects feature extractor. The version determines the
    // required input image size and the shapes of the output multiarrays.
    enum ObjectsVersion {
      // Default/unset value; a valid model must select a concrete version.
      OBJECTS_VERSION_INVALID = 0;
      // VERSION_1 is available on iOS,tvOS 14.0+, macOS 11.0+
      // It uses a 299x299 input image and yields two multiarray
      // features: one at high resolution of shape (288, 35, 35)
      // the other at low resolution of shape (768, 17, 17)
      OBJECTS_VERSION_1 = 1;
    }

    // Selected objects feature extractor version (see ObjectsVersion above).
    ObjectsVersion version = 1;

    /*
     * Stores the names of the output features according to the
     * order of them being computed from the neural network, i.e.,
     * the first element in the output is the earliest being
     * computed, while the last is the latest being computed. In
     * general, the order reflects the resolution of the feature.
     * The earlier it is computed, the higher the feature resolution.
     */
    repeated string output = 100;
  }

  // Vision feature print type
  // Exactly one extractor kind may be set per model; setting one clears
  // the other. Field numbers start at 20, leaving 1-15 (the one-byte tag
  // range) open -- presumably for future common fields; confirm against
  // the upstream CoreML specification before reusing them.
  oneof VisionFeaturePrintType {
    Scene scene = 20;
    Objects objects = 21;
  }
}