File: BayesianProbitRegressor.proto

package info (click to toggle)
chromium 138.0.7204.183-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,908 kB
  • sloc: cpp: 34,937,088; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (148 lines) | stat: -rw-r--r-- 4,869 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// Copyright (c) 2017, Apple Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-3-clause license that can be
// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause

syntax = "proto3";
option optimize_for = LITE_RUNTIME;

package CoreML.Specification;

/*
 * A Bayesian probit regressor.
 *
 * The probit regression model is superficially similar to the more commonly
 * known logistic regression, with sampling distribution of the model given by
 *
 *    P(y=+1|x,w) = Φ(<w,x>/β)
 *
 * where w are the set of weights,
 *       x are the set of features for the given event,
 *       β is a model hyper-parameter, and
 *       Φ is the link function, defined to be the CDF of the normal
 * distribution. The weights w[i,j] are Gaussian distributed, with mean μ[i,j]
 * and precision 1/(σ[i,j])^2 (where i indexes over features and j indexes over
 * the values for the feature). The parameter β scales the steepness of the
 * inverse link function.
 *
 * (see https://en.wikipedia.org/wiki/Probit_model and
 * https://en.wikipedia.org/wiki/Logistic_regression for more details on probit
 * model and logistic regression, respectively)
 *
 * Input: X
 *   x represents a set of features, each taking on a discrete value (note that
 * continuous values would first need to be discretized). x can be represented
 * as a vector where the index i is the feature id and x[i] is the feature
 * value. Alternatively, x can be represented as a matrix with 2 columns where
 * the first column indicates the feature id and the second column contains the
 * feature values, i.e. x[i,0] is the feature id and x[i,1] is the feature
 * value.
 *
 *   additional input features:
 *   - "optimism": apply a mean shift to the probability, i.e. shift regression
 * mean by o*stdev, where o is the "optimism" parameter (see additional output
 * features)
 *   - "samplingScale": for sampling from posterior, multiply standard deviation
 * by this factor
 *   - "samplingTruncation": for sampling from posterior, truncate sampling
 * distribution at given multiple of std from mean
 *
 * Output: Y
 *   probability P(y|x,w)
 *
 *   additional output features:
 *   - mean (regression output before applying link function)
 *   - variance (regression output variance before applying link function)
 *   - pessimistic probability: P(y|x,w) with a mean shift parameterized by
 * "optimism" feature
 *   - sampled probability: p ~ P(y|x,w) with standard deviation scaling
 * parameterized by "samplingScale" feature and distribution truncated at
 * multiple of standard deviation, where multiple parameterized by
 * "samplingTruncation" feature.
 *
 */

message BayesianProbitRegressor {
  /*
   * Parameterization of a univariate Gaussian distribution.
   * Used both for weight posteriors and for the bias term.
   */
  message Gaussian {
    double mean = 1;
    double precision = 2;  // inverse of the variance, i.e. 1/(sigma^2)
  }

  /*
   * Weight for a specific feature value
   * The weight is represented as a Gaussian distribution
   * with a mean and precision (1/variance) to capture
   * uncertainty in the weight
   */
  message FeatureValueWeight {
    // Discrete value of the feature this weight applies to
    // (continuous features must be discretized upstream).
    uint32 featureValue = 1;
    // Posterior distribution of the weight for this (feature, value) pair.
    Gaussian featureWeight = 2;
  }

  /*
   * Feature with associated weights (for different values)
   * Each feature has a set of weights for the (discrete) values
   * it can take
   */
  message FeatureWeight {
    // Identifier of the feature; corresponds to the feature id/index
    // in the regression input (see regressionInputFeatureName).
    uint32 featureId = 1;
    // One weight per discrete value this feature can take.
    repeated FeatureValueWeight weights = 2;
  }

  // Total number of features in the model.
  uint32 numberOfFeatures = 1;

  Gaussian bias = 2;  // bias term

  /*
   * Set of features with associated weights
   */
  repeated FeatureWeight features = 3;  // feature weights

  /*
   * Set this name to be the same as input feature of type multi-array (1D)
   * in the model description you want to use as the regression input
   */
  string regressionInputFeatureName = 10;

  /*
   * Set this name to be the same as optional input feature of type double
   * in the model description you want to use as the optimism input
   */
  string optimismInputFeatureName = 11;

  /*
   * Set this name to be the same as optional input feature of type double
   * in the model description you want to use as the samplingScale input
   */
  string samplingScaleInputFeatureName = 12;

  /*
   * Set this name to be the same as optional input feature of type double
   * in the model description you want to use as the samplingTruncation input
   */
  string samplingTruncationInputFeatureName = 13;

  /*
   * name of 'mean' output feature
   * (regression output before applying the link function)
   */
  string meanOutputFeatureName = 20;

  /*
   * name of 'variance' output feature
   * (regression output variance before applying the link function)
   */
  string varianceOutputFeatureName = 21;

  /*
   * name of 'pessimistic' output feature
   * (probability with a mean shift parameterized by the optimism input)
   */
  string pessimisticProbabilityOutputFeatureName = 22;

  /*
   * name of 'sampled' output feature: samples from the scaled posterior
   * probability distribution
   */
  string sampledProbabilityOutputFeatureName = 23;
}