from caffe2.python import schema
from caffe2.python.layers.layers import ModelLayer

import numpy as np


class ArcCosineFeatureMap(ModelLayer):
"""
A general version of the arc-cosine kernel feature map (s = 1 restores
the original arc-cosine kernel feature map).
Applies H(x) * x^s, where H is the Heaviside step function and x is the
input after applying FC (such that x = w * x_orig + b).
For more information, see the original paper:
http://cseweb.ucsd.edu/~saul/papers/nips09_kernel.pdf
Inputs :
output_dims -- dimensions of the output vector
s -- degree to raise transformed features
scale -- amount to scale the standard deviation
weight_init -- initialization distribution for weight parameter
bias_init -- initialization distribution for bias pararmeter
weight_optim -- optimizer for weight params; None for random features
bias_optim -- optimizer for bias param; None for random features
set_weight_as_global_constant -- if True, initialized random parameters
will be constant across all distributed
instances of the layer
initialize_output_schema -- if True, initialize output schema as Scalar
from Arc Cosine; else output schema is None
"""
def __init__(
self,
model,
input_record,
output_dims,
s=1,
scale=1.0,
weight_init=None,
bias_init=None,
weight_optim=None,
bias_optim=None,
set_weight_as_global_constant=False,
initialize_output_schema=True,
name='arc_cosine_feature_map',
**kwargs):
super(ArcCosineFeatureMap, self).__init__(model, name, input_record,
**kwargs)
assert isinstance(input_record, schema.Scalar), "Incorrect input type"
self.params = []
self.model = model
self.set_weight_as_global_constant = set_weight_as_global_constant
self.input_dims = input_record.field_type().shape[0]
assert self.input_dims >= 1, "Expected input dimensions >= 1, got %s" \
% self.input_dims
if initialize_output_schema:
self.output_schema = schema.Scalar(
(np.float32, (output_dims, )),
model.net.NextScopedBlob(name + '_output')
)
self.output_dims = output_dims
assert self.output_dims >= 1, "Expected output dimensions >= 1, got %s" \
% self.output_dims
self.s = s
assert (self.s >= 0), "Expected s >= 0, got %s" % self.s
assert isinstance(self.s, int), "Expected s to be type int, got type %s" \
% type(self.s)
assert (scale > 0.0), "Expected scale > 0, got %s" % scale
self.stddev = scale * np.sqrt(1.0 / self.input_dims)
# Initialize train_init_net parameters
# Random Parameters
if set_weight_as_global_constant:
w_init = np.random.normal(scale=self.stddev,
size=(self.output_dims, self.input_dims))
b_init = np.random.uniform(low=-0.5 * self.stddev,
high=0.5 * self.stddev,
size=self.output_dims)
self.random_w = self.model.add_global_constant(
name=self.name + "_fixed_rand_W",
array=w_init
)
self.random_b = self.model.add_global_constant(
name=self.name + "_fixed_rand_b",
array=b_init
)
else:
(self.random_w, self.random_b) = self._initialize_params(
'random_w',
'random_b',
w_init=weight_init,
b_init=bias_init,
w_optim=weight_optim,
b_optim=bias_optim
)

    def _initialize_params(self, w_name, b_name, w_init=None, b_init=None,
                           w_optim=None, b_optim=None):
"""
Initializes the Layer Parameters for weight and bias terms for features
Inputs :
w_blob -- blob to contain w values
b_blob -- blob to contain b values
w_init -- initialization distribution for weight parameter
b_init -- initialization distribution for bias parameter
w_optim -- optimizer to use for w; if None, then will use no optimizer
b_optim -- optimizer to user for b; if None, then will use no optimizer
"""
w_init = w_init if w_init else (
'GaussianFill', {'mean': 0.0, 'std': self.stddev}
)
w_optim = w_optim if w_optim else self.model.NoOptim
b_init = b_init if b_init else (
'UniformFill', {'min': -0.5 * self.stddev, 'max': 0.5 * self.stddev}
)
b_optim = b_optim if b_optim else self.model.NoOptim
w_param = self.create_param(param_name=w_name,
shape=(self.output_dims, self.input_dims),
initializer=w_init,
optimizer=w_optim)
b_param = self.create_param(param_name=b_name,
shape=[self.output_dims],
initializer=b_init,
optimizer=b_optim)
return [w_param, b_param]

    def _heaviside_with_power(self, net, input_features, output_blob, s):
"""
Applies Heaviside step function and Relu / exponentiation to features
depending on the value of s.
Inputs:
net -- net with operators
input_features -- features to processes
output_blob -- output blob reference
s -- degree to raise the transformed features
"""
if s == 0:
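            # H(x) * x^0 = H(x); Relu(Softsign(x)) serves as a smooth,
            # bounded surrogate for the hard step function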
softsign_features = net.Softsign([input_features],
net.NextScopedBlob('softsign'))
return net.Relu(softsign_features, output_blob)
elif s == 1:
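            # H(x) * x^1 is exactly Relu(x)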
return net.Relu([input_features],
output_blob)
else:
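            # for s >= 2, H(x) * x^s factors as Relu(x) * x^(s-1)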
relu_features = net.Relu([input_features],
net.NextScopedBlob('relu_rand'))
pow_features = net.Pow([input_features],
net.NextScopedBlob('pow_rand'),
exponent=float(s - 1))
return net.Mul([relu_features, pow_features],
output_blob)

    def add_ops(self, net):
input_blob = self.input_record.field_blobs()
# Random features: wx + b
random_features = net.FC(input_blob + [self.random_w, self.random_b],
net.NextScopedBlob('random_features'))
# Process random features
self._heaviside_with_power(net,
random_features,
self.output_schema.field_blobs(),
self.s)
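

# A minimal NumPy sketch (illustrative only, not used by the layer) of the
# transform this layer realizes with Caffe2 operators; `x_orig`, `W`, `b`,
# and `s` are hypothetical stand-ins for the input record and the layer's
# random parameters.
def _arc_cosine_reference(x_orig, W, b, s):
    # FC step: x = w * x_orig + b
    x = np.dot(W, x_orig) + b
    # H(x) * x^s; note the layer itself approximates the s == 0 case with
    # Relu(Softsign(x)) rather than this hard step function
    return np.heaviside(x, 0.0) * np.power(x, s)
# e.g. for s == 1 this reduces elementwise to Relu(W.dot(x_orig) + b)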