from caffe2.python import schema
from caffe2.python.layers.arc_cosine_feature_map import ArcCosineFeatureMap
import numpy as np


class SemiRandomFeatures(ArcCosineFeatureMap):
    """
    Implementation of the semi-random kernel feature map.

    Applies H(x_rand) * x_rand^s * x_learned, where
        H is the Heaviside step function,
        x_rand is the input after applying FC with randomized parameters,
        and x_learned is the input after applying FC with learnable parameters.

    If using a multilayer model with semi-random layers, then the input and
    output records should have a 'full' and a 'random' Scalar. The random
    Scalar will be passed as input to process the random features.

    For more information, see the original paper:
        https://arxiv.org/pdf/1702.08882.pdf

    Inputs:
        output_dims -- dimensions of the output vector
        s -- if s == 0, will obtain linear semi-random features;
             else if s == 1, will obtain squared semi-random features;
             else if s >= 2, will obtain higher order semi-random features
        scale_random -- amount to scale the standard deviation
                        (for random parameter initialization when weight_init or
                        bias_init hasn't been specified)
        scale_learned -- amount to scale the standard deviation
                         (for learned parameter initialization when weight_init or
                         bias_init hasn't been specified)
        weight_init_random -- initialization distribution for random weight parameter
                              (if None, will use Gaussian distribution)
        bias_init_random -- initialization distribution for random bias parameter
                            (if None, will use Uniform distribution)
        weight_init_learned -- initialization distribution for learned weight parameter
                               (if None, will use Gaussian distribution)
        bias_init_learned -- initialization distribution for learned bias parameter
                             (if None, will use Uniform distribution)
        weight_optim -- optimizer for weight params for learned features
        bias_optim -- optimizer for bias param for learned features
        set_weight_as_global_constant -- if True, initialized random parameters
                                         will be constant across all distributed
                                         instances of the layer
    """
    def __init__(
            self,
            model,
            input_record,
            output_dims,
            s=1,
            scale_random=1.0,
            scale_learned=1.0,
            weight_init_random=None,
            bias_init_random=None,
            weight_init_learned=None,
            bias_init_learned=None,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=False,
            name='semi_random_features',
            **kwargs):
        if isinstance(input_record, schema.Struct):
            assert schema.is_schema_subset(
                schema.Struct(
                    ('full', schema.Scalar()),
                    ('random', schema.Scalar()),
                ),
                input_record
            ), "Struct input_record must contain 'full' and 'random' Scalars"
            self.input_record_full = input_record.full
            self.input_record_random = input_record.random
        elif isinstance(input_record, schema.Scalar):
            self.input_record_full = input_record
            self.input_record_random = input_record
        else:
            raise ValueError(
                "Expected input_record to be a schema.Struct or schema.Scalar, "
                "got %s" % type(input_record)
            )

        super(SemiRandomFeatures, self).__init__(
            model,
            self.input_record_full,
            output_dims,
            s=s,
            scale=scale_random,  # To initialize the random parameters
            weight_init=weight_init_random,
            bias_init=bias_init_random,
            weight_optim=None,   # Random parameters are fixed, not learned
            bias_optim=None,
            set_weight_as_global_constant=set_weight_as_global_constant,
            initialize_output_schema=False,
            name=name,
            **kwargs)

        self.output_schema = schema.Struct(
            ('full', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_full_output')
            )),
            ('random', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_random_output')
            )),
        )

        # To initialize the learnable parameters
        assert (scale_learned > 0.0), \
            "Expected scale (learned) > 0, got %s" % scale_learned
        self.stddev = scale_learned * np.sqrt(1.0 / self.input_dims)

        # Learned parameters
        (self.learned_w, self.learned_b) = self._initialize_params(
            'learned_w',
            'learned_b',
            w_init=weight_init_learned,
            b_init=bias_init_learned,
            w_optim=weight_optim,
            b_optim=bias_optim
        )

    def add_ops(self, net):
        # Learned features: wx + b
        learned_features = net.FC(self.input_record_full.field_blobs() +
                                  [self.learned_w, self.learned_b],
                                  net.NextScopedBlob('learned_features'))
        # Random features: wx + b
        random_features = net.FC(self.input_record_random.field_blobs() +
                                 [self.random_w, self.random_b],
                                 net.NextScopedBlob('random_features'))
        # H(x_rand) * x_rand^s, written to the 'random' output
        processed_random_features = self._heaviside_with_power(
            net,
            random_features,
            self.output_schema.random.field_blobs(),
            self.s
        )
        # Elementwise H(x_rand) * x_rand^s * x_learned, written to the 'full' output
        net.Mul([processed_random_features, learned_features],
                self.output_schema.full.field_blobs())
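
# ---------------------------------------------------------------------------
# A minimal numpy sketch of the math this layer computes, for reference only.
# It is not part of the layer API: the function and variable names below
# (semi_random_features_reference, x, w_rand, ...) are hypothetical. The real
# layer runs the same computation through Caffe2 operators in add_ops above,
# with w_rand/b_rand fixed at initialization and w_learned/b_learned trained.
# ---------------------------------------------------------------------------
def semi_random_features_reference(x, w_rand, b_rand, w_learned, b_learned, s):
    """Reference for H(x_rand) * x_rand^s * x_learned, computed elementwise."""
    x_rand = x.dot(w_rand.T) + b_rand            # FC with fixed random params
    x_learned = x.dot(w_learned.T) + b_learned   # FC with learnable params
    heaviside = (x_rand > 0).astype(x.dtype)     # H(x_rand)
    return heaviside * np.power(x_rand, s) * x_learned


if __name__ == '__main__':
    # Smoke test of the reference math only; does not exercise the Caffe2 layer.
    rng = np.random.RandomState(0)
    x = rng.randn(4, 8).astype(np.float32)
    w_rand = rng.randn(16, 8).astype(np.float32)
    b_rand = rng.randn(16).astype(np.float32)
    w_learned = rng.randn(16, 8).astype(np.float32)
    b_learned = rng.randn(16).astype(np.float32)
    out = semi_random_features_reference(
        x, w_rand, b_rand, w_learned, b_learned, s=1)
    print(out.shape)  # expected: (4, 16)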