## @package fc_with_bootstrap
# Module caffe2.python.layers.fc_with_bootstrap
import math
import numpy as np
from caffe2.python import core, schema
from caffe2.python.helpers.arg_scope import get_current_scope
from caffe2.python.layers.layers import ModelLayer
from caffe2.python.layers.sampling_trainable_mixin import SamplingTrainableMixin
def get_fc_predictor_version(fc_version):
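    """
    Validate the FC precision requested for the predictor net; only "fp32" is
    accepted. add_ops reads this setting from the current arg_scope. A hedged
    sketch of how a caller might select it (assumes caffe2's
    helpers.arg_scope context manager; illustrative only, not part of this module):

        with arg_scope(get_fc_predictor_version, fc_version="fp32"):
            ...  # build the predict / eval net
    """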
    assert fc_version in ["fp32"], (
        "Only fp32 is supported for the fully connected layer "
        "in the predictor net; the provided FC precision is {}".format(fc_version)
    )
return fc_version
class FCWithBootstrap(SamplingTrainableMixin, ModelLayer):
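    """
    Fully-connected layer that trains num_bootstrap independent FC heads, each
    on a bootstrapped (sampled with replacement) copy of the input batch.

    At train time each head gathers its own resampled rows of the input and
    runs its own FC; at predict/eval time each head runs on the raw input and
    the per-head indices in the output schema are dummies. The output schema
    interleaves one (indices, preds) pair per bootstrap iteration.

    Hedged usage sketch (assumes the usual caffe2 LayerModelHelper workflow;
    the model and field names below are illustrative only):

        preds = model.FCWithBootstrap(
            model.input_feature_schema.float_features,
            output_dims=1,
            num_bootstrap=10,
        )
    """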
def __init__(
self,
model,
input_record,
output_dims,
num_bootstrap,
weight_init=None,
bias_init=None,
weight_optim=None,
bias_optim=None,
name="fc_with_bootstrap",
weight_reg=None,
bias_reg=None,
clip_param=None,
axis=1,
**kwargs
):
super(FCWithBootstrap, self).__init__(model, name, input_record, **kwargs)
assert isinstance(
input_record, schema.Scalar
), "Incorrect input type {}".format(input_record)
assert (
len(input_record.field_types()[0].shape) > 0
), "FC expects limited dimensions of the input tensor"
        assert axis >= 1, "axis {} should be >= 1.".format(axis)
self.axis = axis
input_dims = np.prod(input_record.field_types()[0].shape[axis - 1 :])
assert input_dims > 0, "FC expects input dimensions > 0, got {}".format(
input_dims
)
self.clip_args = None
# attributes for bootstrapping below
self.num_bootstrap = num_bootstrap
# input dim shape
self.input_dims = input_dims
# bootstrapped fully-connected layers to be used in eval time
self.bootstrapped_FCs = []
        # blob caching the batch_size so that we don't need to recompute it
        self.batch_size = None
        # this is intended to be the final FC, so output_dim should be 1; left as None
        self.output_dim_vec = None
# lower bound when creating random indices
self.lower_bound = None
# upper bound when creating random indices
self.upper_bound = None
if clip_param is not None:
assert len(clip_param) == 2, (
"clip_param must be a tuple / list "
"of length 2 and in the form of (clip_min, clip max)"
)
clip_min, clip_max = clip_param
assert (
clip_min is not None or clip_max is not None
), "clip_min, and clip_max in clip_param cannot both be None"
assert (
clip_min is None or clip_max is None
) or clip_min < clip_max, (
"clip_param = [clip_min, clip_max] must have clip_min < clip_max"
)
self.clip_args = {}
if clip_min is not None:
self.clip_args["min"] = clip_min
if clip_max is not None:
self.clip_args["max"] = clip_max
scale = math.sqrt(1.0 / input_dims)
weight_init = (
weight_init
if weight_init
else ("UniformFill", {"min": -scale, "max": scale})
)
bias_init = (
bias_init if bias_init else ("UniformFill", {"min": -scale, "max": scale})
)
"""
bootstrapped FCs:
Ex: [
bootstrapped_weights_blob_1, bootstrapped_bias_blob_1,
...,
...,
bootstrapped_weights_blob_b, bootstrapped_bias_blob_b
]
output_schema:
            Note: the indices fields always sit at even positions of the struct.
Ex: Struct(
indices_0_blob,
preds_0_blob,
...
...
indices_b_blob,
preds_b_blob
)
"""
output_schema = schema.Struct()
for i in range(num_bootstrap):
output_schema += schema.Struct(
(
"bootstrap_iteration_{}/indices".format(i),
self.get_next_blob_reference(
"bootstrap_iteration_{}/indices".format(i)
),
),
(
"bootstrap_iteration_{}/preds".format(i),
self.get_next_blob_reference(
"bootstrap_iteration_{}/preds".format(i)
),
),
)
self.bootstrapped_FCs.extend(
[
self.create_param(
param_name="bootstrap_iteration_{}/w".format(i),
shape=[output_dims, input_dims],
initializer=weight_init,
optimizer=weight_optim,
regularizer=weight_reg,
),
self.create_param(
param_name="bootstrap_iteration_{}/b".format(i),
shape=[output_dims],
initializer=bias_init,
optimizer=bias_optim,
regularizer=bias_reg,
),
]
)
self.output_schema = output_schema
if axis == 1:
output_shape = (output_dims,)
else:
output_shape = list(input_record.field_types()[0].shape)[0 : axis - 1]
output_shape = tuple(output_shape + [output_dims])
def _generate_bootstrapped_indices(self, net, copied_cur_layer, iteration):
"""
Args:
net: the caffe2 net to insert operator
copied_cur_layer: blob of the bootstrapped features (make sure this
blob has a stop_gradient on)
            iteration: the bootstrap iteration to generate indices for. Used to
                correctly populate the output_schema

        Returns:
A blob containing the generated indices of shape: (batch_size,)
"""
with core.NameScope("bootstrap_iteration_{}".format(iteration)):
if iteration == 0:
# capture batch_size once for efficiency
input_shape = net.Shape(copied_cur_layer, "input_shape")
batch_size_index = net.Const(np.array([0]), "batch_size_index")
batch_size = net.Gather([input_shape, batch_size_index], "batch_size")
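                # keep batch_size as a 1-element blob so it can be fed back in as an
                # output shape (input_as_shape) when generating indices below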
self.batch_size = batch_size
lower_bound = net.Const(np.array([0]), "lower_bound", dtype=np.int32)
offset = net.Const(np.array([1]), "offset", dtype=np.int32)
int_batch_size = net.Cast(
[self.batch_size], "int_batch_size", to=core.DataType.INT32
)
upper_bound = net.Sub([int_batch_size, offset], "upper_bound")
self.lower_bound = lower_bound
self.upper_bound = upper_bound
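            # UniformIntFill with input_as_shape=1: the first input provides the
            # output shape (batch_size,) and the other inputs give the lower / upper
            # bounds (0 and batch_size - 1), so each generated index selects a row of
            # the batch, i.e. the rows are sampled with replacement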
indices = net.UniformIntFill(
[self.batch_size, self.lower_bound, self.upper_bound],
self.output_schema[iteration * 2].field_blobs()[0],
input_as_shape=1,
)
return indices
def _bootstrap_ops(self, net, copied_cur_layer, indices, iteration):
"""
        This method contains the logic used to bootstrap (resample with
        replacement) the features. Only used by the train_net.

        Args:
            net: the caffe2 net to insert bootstrapping operators
            copied_cur_layer: the blob representing the current features.
                Note, this blob should have a stop_gradient on it.
            indices: blob of bootstrapped row indices produced by
                _generate_bootstrapped_indices
            iteration: the bootstrap iteration, used to name the output blob
Returns:
bootstrapped_features: blob of bootstrapped version of cur_layer
with same dimensions
"""
# draw features based upon the bootstrapped indices
bootstrapped_features = net.Gather(
[copied_cur_layer, indices],
net.NextScopedBlob("bootstrapped_features_{}".format(iteration)),
)
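        # wrap the gathered blob in a schema.Scalar carrying the feature width so
        # downstream consumers (the FC heads) see the expected type and shape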
bootstrapped_features = schema.Scalar(
(np.float32, self.input_dims), bootstrapped_features
)
return bootstrapped_features
def _insert_fc_ops(self, net, features, params, outputs, version):
"""
Args:
net: the caffe2 net to insert operator
features: Scalar containing blob of the bootstrapped features or
actual cur_layer features
params: weight and bias for FC
outputs: the output blobs
            version: only fp32 is supported for now.

        Returns:
            pred_blob: the output blob produced by the FC operator
        """
if version == "fp32":
pred_blob = net.FC(
features.field_blobs() + params, outputs, axis=self.axis, **self.kwargs
)
return pred_blob
        else:
            raise Exception("unsupported FC precision version {}".format(version))
def _add_ops(self, net, features, iteration, params, version):
"""
        Args:
            net: the caffe2 net to insert operators
            features: feature blobs to predict on. Can be the actual cur_layer
                or the bootstrapped_feature blobs.
            iteration: the bootstrap iteration, used to select the output blob
            params: the weight and bias, passed by either add_ops or
                add_train_ops
            version: currently fp32 support only
"""
if self.clip_args is not None:
clipped_params = [net.NextScopedBlob("clipped_%s" % str(p)) for p in params]
for p, cp in zip(params, clipped_params):
net.Clip([p], [cp], **self.clip_args)
params = clipped_params
if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
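            # preds blobs sit at the odd positions of the output schema (indices at
            # even positions), hence output index (iteration * 2) + 1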
self._insert_fc_ops(
net=net,
features=features,
params=params,
outputs=[self.output_schema.field_blobs()[(iteration * 2) + 1]],
version=version,
)
def add_ops(self, net):
"""
Both the predict net and the eval net will call this function.
        For the bootstrapping approach, the goal is to pass the cur_layer feature
inputs through all the bootstrapped FCs that are stored under
self.bootstrapped_FCs. Return the preds in the same output_schema
with dummy indices (because they are not needed).
"""
version_info = get_current_scope().get(
get_fc_predictor_version.__name__, {"fc_version": "fp32"}
)
predictor_fc_fp_version = version_info["fc_version"]
for i in range(self.num_bootstrap):
# these are dummy indices, not to be used anywhere
indices = self._generate_bootstrapped_indices(
net=net,
copied_cur_layer=self.input_record.field_blobs()[0],
iteration=i,
)
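            # each bootstrap head owns two consecutive entries in bootstrapped_FCs:
            # weight at index 2 * i, bias at index 2 * i + 1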
params = self.bootstrapped_FCs[i * 2 : (i * 2) + 2]
self._add_ops(
net=net,
features=self.input_record,
params=params,
iteration=i,
version=predictor_fc_fp_version,
)
def add_train_ops(self, net):
# use the train_param_blobs to be consistent with the SamplingTrain unittest
# obtain features
for i in range(self.num_bootstrap):
indices = self._generate_bootstrapped_indices(
net=net,
copied_cur_layer=self.input_record.field_blobs()[0],
iteration=i,
)
bootstrapped_features = self._bootstrap_ops(
net=net,
copied_cur_layer=self.input_record.field_blobs()[0],
indices=indices,
iteration=i,
)
self._add_ops(
net,
features=bootstrapped_features,
iteration=i,
params=self.train_param_blobs[i * 2 : (i * 2) + 2],
version="fp32",
)
def get_fp16_compatible_parameters(self):
if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
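            # weights sit at the even indices of self.bootstrapped_FCs (biases at the
            # odd indices); only the weights are reported as fp16-compatible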
return [
blob for idx, blob in enumerate(self.bootstrapped_FCs) if idx % 2 == 0
]
else:
raise Exception(
"Currently only supports functionality for output_dim_vec == 1"
)
@property
def param_blobs(self):
if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
return self.bootstrapped_FCs
else:
raise Exception("FCWithBootstrap layer only supports output_dim_vec==1")