1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
|
## @package optimizer_test_util
# Module caffe2.python.optimizer_test_util
import unittest
import numpy as np
from caffe2.python import brew, core, workspace, cnn, optimizer
from caffe2.python.modeling.initializers import (
Initializer, PseudoFP16Initializer)
from caffe2.python.model_helper import ModelHelper
class OptimizerTestBase(object):
    """
    Mixin with optimizer convergence tests shared by concrete test cases.

    This is an abstract base class.
    Don't inherit from unittest.TestCase here, and don't name it 'Test*'.
    Do both of those things in the classes which inherit from this one.

    The tests below expect the inheriting class to supply:
      * build_optimizer(model): attaches the optimizer under test.
      * check_optimizer(optimizer): receives build_optimizer's result at
        the end of testDense and testSparse.
      * _skip_gpu: when truthy, testGPUDense returns before running nets.
      * the unittest assert* methods (e.g. assertIsInstance).
    """

    def _createDense(self, dtype=core.DataType.FLOAT):
        """
        Build a single-output FC regression model plus its training data.

        Labels are the dot product of random 0/1 feature rows with a fixed
        weight vector, so that vector is the solution the tests look for.

        dtype selects between the FLOAT setup (float32 data, Initializer)
        and the alternative setup (float16 data, PseudoFP16Initializer).

        Returns the tuple (model, perfect_model, data, label).
        """
        target_weights = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic

        if dtype == core.DataType.FLOAT:
            numpy_dtype = np.float32
            initializer = Initializer
        else:
            numpy_dtype = np.float16
            initializer = PseudoFP16Initializer

        num_features = target_weights.size
        data = np.random.randint(
            2, size=(20, num_features)).astype(numpy_dtype)
        label = np.dot(data, target_weights)[:, np.newaxis]

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        out = brew.fc(
            model,
            'data',
            'fc',
            num_features,
            1,
            ('ConstantFill', {}),
            ('ConstantFill', {}),
            axis=0,
            WeightInitializer=initializer,
            BiasInitializer=initializer
        )
        if dtype == core.DataType.FLOAT16:
            # The loss ops below are fed the float version of the output.
            out = model.HalfToFloat(out, out + "_fp32")

        squared_dist = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(squared_dist, "avg_loss")
        gradients = model.AddGradientOperators([loss])
        self.assertIsInstance(gradients['fc_w'], core.BlobReference)

        return (model, target_weights, data, label)

    def testDense(self):
        """
        Train the dense model for 2000 single-sample steps and check that
        'fc_w' ends up within 1e-2 of the weights that generated the labels.
        """
        model, perfect_model, data, label = self._createDense()
        built_optimizer = self.build_optimizer(model)

        # Feed one sample first so the input blobs exist for net creation.
        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)

        net_name = model.net.Proto().name
        num_samples = data.shape[0]
        for _ in range(2000):
            row = np.random.randint(num_samples)
            workspace.FeedBlob('data', data[row])
            workspace.FeedBlob('label', label[row])
            workspace.RunNet(net_name)

        learned_weights = workspace.FetchBlob('fc_w')
        np.testing.assert_allclose(
            perfect_model[np.newaxis, :], learned_weights, atol=1e-2)
        self.check_optimizer(built_optimizer)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    def testGPUDense(self, dtype=core.DataType.FLOAT):
        """
        Build the dense model under a GPU device scope, add CPU ops and the
        optimizer outside that scope, then run the net once as a smoke test.
        """
        gpu_option = core.DeviceOption(workspace.GpuDeviceType, 0)
        with core.DeviceScope(gpu_option):
            model, _perfect_model, data, label = self._createDense(dtype)
            if dtype == core.DataType.FLOAT16:
                # Convert to float before copying the output to the host.
                host_source = model.HalfToFloat('fc', 'fc_fp32_for_host')
            else:
                host_source = 'fc'
            model.CopyGPUToCPU(host_source, 'fc_cpu')
            workspace.FeedBlob('data', data[0])
            workspace.FeedBlob('label', label[0])

        # Add some CPU ops
        brew.fc(model, 'fc_cpu', 'fc2', dim_in=1, dim_out=10, axis=0)

        # Create optimizer in default device scope
        self.build_optimizer(model)

        if self._skip_gpu:
            return

        # Run net to see it does not crash
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        workspace.RunNet(model.net.Proto().name)

    def testSparse(self):
        """
        Train a gathered (sparse-gradient) weight vector for 2000 steps,
        once with int32 and once with int64 indices, and check that 'w'
        ends up within 1e-2 of the weights that generated the labels.
        """
        # to test duplicated indices we assign two indices to each weight
        # and thus each weight might count once or twice
        duplication = 2

        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        num_binary_features = perfect_model.size * duplication
        data = np.random.randint(
            2, size=(20, num_binary_features)).astype(np.float32)
        label = np.dot(data, np.repeat(perfect_model, duplication))

        model = cnn.CNNModelHelper("NCHW", name="test")
        # imitate what model wrapper does
        w = model.param_init_net.ConstantFill(
            [], 'w', shape=[perfect_model.size], value=0.0)
        model.params.append(w)

        gathered = model.net.Gather([w, 'indices'], 'gather')
        out = model.ReduceFrontSum(gathered, 'sum')
        squared_dist = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(squared_dist, "avg_loss")
        gradients = model.AddGradientOperators([loss])
        self.assertIsInstance(gradients['w'], core.GradientSlice)
        built_optimizer = self.build_optimizer(model)

        workspace.CreateBlob('indices')
        workspace.CreateBlob('label')

        # Position i of a data row refers to weight feature_to_weight[i].
        feature_to_weight = np.repeat(
            np.arange(perfect_model.size), duplication)
        num_samples = data.shape[0]

        for indices_type in [np.int32, np.int64]:
            # Re-initialise the parameters for each index dtype.
            workspace.RunNetOnce(model.param_init_net)
            workspace.CreateNet(model.net, True)
            net_name = model.net.Proto().name

            for _ in range(2000):
                row = np.random.randint(num_samples)
                # transform into indices of binary features
                indices = feature_to_weight[data[row] == 1]
                if not indices.size:
                    continue
                workspace.FeedBlob(
                    'indices',
                    indices.reshape((indices.size,)).astype(indices_type)
                )
                workspace.FeedBlob(
                    'label', np.array(label[row]).astype(np.float32))
                workspace.RunNet(net_name)

            np.testing.assert_allclose(
                perfect_model, workspace.FetchBlob('w'), atol=1e-2)

        self.check_optimizer(built_optimizer)
class LRModificationTestBase(object):
    """
    Mixin with tests for learning-rate modification (global-norm gradient
    clipping and LR injection).

    This is an abstract base class.
    Don't inherit from unittest.TestCase here, and don't name it 'Test*'.
    Do both of those things in the classes which inherit from this one.

    The tests below expect the inheriting class to supply:
      * _createDense(): returns (model, perfect_model, data, label).
      * build_optimizer(model, ...): called with the max_gradient_norm and
        allow_lr_injection keywords; the returned object's _lr_multiplier
        attribute is inspected.
      * the unittest assert* methods (e.g. assertIsNotNone).
    """

    def _gradient_ratio_reference(self, model, params, max_gradient_norm):
        """
        Compute the expected clipping ratio with numpy.

        The global norm is the square root of the summed squared norms of
        the gradients of `params`, fetched from the workspace. The result
        is max_gradient_norm / max(max_gradient_norm, global_norm).
        """
        # Kept from the original; the name is also imported at module level.
        from caffe2.python import core

        def _squared_norm(param):
            grad = model.param_to_grad[param]
            if isinstance(grad, core.GradientSlice):
                # For a GradientSlice, the values blob is what gets fetched.
                grad = grad.values
            fetched = workspace.FetchBlob(grad)
            return np.power(np.linalg.norm(fetched), 2.0)

        total_squared = sum((_squared_norm(p) for p in params), 0.0)
        global_norm = np.sqrt(total_squared)
        return max_gradient_norm / np.maximum(max_gradient_norm, global_norm)

    def test_global_norm_based_gradient_clipping(self):
        """
        Run one training step and check that the blob
        'norm_clipped_grad_update/norm_ratio' matches the numpy reference,
        and that the reference is below 1.0 (i.e. gradients were scaled).
        """
        max_gradient_norm = 1.0
        model, _perfect_model, data, label = self._createDense()
        opt = self.build_optimizer(model, max_gradient_norm=max_gradient_norm)

        # Keep only parameters whose gradient is not a GradientSlice.
        params = [
            param
            for param in model.GetParams(top_scope=True)
            if param in model.param_to_grad
            and not isinstance(model.param_to_grad[param], core.GradientSlice)
        ]

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        self.assertIsNotNone(opt._lr_multiplier)

        # Run net once
        row = np.random.randint(data.shape[0])
        workspace.FeedBlob('data', data[row])
        workspace.FeedBlob('label', label[row])
        workspace.RunNet(model.net.Proto().name)

        reference = self._gradient_ratio_reference(
            model, params, max_gradient_norm)
        norm_ratio = workspace.FetchBlob(
            'norm_clipped_grad_update/norm_ratio')
        np.testing.assert_almost_equal(norm_ratio, reference)
        self.assertTrue(
            reference < 1.0, "Bad test, gradient not being scaled."
        )

    def test_lr_injection(self):
        """
        Check that the LR injection value starts at 1, can be set to 0, and
        that after one net run the 'lr_multiplier' blob equals 0.
        """
        model, _perfect_model, data, label = self._createDense()
        opt = self.build_optimizer(
            model, max_gradient_norm=1, allow_lr_injection=True
        )

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)

        # Test LR injection initialized properly
        # (`optimizer` below is the module imported at the top of the file.)
        self.assertIsNotNone(opt._lr_multiplier)
        self.assertEqual(optimizer.get_lr_injection(), 1)

        # Test that we're able to modify the value of the lr_injection
        optimizer.set_lr_injection(0)
        self.assertEqual(optimizer.get_lr_injection(), 0)

        # Test that setting the lr_injector properly propagates to the
        # lr_multiplier. Here, we have both lr_injector and norm_ratio that
        # affect the lr_multiplier
        workspace.RunNet(model.net.Proto().name)
        self.assertEqual(workspace.FetchBlob('lr_multiplier'), 0)
|