1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
|
from functools import partial
import numpy
import pytest
from numpy.testing import assert_allclose
from thinc.api import Linear, NumpyOps, Relu, chain
@pytest.fixture(params=[1, 2, 9])
def nB(request):
return request.param
@pytest.fixture(params=[1, 6])
def nI(request):
return request.param
@pytest.fixture(params=[1, 5, 3])
def nH(request):
return request.param
@pytest.fixture(params=[1, 2, 7, 9])
def nO(request):
return request.param
@pytest.fixture
def model1(nH, nI):
model = Relu(nH, nI).initialize()
return model
@pytest.fixture
def model2(nO, nH):
model = Linear(nO, nH).initialize()
return model
@pytest.fixture
def input_data(nB, nI):
return numpy.ones((nB, nI), dtype="f") + 1.0
@pytest.fixture
def gradient_data(nB, nO):
return numpy.zeros((nB, nO), dtype="f") - 1.0
@pytest.fixture
def model(model1, model2):
return chain(model1, model2).initialize()
def get_expected_predict(input_data, Ws, bs):
numpy_ops = NumpyOps()
X = input_data
for i, (W, b) in enumerate(zip(Ws, bs)):
X = numpy_ops.asarray(X)
if i > 0:
X *= X > 0
X = numpy.tensordot(X, W, axes=[[1], [1]]) + b
return X
def numeric_gradient(predict, weights, epsilon=1e-4):
out1 = predict(weights + epsilon)
out2 = predict(weights - epsilon)
return (out1 - out2) / (2 * epsilon)
def test_models_have_shape(model1, model2, nI, nH, nO):
assert model1.get_param("W").shape == (nH, nI)
assert model1.get_param("b").shape == (nH,)
assert model2.get_param("W").shape == (nO, nH)
assert model2.get_param("b").shape == (nO,)
def test_model_shape(model, model1, model2, nI, nH, nO):
assert model.get_dim("nI") == model1.get_dim("nI")
assert model.get_dim("nO") == model2.get_dim("nO")
def test_infer_output_shape():
model = Relu(dropout=0.2)
X = model.ops.alloc2f(4, 5)
Y = model.ops.alloc2f(4, 2)
assert model.has_dim("nI") is None
assert model.has_dim("nO") is None
model.initialize(X=X, Y=Y)
assert model.get_dim("nI") == 5
assert model.get_dim("nO") == 2
def test_predict_and_begin_update_match(model, model1, model2, input_data):
model = chain(model1, model2)
via_predict = model.predict(input_data)
via_update, _ = model.begin_update(input_data)
assert_allclose(via_predict, via_update)
expected = get_expected_predict(
input_data,
[model1.get_param("W"), model2.get_param("W")],
[model1.get_param("b"), model2.get_param("b")],
)
assert_allclose(via_update, expected, atol=1e-2, rtol=1e-4)
def test_init_functions_are_called():
init_was_called = {}
def register_init(name, model, X=None, Y=None):
init_was_called[name] = True
layer1 = Linear(5)
layer2 = Linear(5)
layer3 = Linear(5)
layer1.init = partial(register_init, "one")
layer2.init = partial(register_init, "two")
layer3.init = partial(register_init, "three")
# This is the nesting we'll get from operators.
model = chain(layer1, chain(layer2, layer3))
assert not init_was_called
model.initialize()
assert init_was_called["one"]
assert init_was_called["two"]
assert init_was_called["three"]
class GradientSpy(object):
def __init__(self):
self.weights = None
self.d_weights = None
def __call__(self, weights, grad):
self.weights = weights
self.d_weights = grad
# I don't know how to get this working properly after the refactor. It's a numeric
# gradient check. I suspect the test is the problem, not the code.
@pytest.mark.skip
# This is the actual definition -- it's just annoying to see tonnes of skips.
# def test_gradient(model, input_data, nB, nH, nI, nO):
def test_gradient():
truth = numpy.zeros((nB, nO), dtype="float32")
truth[0] = 1.0
guess, backprop = model.begin_update(input_data)
backprop(guess - truth)
for layer in model.layers:
for name in layer.param_names:
agrad = layer.get_grad(name).ravel() # Should have grads for all params.
predict = get_predict(layer, name, input_data)
ngrad = get_numeric_gradient(predict, agrad.size, truth)
assert_allclose(agrad, ngrad, atol=0.2, rtol=0.2)
def get_predict(layer, param_name, inputs):
"""Helper for gradient check. To do the numeric gradient check, we have
to be able to wiggle one value in a parameter, and check the prediction
before and after. So we need to get a callback that gives an output
given a change to one weight.
"""
def predict(i, epsilon):
param = layer.get_param(param_name)
shape = param.shape
param = param.ravel()
param[i] += epsilon
layer.set_param(param_name, param.reshape(shape))
outputs = layer.predict(inputs)
param[i] -= epsilon
layer.set_param(param_name, param.reshape(shape))
return outputs.reshape(shape)
return predict
def get_numeric_gradient(predict, n, target):
gradient = numpy.zeros(n)
for i in range(n):
out1 = predict(i, 1e-4)
out2 = predict(i, -1e-4)
err1 = _get_loss(out1, target)
err2 = _get_loss(out2, target)
gradient[i] = (err1 - err2) / (2 * 1e-4)
print("NGrad", i, err1, err2)
return gradient
def _get_loss(truth, guess):
return numpy.sum(numpy.sum(0.5 * numpy.square(truth - guess), 1))
|