File: test_feed_forward.py

package: python-thinc 9.1.1-1

from functools import partial

import numpy
import pytest
from numpy.testing import assert_allclose

from thinc.api import Linear, NumpyOps, Relu, chain
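
# These tests exercise a small feed-forward network: a Relu hidden layer
# chained into a Linear output layer via chain(). The parametrized fixtures
# below sweep the batch size (nB), input width (nI), hidden width (nH) and
# output width (nO).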


@pytest.fixture(params=[1, 2, 9])
def nB(request):
    return request.param


@pytest.fixture(params=[1, 6])
def nI(request):
    return request.param


@pytest.fixture(params=[1, 5, 3])
def nH(request):
    return request.param


@pytest.fixture(params=[1, 2, 7, 9])
def nO(request):
    return request.param


@pytest.fixture
def model1(nH, nI):
    model = Relu(nH, nI).initialize()
    return model


@pytest.fixture
def model2(nO, nH):
    model = Linear(nO, nH).initialize()
    return model


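# Constant sample data: the inputs are all 2.0 and the output gradients are
# all -1.0, which keeps the expected forward-pass values easy to check.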
@pytest.fixture
def input_data(nB, nI):
    return numpy.ones((nB, nI), dtype="f") + 1.0


@pytest.fixture
def gradient_data(nB, nO):
    return numpy.zeros((nB, nO), dtype="f") - 1.0


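# The full network under test: Relu(nH, nI) feeding into Linear(nO, nH).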
@pytest.fixture
def model(model1, model2):
    return chain(model1, model2).initialize()


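# Plain-numpy reference for the forward pass: each layer computes X @ W.T + b,
# and the Relu nonlinearity (zeroing negative values) is applied to the
# previous layer's output before every layer after the first.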
def get_expected_predict(input_data, Ws, bs):
    numpy_ops = NumpyOps()
    X = input_data
    for i, (W, b) in enumerate(zip(Ws, bs)):
        X = numpy_ops.asarray(X)
        if i > 0:
            X *= X > 0
        X = numpy.tensordot(X, W, axes=[[1], [1]]) + b
    return X


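# Central-difference estimate of d(predict)/d(weights):
# (f(w + eps) - f(w - eps)) / (2 * eps). For example, with
# predict = lambda w: w ** 2 and weights = 3.0 this returns ~6.0.
# Not used by the active tests; the skipped gradient test below uses
# get_numeric_gradient instead.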
def numeric_gradient(predict, weights, epsilon=1e-4):
    out1 = predict(weights + epsilon)
    out2 = predict(weights - epsilon)
    return (out1 - out2) / (2 * epsilon)


def test_models_have_shape(model1, model2, nI, nH, nO):
    assert model1.get_param("W").shape == (nH, nI)
    assert model1.get_param("b").shape == (nH,)
    assert model2.get_param("W").shape == (nO, nH)
    assert model2.get_param("b").shape == (nO,)


def test_model_shape(model, model1, model2, nI, nH, nO):
    assert model.get_dim("nI") == model1.get_dim("nI")
    assert model.get_dim("nO") == model2.get_dim("nO")


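# A Relu created without explicit dimensions leaves nI and nO unset;
# initialize() should infer them from the shapes of the sample X and Y arrays.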
def test_infer_output_shape():
    model = Relu(dropout=0.2)
    X = model.ops.alloc2f(4, 5)
    Y = model.ops.alloc2f(4, 2)
    assert model.has_dim("nI") is None
    assert model.has_dim("nO") is None
    model.initialize(X=X, Y=Y)
    assert model.get_dim("nI") == 5
    assert model.get_dim("nO") == 2


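# predict() and begin_update() should run the same forward pass, and both
# should agree with the plain-numpy reference computation above.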
def test_predict_and_begin_update_match(model, model1, model2, input_data):
    model = chain(model1, model2)
    via_predict = model.predict(input_data)
    via_update, _ = model.begin_update(input_data)
    assert_allclose(via_predict, via_update)
    expected = get_expected_predict(
        input_data,
        [model1.get_param("W"), model2.get_param("W")],
        [model1.get_param("b"), model2.get_param("b")],
    )
    assert_allclose(via_update, expected, atol=1e-2, rtol=1e-4)


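# initialize() on the outer model should reach every layer's init hook,
# even when the chain combinators are nested.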
def test_init_functions_are_called():
    init_was_called = {}

    def register_init(name, model, X=None, Y=None):
        init_was_called[name] = True

    layer1 = Linear(5)
    layer2 = Linear(5)
    layer3 = Linear(5)
    layer1.init = partial(register_init, "one")
    layer2.init = partial(register_init, "two")
    layer3.init = partial(register_init, "three")
    # This is the nesting we'll get from operators.
    model = chain(layer1, chain(layer2, layer3))
    assert not init_was_called
    model.initialize()
    assert init_was_called["one"]
    assert init_was_called["two"]
    assert init_was_called["three"]


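# Small callback that records the last (weights, gradient) pair it is called
# with; not used by the active tests.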
class GradientSpy(object):
    def __init__(self):
        self.weights = None
        self.d_weights = None

    def __call__(self, weights, grad):
        self.weights = weights
        self.d_weights = grad


# I don't know how to get this working properly after the refactor. It's a numeric
# gradient check. I suspect the test is the problem, not the code.
@pytest.mark.skip
# This is the actual definition -- it's just annoying to see tonnes of skips.
# def test_gradient(model, input_data, nB, nH, nI, nO):
def test_gradient():
    truth = numpy.zeros((nB, nO), dtype="float32")
    truth[0] = 1.0

    guess, backprop = model.begin_update(input_data)
    backprop(guess - truth)

    for layer in model.layers:
        for name in layer.param_names:
            agrad = layer.get_grad(name).ravel()  # Should have grads for all params.
            predict = get_predict(layer, name, input_data)
            ngrad = get_numeric_gradient(predict, agrad.size, truth)
            assert_allclose(agrad, ngrad, atol=0.2, rtol=0.2)


def get_predict(layer, param_name, inputs):
    """Helper for gradient check. To do the numeric gradient check, we have
    to be able to wiggle one value in a parameter, and check the prediction
    before and after. So we need to get a callback that gives an output
    given a change to one weight.
    """

    def predict(i, epsilon):
        param = layer.get_param(param_name)
        shape = param.shape
        param = param.ravel()
        param[i] += epsilon
        layer.set_param(param_name, param.reshape(shape))
        outputs = layer.predict(inputs)
        param[i] -= epsilon
        layer.set_param(param_name, param.reshape(shape))
        # The caller only needs the prediction to compute a scalar loss, so
        # return it in its natural output shape rather than the param shape.
        return outputs

    return predict


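# Estimates the gradient one weight at a time: perturb weight i by +/-1e-4,
# recompute the loss against the target, and take the central difference.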
def get_numeric_gradient(predict, n, target):
    gradient = numpy.zeros(n)
    for i in range(n):
        out1 = predict(i, 1e-4)
        out2 = predict(i, -1e-4)

        err1 = _get_loss(out1, target)
        err2 = _get_loss(out2, target)
        gradient[i] = (err1 - err2) / (2 * 1e-4)
        print("NGrad", i, err1, err2)
    return gradient


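# Half sum-of-squared-error loss; it is symmetric in its arguments, so the
# (truth, guess) order does not affect the gradient check.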
def _get_loss(truth, guess):
    return numpy.sum(numpy.sum(0.5 * numpy.square(truth - guess), 1))