File: explicit_resnet_param_update.py

package info (click to toggle)
pytorch 1.13.1+dfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (65 lines) | stat: -rw-r--r-- 2,268 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65





from caffe2.python import workspace, core
from caffe2.proto import caffe2_pb2


def gen_param_update_builder_fun(self, model, dataset, is_train):
    """Return a builder that adds momentum-SGD parameter-update ops.

    For non-training runs no updates are needed, so ``None`` is returned.
    For training, one learning-rate blob per configured XPU device is
    pre-created in the workspace, and a closure is returned that, given a
    model, appends weight-decay and Nesterov momentum-SGD update operators
    for every parameter.
    """
    if not is_train:
        return None

    # Pre-create one '<device>_<id>/lr' blob per XPU so the update
    # operators added below can read the learning rate on each device.
    dist_opts = self.opts['distributed']
    first_id = dist_opts['first_xpu_id']
    device_name = dist_opts['device']
    for xpu_id in range(first_id, first_id + dist_opts['num_xpus']):
        device_opt = core.DeviceOption(caffe2_pb2.CUDA, xpu_id)
        with core.DeviceScope(device_opt):
            workspace.CreateBlob('{}_{}/lr'.
                format(device_name, xpu_id))

    def add_parameter_update_ops(model):
        """Append weight-decay + momentum-SGD ops for each model param."""
        model.Iter("ITER")
        # Scalar constants shared by all parameter updates: two decay
        # coefficients (batch-norm params get their own) and the literal 1.
        weight_decay = model.param_init_net.ConstantFill(
            [], 'weight_decay', shape=[1],
            value=self.opts['model_param']['weight_decay']
        )
        weight_decay_bn = model.param_init_net.ConstantFill(
            [], 'weight_decay_bn', shape=[1],
            value=self.opts['model_param']['weight_decay_bn']
        )
        one = model.param_init_net.ConstantFill(
            [], "ONE", shape=[1], value=1.0
        )

        # Momentum-SGD update for every parameter.
        params = model.GetParams()
        assert len(params) > 0

        for param in params:
            grad = model.param_to_grad[param]
            momentum_blob = model.param_init_net.ConstantFill(
                [param], param + '_momentum', value=0.0
            )

            # Fold L2 weight decay into the gradient in place; batch-norm
            # parameters (name contains '_bn') use their own coefficient.
            decay = weight_decay_bn if '_bn' in str(param) else weight_decay
            model.WeightedSum([grad, one, param, decay], grad)

            # In-place Nesterov momentum update of grad, momentum and param.
            model.net.MomentumSGDUpdate(
                [grad, momentum_blob, 'lr', param],
                [grad, momentum_blob, param],
                momentum=0.9,
                nesterov=1
            )

    return add_parameter_update_ops