File: semi_random_features.py

from caffe2.python import schema
from caffe2.python.layers.arc_cosine_feature_map import ArcCosineFeatureMap
import numpy as np


class SemiRandomFeatures(ArcCosineFeatureMap):
    """
    Implementation of the semi-random kernel feature map.

    Applies H(x_rand) * x_rand^s * x_learned, where
        H is the Heaviside step function,
        x_rand is the input after applying FC with randomized parameters,
        and x_learned is the input after applying FC with learnable parameters.

    If using a multilayer model with semi-random layers, the input and output
    records should each have a 'full' and a 'random' Scalar. The 'random'
    Scalar is passed as input to compute the random features.

    For more information, see the original paper:
        https://arxiv.org/pdf/1702.08882.pdf

    Inputs:
        output_dims -- dimensions of the output vector
        s -- if s == 0, will obtain linear semi-random features;
             else if s == 1, will obtain squared semi-random features;
             else if s >= 2, will obtain higher-order semi-random features
        scale_random -- amount to scale the standard deviation
                        (for random parameter initialization when weight_init or
                        bias_init hasn't been specified)
        scale_learned -- amount to scale the standard deviation
                        (for learned parameter initialization when weight_init or
                        bias_init hasn't been specified)

        weight_init_random -- initialization distribution for random weight parameter
                              (if None, will use Gaussian distribution)
        bias_init_random -- initialization distribution for random bias parameter
                            (if None, will use Uniform distribution)
        weight_init_learned -- initialization distribution for learned weight parameter
                               (if None, will use Gaussian distribution)
        bias_init_learned -- initialization distribution for learned bias parameter
                             (if None, will use Uniform distribution)
        weight_optim -- optimizer for weight params for learned features
        bias_optim -- optimizer for bias param for learned features

        set_weight_as_global_constant -- if True, initialized random parameters
                                         will be constant across all distributed
                                         instances of the layer
    """
    def __init__(
            self,
            model,
            input_record,
            output_dims,
            s=1,
            scale_random=1.0,
            scale_learned=1.0,
            weight_init_random=None,
            bias_init_random=None,
            weight_init_learned=None,
            bias_init_learned=None,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=False,
            name='semi_random_features',
            **kwargs):

        if isinstance(input_record, schema.Struct):
            assert schema.is_schema_subset(
                schema.Struct(
                    ('full', schema.Scalar()),
                    ('random', schema.Scalar()),
                ),
                input_record
            ), "Input record must contain 'full' and 'random' Scalars"
            self.input_record_full = input_record.full
            self.input_record_random = input_record.random

        elif isinstance(input_record, schema.Scalar):
            self.input_record_full = input_record
            self.input_record_random = input_record

        else:
            raise ValueError(
                "Expected input_record to be a schema.Struct or a "
                "schema.Scalar, got {}".format(type(input_record))
            )

        super(SemiRandomFeatures, self).__init__(
            model,
            self.input_record_full,
            output_dims,
            s=s,
            scale=scale_random,       # To initialize the random parameters
            weight_init=weight_init_random,
            bias_init=bias_init_random,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=set_weight_as_global_constant,
            initialize_output_schema=False,
            name=name,
            **kwargs)

        # Expose two output records: 'full' (the final semi-random features)
        # and 'random' (the processed random features, consumed by a following
        # semi-random layer in a multilayer model).
        self.output_schema = schema.Struct(
            ('full', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_full_output')
            ),),
            ('random', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_random_output')
            ),),
        )

        # To initialize the learnable parameters
        assert (scale_learned > 0.0), \
            "Expected scale (learned) > 0, got %s" % scale_learned
        self.stddev = scale_learned * np.sqrt(1.0 / self.input_dims)

        # Learned Parameters
        (self.learned_w, self.learned_b) = self._initialize_params(
            'learned_w',
            'learned_b',
            w_init=weight_init_learned,
            b_init=bias_init_learned,
            w_optim=weight_optim,
            b_optim=bias_optim
        )

    def add_ops(self, net):
        # Learned features: wx + b
        learned_features = net.FC(self.input_record_full.field_blobs() +
                                  [self.learned_w, self.learned_b],
                                  net.NextScopedBlob('learned_features'))
        # Random features: wx + b
        random_features = net.FC(self.input_record_random.field_blobs() +
                                 [self.random_w, self.random_b],
                                 net.NextScopedBlob('random_features'))
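        # Gate the random pre-activation: H(x_rand) * x_rand^s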
        processed_random_features = self._heaviside_with_power(
            net,
            random_features,
            self.output_schema.random.field_blobs(),
            self.s
        )
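        # Final 'full' output: H(x_rand) * x_rand^s * x_learned, elementwise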
        net.Mul([processed_random_features, learned_features],
                self.output_schema.full.field_blobs())
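

For readers who want to sanity-check the math outside of Caffe2, here is a minimal NumPy sketch of the feature map this layer computes, H(x_rand) * x_rand^s * x_learned. Everything below is illustrative: the function name, shapes, and initialization bounds are assumptions that only loosely mirror the layer's defaults, not part of its API.

import numpy as np

def semi_random_features(x, w_rand, b_rand, w_learned, b_learned, s=1):
    # Random branch: FC with frozen, randomly initialized parameters.
    x_rand = x @ w_rand.T + b_rand
    # Learned branch: FC with (here, pre-supplied) trainable parameters.
    x_learned = x @ w_learned.T + b_learned
    # Heaviside gate H(x_rand) times the s-th power of x_rand:
    # s == 0 gives linear, s == 1 squared, s >= 2 higher-order features.
    gate = (x_rand > 0).astype(x.dtype)
    return gate * np.power(x_rand, s) * x_learned

rng = np.random.default_rng(0)
input_dims, output_dims = 4, 3
x = rng.standard_normal((2, input_dims)).astype(np.float32)

# Gaussian weights and uniform biases, scaled like the layer's stddev,
# scale * sqrt(1 / input_dims); the exact uniform bounds are illustrative.
stddev = np.sqrt(1.0 / input_dims)
w_r = rng.normal(0.0, stddev, (output_dims, input_dims)).astype(np.float32)
b_r = rng.uniform(-stddev, stddev, output_dims).astype(np.float32)
w_l = rng.normal(0.0, stddev, (output_dims, input_dims)).astype(np.float32)
b_l = rng.uniform(-stddev, stddev, output_dims).astype(np.float32)

print(semi_random_features(x, w_r, b_r, w_l, b_l, s=1).shape)  # (2, 3)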