File: reservoir_sampling.py

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (89 lines) | stat: -rw-r--r-- 3,013 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
## @package reservoir_sampling
# Module caffe2.python.layers.reservoir_sampling





from caffe2.python import core, schema
from caffe2.python.layers.layers import ModelLayer


class ReservoirSampling(ModelLayer):
    """
    Collect samples from input record w/ reservoir sampling. If you have complex
    data, use PackRecords to pack it before using this layer.

    This layer is not thread safe.

    The layer owns the following parameters, all created with
    ``model.NoOptim`` so that no optimizer is attached to them:

    * ``reservoir``   -- blob holding the collected samples (initial shape [0]).
    * ``num_visited`` -- INT64 scalar counter, initialized to 0.
    * ``mutex``       -- blob created by the ``CreateMutex`` operator.

    When the input record contains an ``object_id`` field, two more parameters
    (``object_to_pos`` and ``pos_to_object``) are created and wired into the
    operator together with the ``object_id`` blob.

    The output schema is a Struct with fields ``reservoir``, ``num_visited``
    and ``mutex``.
    """

    def __init__(self, model, input_record, num_to_collect,
                 name='reservoir_sampling', **kwargs):
        """
        Create the layer parameters and the output schema.

        Args:
            model: the layer model this layer is added to; its ``NoOptim``
                attribute is used as the optimizer for every parameter.
            input_record: input schema record. It must expose a ``data``
                field; an optional ``object_id`` field enables the extra
                ``object_to_pos`` / ``pos_to_object`` blobs.
            num_to_collect: size passed to the ReservoirSampling operator as
                ``num_to_collect``; must be greater than 0.
            name: layer name forwarded to ``ModelLayer``.
            **kwargs: forwarded unchanged to ``ModelLayer.__init__``.

        Raises:
            AssertionError: if ``num_to_collect`` is not greater than 0.
        """
        super(ReservoirSampling, self).__init__(
            model, name, input_record, **kwargs)
        assert num_to_collect > 0, 'num_to_collect must be greater than 0'
        self.num_to_collect = num_to_collect

        # Sample storage; starts empty (shape [0]).
        self.reservoir = self.create_param(
            param_name='reservoir',
            shape=[0],
            initializer=('ConstantFill',),
            optimizer=model.NoOptim,
        )
        # Scalar INT64 counter, starting at 0.
        self.num_visited_blob = self.create_param(
            param_name='num_visited',
            shape=[],
            initializer=('ConstantFill', {
                'value': 0,
                'dtype': core.DataType.INT64,
            }),
            optimizer=model.NoOptim,
        )
        # Mutex blob handed to the operator as its fourth input.
        self.mutex = self.create_param(
            param_name='mutex',
            shape=[],
            initializer=('CreateMutex',),
            optimizer=model.NoOptim,
        )

        # Optional blobs appended to the operator's inputs/outputs; they stay
        # empty unless the input record carries an 'object_id' field.
        self.extra_input_blobs = []
        self.extra_output_blobs = []
        if 'object_id' in input_record:
            object_to_pos = self.create_param(
                param_name='object_to_pos',
                shape=None,
                initializer=('CreateMap', {
                    'key_dtype': core.DataType.INT64,
                    # The CreateMap operator schema names this argument
                    # 'value_dtype'. The earlier spelling 'valued_dtype' is
                    # not an argument the operator reads, so the value type
                    # was INT32 only because that is the operator's default.
                    'value_dtype': core.DataType.INT32,
                }),
                optimizer=model.NoOptim,
            )
            pos_to_object = self.create_param(
                param_name='pos_to_object',
                shape=[0],
                initializer=('ConstantFill', {
                    'value': 0,
                    'dtype': core.DataType.INT64,
                }),
                optimizer=model.NoOptim,
            )
            # Input order: object_id, object_to_pos, pos_to_object.
            self.extra_input_blobs.append(input_record.object_id())
            self.extra_input_blobs.extend([object_to_pos, pos_to_object])
            # Both lookup blobs are also operator outputs (updated in place).
            self.extra_output_blobs.extend([object_to_pos, pos_to_object])

        self.output_schema = schema.Struct(
            (
                'reservoir',
                # Reuse the structure of the input 'data' field, backed by the
                # reservoir blob.
                schema.from_blob_list(input_record.data, [self.reservoir])
            ),
            ('num_visited', schema.Scalar(blob=self.num_visited_blob)),
            ('mutex', schema.Scalar(blob=self.mutex)),
        )

    def add_ops(self, net):
        """
        Add the ReservoirSampling operator to ``net``.

        Inputs are, in order: reservoir, num_visited, the input record's
        ``data`` blob, mutex, followed by any extra input blobs. Outputs are
        reservoir and num_visited (same blobs as the inputs), followed by any
        extra output blobs.
        """
        net.ReservoirSampling(
            [self.reservoir, self.num_visited_blob, self.input_record.data(),
             self.mutex] + self.extra_input_blobs,
            [self.reservoir, self.num_visited_blob] + self.extra_output_blobs,
            num_to_collect=self.num_to_collect,
        )