File: concat_test.py

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (130 lines) | stat: -rw-r--r-- 4,529 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import operator_benchmark as op_bench
import benchmark_caffe2 as op_bench_c2
import random
from benchmark_caffe2 import Caffe2BenchmarkBase  # noqa: F401
from caffe2.python import core


"""Microbenchmarks for Concat operator. Supports both Caffe2/PyTorch."""

# Shared attribute values handed to the op_bench.config_list calls below
# through their cross_product_configs= argument.
cross_product_configs = dict(
    device=['cpu', 'cuda'],
    dtype=['float'],
    add_axis=[0],
)

# Configs for the C2 concat operator (tagged 'short').
# Each row lists its values in attr_names order: (sizes, N, axis).
# ConcatBenchmark.init turns a row into N input tensors that all use the
# `sizes` shape. How rows are combined with cross_product_configs is decided
# inside op_bench.config_list, which is not part of this file.
cat_configs_short = op_bench.config_list(
    attr_names=['sizes', 'N', 'axis'],
    attrs=[
        [(1,    1,      1), 2, 0],  # noqa: E241
        [(512,  512,    2), 2, 1],  # noqa: E241
        [(128, 1024,    2), 2, 1],  # noqa: E241
    ],
    cross_product_configs=cross_product_configs,
    tags=['short'],
)

# Configs specific to static runtime feature - a fast runtime for pared down models.
# Each row lists (sizes, N, axis, add_axis). Rows with N == -1 give the explicit
# list of input shapes; the other rows give one shape that ConcatBenchmark.init
# repeats N times.
# These rows choose 'add_axis' themselves, so that key is left out of the cross
# product passed here. Passing the shared dict unchanged would supply 'add_axis'
# twice per generated config (the row value and the shared [0]); which one wins
# is decided by code outside this file.
cat_configs_static_runtime = op_bench.config_list(
    attr_names=['sizes', 'N', 'axis', 'add_axis'],
    attrs=[
        [(1, 40), 5, 1, 1],
        [[(1, 160), (1, 14)], -1, 1, 0],
        [[(1, 20, 40), (1, 4, 40), (1, 5, 40)], -1, 1, 0],
        [[(1, 580), (1, 174)], -1, 1, 0],
        [(20, 40), 5, 1, 1],
        [[(20, 160), (20, 14)], -1, 1, 0],
        [[(20, 20, 40), (20, 4, 40), (20, 5, 40)], -1, 1, 0],
        [[(20, 580), (20, 174)], -1, 1, 0],
    ],
    cross_product_configs={
        key: values
        for key, values in cross_product_configs.items()
        if key != 'add_axis'
    },
    tags=['static_runtime'],
)

# Larger configs (tagged 'long'). Each row lists (sizes, N, axis).
# A `sizes` entry may be a zero-argument callable: ConcatBenchmark.init calls it
# once for every one of the N generated inputs, so that dimension gets a fresh
# random.randint value per input while the plain-int dimensions stay fixed.
cat_configs_long = op_bench.config_list(
    attr_names=['sizes', 'N', 'axis'],
    attrs=[
        # Fixed shapes, two inputs, one row per concat axis.
        [(2**10,    2**10,      2), 2, 0],  # noqa: E241
        [(2**10+1,  2**10-1,    2), 2, 1],  # noqa: E226,E241
        [(2**10,    2**10,      2), 2, 2],  # noqa: E241

        # Five inputs; the randomized dimension is the one named by `axis`.
        [[ lambda: random.randint(2**6, 2**7),      2**7-17,    2**6+1],  # noqa: E201,E226,E241
            5, 0],
        [[ 2**6+2**5,   lambda: random.randint(2**6, 2**7),     2**6],  # noqa: E201,E226,E241,E272
            5, 1],
        [[ 2**7,        2**6,       lambda: random.randint(2**6, 2**7)],  # noqa: E201,E241,E272
            5, 2],

        # Fifty inputs; again the randomized dimension is the one named by `axis`.
        [[lambda: random.randint(2**5, 2**6),       2**5,       2**6],  # noqa: E241
            50, 0],
        [[2**5,         lambda: random.randint(2**5, 2**6),     2**6],  # noqa: E241,E272
            50, 1],
        [[2**5+1,       2**6+1,         lambda: random.randint(2**5, 2**6)],  # noqa: E226,E241,E272
            50, 2],
    ],
    cross_product_configs=cross_product_configs,
    tags=['long'],
)

# There is a different codepath on CUDA for >4 dimensions
# Each row lists (sizes, N, axis) for five-dimensional inputs. 'dtype' is not a
# per-row attribute here: the rows carry three values and dtype is already
# supplied through cross_product_configs, so it is not listed in attr_names.
cat_configs_multidim = op_bench.config_list(
    attr_names=['sizes', 'N', 'axis'],
    attrs=[
        [(2**6,     2**5,   2**2,   2**4,   2**5), 2, 2],  # noqa: E241
        [(2**4,     2**5,   2**2,   2**4,   2**5), 8, 2],  # noqa: E241
        [(2**3+1,   2**5-1, 2**2+1, 2**4-1, 2**5+1), 17, 4],  # noqa: E226,E241
    ],
    cross_product_configs=cross_product_configs,
    tags=['multidim'],
)

# Configs with a large number of inputs (tagged 'manyinputs').
# Each row lists (sizes, N, axis). `sizes` holds a single callable, so
# ConcatBenchmark.init produces N one-dimensional shapes, each with its own
# random.randint length, concatenated along axis 0.
cat_configs_manyinputs = op_bench.config_list(
    attr_names=['sizes', 'N', 'axis'],
    attrs=[
        [[lambda: random.randint(1, 10000)], 100, 0],
        [[lambda: random.randint(1, 1000)], 1000, 0],
        [[lambda: random.randint(1, 500)], 2000, 0],
        [[lambda: random.randint(1, 300)], 3000, 0],
    ],
    cross_product_configs=cross_product_configs,
    tags=['manyinputs'],
)


class ConcatBenchmark(op_bench_c2.Caffe2BenchmarkBase):
    """Benchmark case that builds a Caffe2 ``Concat`` operator from one config."""

    def init(self, sizes, N, axis, add_axis, dtype, device):
        """Create the tensors and operator arguments for one config.

        Args:
            sizes: Either a single shape template (a sequence whose entries
                are ints or zero-argument callables returning ints) that is
                expanded ``N`` times, or, when it is a list and ``N == -1``,
                the explicit list of per-input shapes.
            N: Number of inputs to generate from the template, or ``-1`` to
                use ``sizes`` as the list of input shapes.
            axis: Stored as the operator's ``axis`` argument.
            add_axis: Stored as the operator's ``add_axis`` argument.
            dtype: Passed to ``self.tensor`` for the inputs and the output.
            device: Passed to ``self.tensor`` for the inputs and the output.
        """
        # Re-seed on every init so callable size entries produce the same
        # sequence of values each time a config is instantiated.
        random.seed(42)
        self.args = {'axis': axis, 'add_axis': add_axis}

        if isinstance(sizes, list) and N == -1:
            # The config already spells out one shape per input.
            gen_sizes = sizes
        else:
            # Expand the template N times; callables are evaluated once per
            # generated input, so each input may get a different extent.
            gen_sizes = [
                [dim() if callable(dim) else dim for dim in sizes]
                for _ in range(N)
            ]

        self.inputs = [
            self.tensor(shape, dtype, device=device) for shape in gen_sizes
        ]

        # Both output tensors are created with the first input's shape.
        self.output = self.tensor(gen_sizes[0], dtype, device=device)
        # split_info uses the "int" dtype and no explicit device, so
        # self.tensor's own default device applies.
        self.split_info = self.tensor(gen_sizes[0], "int")
        self.set_module_name("concat")

    def forward(self):
        """Return a ``Concat`` operator wired to the tensors made by ``init``.

        The operator takes ``self.inputs`` as inputs, writes to
        ``[self.output, self.split_info]`` and receives ``axis`` / ``add_axis``
        as keyword arguments.
        """
        return core.CreateOperator(
            "Concat", self.inputs, [self.output, self.split_info], **self.args
        )


# Hand every config list, concatenated in this order, to generate_c2_test
# together with the benchmark class defined above.
op_bench_c2.generate_c2_test(
    cat_configs_short
    + cat_configs_long
    + cat_configs_multidim
    + cat_configs_manyinputs
    + cat_configs_static_runtime,
    ConcatBenchmark,
)


# When executed as a script, delegate to the shared benchmark runner.
if __name__ == "__main__":
    op_bench.benchmark_runner.main()