File: matrix_mult_test.py

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (119 lines) | stat: -rw-r--r-- 2,971 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import operator_benchmark as op_bench
import torch

"""
Microbenchmarks for batch matrix multiplication with torch.einsum and torch.bmm.
"""

# "short"-tagged problem sizes for the batched matrix-multiply benchmark.
# Each row of `attrs` supplies one value per name in `attr_names`, i.e. one
# (B, M, N, K) combination. These names match the parameters of
# BatchMatrixMultBenchmark.init, which builds operands of shape (B, M, N)
# and (B, N, K).
# NOTE(review): presumably config_list pairs every row with each entry of
# cross_product_configs['device'] -- confirm against operator_benchmark.
batch_mm_configs_short = op_bench.config_list(
    attr_names=["B", "M", "N", "K"],
    attrs=[
        [4, 5, 3, 2],
        [32, 25, 20, 30],
        [128, 100, 120, 110],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"],
)

# "long"-tagged problem sizes: same attribute layout as the short list above,
# with larger (B, M, N, K) values.
batch_mm_configs_long = op_bench.config_list(
    attr_names=["B", "M", "N", "K"],
    attrs=[
        [128, 256, 128, 256],
        [512, 1024, 1024, 512],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["long"],
)

# Operators to benchmark, one (op_name, op_func) row each.
# BatchMatrixMultBenchmark.forward only inspects op_func.__name__ to choose
# between its einsum and bmm code paths; it does not call op_func directly.
batch_mm_op_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['einsum_bmm', torch.einsum],
        ['bmm', torch.bmm],
    ],
)

class BatchMatrixMultBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark case for a batched matrix product.

    The product is computed either with ``torch.einsum`` or with
    ``torch.bmm``, depending on the operator handed to ``init``.
    """

    def init(self, B, M, N, K, device, op_func):
        """Create the random operands and remember the operator under test.

        Args:
            B: Leading (batch) dimension shared by both operands.
            M: Second dimension of the left operand.
            N: Inner dimension shared by both operands.
            K: Last dimension of the right operand.
            device: Device argument forwarded to ``torch.rand``.
            op_func: Callable whose ``__name__`` selects the code path
                taken by ``forward``.
        """
        # Left operand is drawn first, then the right one, so the random
        # stream is consumed in the same order as the dict entries below.
        lhs = torch.rand(B, M, N, device=device)
        rhs = torch.rand(B, N, K, device=device)
        self.inputs = dict(input_one=lhs, input_two=rhs)
        self.op_func = op_func

    def forward(self, input_one, input_two):
        """Return the batched product of ``input_one`` and ``input_two``."""
        # Every operator that is not named "einsum" takes the torch.bmm path.
        if self.op_func.__name__ != "einsum":
            return torch.bmm(input_one, input_two)
        return torch.einsum('bij,bjk->bik', input_one, input_two)


"""
Microbenchmarks for element-wise multiplication of batched matrices with torch.einsum and torch.mul.
"""

# "short"-tagged problem sizes for the element-wise benchmark.
# Each row of `attrs` supplies one value per name in `attr_names`, i.e. one
# (B, M, N) combination. These names match the parameters of
# BatchElementWiseBenchmark.init, which builds two operands of shape
# (B, M, N).
# NOTE(review): presumably config_list pairs every row with each entry of
# cross_product_configs['device'] -- confirm against operator_benchmark.
batch_elementwise_configs_short = op_bench.config_list(
    attr_names=["B", "M", "N"],
    attrs=[
        [4, 5, 3],
        [32, 25, 20],
        [100, 90, 110],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"],
)


# "long"-tagged problem sizes, given as per-attribute value lists rather than
# explicit rows.
# NOTE(review): presumably cross_product_configs expands these lists into
# every B x M x N x device combination -- confirm against operator_benchmark.
batch_elementwise_configs_long = op_bench.cross_product_configs(
    B=[128, 512, 1024],
    M=[128, 512, 1024],
    N=[128, 512, 1024],
    device=['cpu', 'cuda'],
    tags=['long']
)

# Operators to benchmark, one (op_name, op_func) row each.
# BatchElementWiseBenchmark.forward only inspects op_func.__name__ to choose
# between its einsum and mul code paths; it does not call op_func directly.
batch_elementwise_op_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['einsum_elementwise', torch.einsum],
        ['mul', torch.mul],
    ],
)

class BatchElementWiseBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark case for an element-wise product of two 3-D tensors.

    The product is computed either with ``torch.einsum`` or with
    ``torch.mul``, depending on the operator handed to ``init``.
    """

    def init(self, B, M, N, device, op_func):
        """Create the random operands and remember the operator under test.

        Args:
            B: First dimension of both operands.
            M: Second dimension of both operands.
            N: Third dimension of both operands.
            device: Device argument forwarded to ``torch.rand``.
            op_func: Callable whose ``__name__`` selects the code path
                taken by ``forward``.
        """
        # Both operands share one shape; they are drawn in dict-entry order.
        first = torch.rand(B, M, N, device=device)
        second = torch.rand(B, M, N, device=device)
        self.inputs = dict(input_one=first, input_two=second)
        self.op_func = op_func

    def forward(self, input_one, input_two):
        """Return the element-wise product of ``input_one`` and ``input_two``."""
        # Every operator that is not named "einsum" takes the torch.mul path.
        if self.op_func.__name__ != "einsum":
            return torch.mul(input_one, input_two)
        return torch.einsum('bij,bij->bij', input_one, input_two)


# Register the batched matrix-multiply tests: the operator list, the
# concatenation of the short and long config lists, and the benchmark class
# are handed to the op_bench test generator.
# NOTE(review): presumably one test is generated per (operator, config)
# pair -- confirm against operator_benchmark.
op_bench.generate_pt_tests_from_op_list(
    batch_mm_op_list,
    batch_mm_configs_short + batch_mm_configs_long,
    BatchMatrixMultBenchmark,
)

# Register the element-wise tests in the same way.
op_bench.generate_pt_tests_from_op_list(
    batch_elementwise_op_list,
    batch_elementwise_configs_short + batch_elementwise_configs_long,
    BatchElementWiseBenchmark,
)


# Hand control to the op_bench runner only when executed as a script.
if __name__ == "__main__":
    op_bench.benchmark_runner.main()