File: softmax.py (from the Debian source package pytorch-cuda 2.6.0+dfsg-7)

import scipy.special

from . import benchmark


class SoftmaxBench(benchmark.Benchmark):
    """Benchmark softmax over the last dimension of an [M, N] input tensor."""

    def __init__(self, mode, device, dtype, M, N):
        super().__init__(mode, device, dtype)
        self.M = M
        self.N = N
        self.dtype = dtype
        self.inputs = [
            self.randn(
                [M, N], device=device, dtype=dtype, requires_grad=self.requires_grad
            )
        ]

    def forward(self, inputs):
        # Pointwise add ahead of the softmax; a constant shift leaves the
        # softmax output unchanged (see reference()).
        x = self.add(inputs, 0.001)
        y = self.softmax(x, dim=-1, dtype=self.dtype)
        return y
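
    # Elementwise, forward() computes
    #   y[i, j] = exp(x[i, j]) / sum_k exp(x[i, k])
    # with x = inputs + 0.001. Softmax kernels typically subtract the
    # per-row max before exponentiating for numerical stability; that
    # detail is left to the underlying implementation.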

    def reference(self):
        # softmax(x + c) == softmax(x), so the +0.001 applied in forward()
        # does not need to be replicated here.
        return scipy.special.softmax(self.numpy(self.inputs), axis=-1)

    def config(self):
        return [self.M, self.N]

    @staticmethod
    def module():
        return "softmax"

    def memory_workload(self):
        if self.mode == "fwd":
            # Speed-of-light traffic: read the input once, write the output once.
            sol_count = 1 + 1
            # Algorithmic traffic: a three-pass softmax (row max, exp-sum,
            # normalize) reads the input three times and writes the output once.
            algorithmic_count = 3 + 1
        else:
            # Backward mode counts both the forward and the backward pass.
            sol_count = (1 + 1) + (1 + 1)
            algorithmic_count = (3 + 1) + (3 + 1)

        # Counts are per element of one [M, N] buffer.
        buffer_size = self.M * self.N
        return {
            "sol": buffer_size * sol_count,
            "algorithmic": buffer_size * algorithmic_count,
        }
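
    # Worked example (illustrative): for the default config [128, 1 << 16]
    # in "fwd" mode, buffer_size = 128 * 65536 = 8,388,608 elements, so
    # sol = 2 * buffer_size = 16,777,216 and algorithmic = 4 * buffer_size
    # = 33,554,432 element accesses; byte traffic additionally depends on
    # the element size of self.dtype.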

    @staticmethod
    def default_configs():
        return [
            [480, 20],  # 480 rows, reduction length 20
            [1 << 15, 32],  # 32768 rows, reduction length 32
            [128, 1 << 16],  # 128 rows, reduction length 65536
        ]


benchmark.register_benchmark_class(SoftmaxBench)
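
A quick standalone check of the identity reference() relies on: softmax is
invariant under a constant shift of its input along the reduced axis, so the
+0.001 added in forward() leaves the expected output unchanged. This sketch is
illustrative only, not part of the packaged file; it assumes just NumPy and
SciPy are available.

import numpy as np
import scipy.special

x = np.random.randn(4, 8)
ref = scipy.special.softmax(x, axis=-1)
shifted = scipy.special.softmax(x + 0.001, axis=-1)

# A constant shift cancels in exp(x + c) / sum_k exp(x + c).
assert np.allclose(ref, shifted)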