File: benchmark_utils.py

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (62 lines) | stat: -rw-r--r-- 1,940 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import random
from typing import Any

import torch


def transpose_tensors(p_transpose_both: float = 0.05) -> tuple[bool, bool]:
    """Randomly pick a pair of transpose flags.

    First a weighted draw decides whether both flags are set (weight
    ``p_transpose_both`` against ``1 - p_transpose_both``). If not, one of
    "left only", "right only" or "neither" is drawn with equal weight.

    Args:
        p_transpose_both: Weight of the "transpose both" outcome.

    Returns:
        A ``(transpose_left, transpose_right)`` pair of booleans.
    """
    both_weights = [p_transpose_both, 1 - p_transpose_both]
    (both,) = random.choices([True, False], both_weights)
    if both:
        return (True, True)

    # No weights given: the three remaining combinations are equally likely.
    remaining = [(True, False), (False, True), (False, False)]
    (picked,) = random.choices(remaining)
    return picked


def fits_in_memory(dtype: Any, m: int, k: int, n: int) -> Any:
    """Tell whether ``m*k + k*n + m*n`` elements of ``dtype`` fit the budget.

    The element count presumably covers the two operands and the result of
    an ``(m, k) @ (k, n)`` matmul. The budget is a quarter of the total
    memory reported for CUDA device 0.

    Args:
        dtype: Element type; only its ``itemsize`` attribute is read.
        m: First dimension used in the element count.
        k: Shared dimension used in the element count.
        n: Last dimension used in the element count.

    Returns:
        Result of comparing the required bytes against the budget; truthy
        when the required bytes are strictly below it.
    """
    # Only a quarter of the device memory is budgeted because we otherwise
    # sometimes run out of memory, I assume because inductor creates copies
    # of tensors for benchmarking?
    device_props = torch.cuda.get_device_properties(0)
    budget_bytes = device_props.total_memory / 4

    element_count = m * k + k * n + m * n
    return dtype.itemsize * element_count < budget_bytes


def get_mm_tensors(
    m: int,
    k: int,
    n: int,
    transpose_left: bool,
    transpose_right: bool,
    dtype_left: Any,
    dtype_right: Any,
) -> tuple[Any, Any]:
    """Create random operands of shape ``(m, k)`` and ``(k, n)``.

    When a transpose flag is set, the corresponding tensor is allocated with
    its dimensions swapped and then transposed with ``.t()``, so it has the
    requested logical shape but a transposed (non-contiguous) memory layout.

    Args:
        m: Rows of the left operand.
        k: Columns of the left operand / rows of the right operand.
        n: Columns of the right operand.
        transpose_left: Build the left operand as a transposed view.
        transpose_right: Build the right operand as a transposed view.
        dtype_left: dtype passed to ``torch.randn`` for the left operand.
        dtype_right: dtype passed to ``torch.randn`` for the right operand.

    Returns:
        The ``(left, right)`` tensors, filled with ``torch.randn`` samples.
    """
    # The left operand is always sampled first so RNG consumption is stable.
    left_alloc = (k, m) if transpose_left else (m, k)
    a = torch.randn(*left_alloc, dtype=dtype_left)
    if transpose_left:
        a = a.t()

    right_alloc = (n, k) if transpose_right else (k, n)
    b = torch.randn(*right_alloc, dtype=dtype_right)
    if transpose_right:
        b = b.t()

    return (a, b)


def set_precision(dtype: Any, p_float32_prec_highest: float = 0.8) -> None:
    """Set the global float32 matmul precision for the given dtype.

    For ``torch.float32`` the precision is drawn at random: ``"highest"``
    with weight ``p_float32_prec_highest`` and ``"high"`` with the
    complementary weight. Every other dtype always gets ``"high"``.

    Args:
        dtype: dtype that decides whether the precision is randomized.
        p_float32_prec_highest: Weight of ``"highest"`` when ``dtype`` is
            ``torch.float32``.
    """
    chosen = "high"
    if dtype == torch.float32:
        p_highest = p_float32_prec_highest
        (chosen,) = random.choices(
            ["high", "highest"], [1 - p_highest, p_highest]
        )
    # This changes process-wide state through torch's global setter.
    torch.set_float32_matmul_precision(chosen)


def get_random_between_pow2(min_power2: int, max_power2: int) -> int:
    """Return a random integer lying strictly between two adjacent powers of 2.

    An exponent ``i`` is drawn uniformly from ``[min_power2, max_power2 - 1]``
    and the result is then drawn uniformly from ``[2**i + 1, 2**(i + 1) - 1]``,
    so the result is never itself a power of two. Each exponent bracket is
    equally likely, so the result is not uniform over the whole range.

    Args:
        min_power2: Smallest exponent that may be drawn; must be at least 1
            because no integer lies strictly between ``2**0`` and ``2**1``.
        max_power2: Exclusive upper bound for the exponent; must be greater
            than ``min_power2``.

    Returns:
        An integer ``v`` with ``2**i < v < 2**(i + 1)`` for the drawn ``i``.

    Raises:
        ValueError: If ``min_power2 < 1`` or ``max_power2 <= min_power2``.
    """
    # Validate up front: the previous in-body assert only fired when the
    # exponent 0 happened to be drawn, and it is stripped under ``python -O``.
    if min_power2 < 1:
        raise ValueError(
            f"min_power2 must be >= 1 (got {min_power2}): there is no integer "
            "strictly between 2**0 and 2**1"
        )
    if max_power2 <= min_power2:
        raise ValueError(
            f"max_power2 ({max_power2}) must be greater than "
            f"min_power2 ({min_power2})"
        )

    i = random.randint(min_power2, max_power2 - 1)
    lower = 2**i + 1
    upper = 2 ** (i + 1) - 1
    return random.randint(lower, upper)