# Owner(s): ["module: inductor"]

import unittest

import torch
from torch._dynamo.utils import counters
from torch._inductor.runtime.benchmarking import Benchmarker, TritonBenchmarker
from torch._inductor.test_case import run_tests, TestCase
from torch.testing._internal.common_utils import (
    decorateIf,
    instantiate_parametrized_tests,
    parametrize,
)
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_GPU


ALL_BENCHMARKER_CLASSES = (
    Benchmarker,
    TritonBenchmarker,
)
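

# Smoke tests for both benchmarker classes. Each benchmarker method is
# expected to bump a counters["inductor"]["benchmarking.<cls>.<fn>"] entry
# once per call; get_counter_value below reads these back.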
@instantiate_parametrized_tests
class TestBenchmarker(TestCase):
    def setUp(self):
        super().setUp()
        # Fixed seed for reproducible inputs; clear the counters so each
        # test's counter assertions start from zero.
        torch.manual_seed(12345)
        counters.clear()

    @staticmethod
    def get_counter_value(benchmarker_cls, fn_name):
        return counters["inductor"][
            f"benchmarking.{benchmarker_cls.__name__}.{fn_name}"
        ]

    @staticmethod
    def make_params(device, size=100):
        # A minimal benchmarkable workload: torch.sum over a random tensor,
        # returned both as (fn, args, kwargs) and as a zero-arg callable.
        fn, fn_args, fn_kwargs = torch.sum, (torch.randn(size, device=device),), {}
        _callable = lambda: fn(*fn_args, **fn_kwargs)  # noqa: E731
        return (fn, fn_args, fn_kwargs), _callable
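
    # The base Benchmarker appears not to implement GPU benchmarking (the
    # Triton-backed subclass does), so the GPU parametrization of the base
    # class is marked as an expected failure.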
    @unittest.skipIf(not HAS_CPU or not HAS_GPU, "requires CPU and GPU")
    @decorateIf(
        unittest.expectedFailure,
        lambda params: params["benchmarker_cls"] is Benchmarker
        and params["device"] == GPU_TYPE,
    )
    @parametrize("benchmarker_cls", ALL_BENCHMARKER_CLASSES)
    @parametrize("device", (GPU_TYPE, "cpu"))
    def test_benchmark_smoke(self, benchmarker_cls, device):
        benchmarker = benchmarker_cls()
        (fn, fn_args, fn_kwargs), _ = self.make_params(device)
        timing = benchmarker.benchmark(fn, fn_args, fn_kwargs)
        self.assertGreater(timing, 0)
        self.assertEqual(self.get_counter_value(benchmarker_cls, "benchmark"), 1)
        self.assertEqual(
            self.get_counter_value(
                benchmarker_cls, "benchmark_cpu" if device == "cpu" else "benchmark_gpu"
            ),
            1,
        )

    @unittest.skipIf(not HAS_CPU, "requires CPU")
    @parametrize("benchmarker_cls", ALL_BENCHMARKER_CLASSES)
    def test_benchmark_cpu_smoke(self, benchmarker_cls, device="cpu"):
        benchmarker = benchmarker_cls()
        _, _callable = self.make_params(device)
        timing = benchmarker.benchmark_cpu(_callable)
        self.assertGreater(timing, 0)
        self.assertEqual(self.get_counter_value(benchmarker_cls, "benchmark_cpu"), 1)
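
    # benchmark_gpu on the base Benchmarker is expected to fail (see above);
    # for TritonBenchmarker we additionally expect exactly one call into
    # Triton's do_bench, tracked by the "triton_do_bench" counter.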
    @unittest.skipIf(not HAS_GPU, "requires GPU")
    @decorateIf(
        unittest.expectedFailure,
        lambda params: params["benchmarker_cls"] is Benchmarker,
    )
    @parametrize("benchmarker_cls", ALL_BENCHMARKER_CLASSES)
    def test_benchmark_gpu_smoke(self, benchmarker_cls, device=GPU_TYPE):
        benchmarker = benchmarker_cls()
        _, _callable = self.make_params(device)
        timing = benchmarker.benchmark_gpu(_callable)
        self.assertGreater(timing, 0)
        self.assertEqual(self.get_counter_value(benchmarker_cls, "benchmark_gpu"), 1)
        if benchmarker_cls is TritonBenchmarker:
            self.assertEqual(
                self.get_counter_value(benchmarker_cls, "triton_do_bench"), 1
            )
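
    # benchmark() infers the device from the tensors in fn_args/fn_kwargs;
    # with no tensor arguments there is nothing to infer from, so the call
    # is expected to raise.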
    @unittest.skipIf(not HAS_CPU and not HAS_GPU, "requires CPU or GPU")
    @unittest.expectedFailure
    @parametrize("benchmarker_cls", ALL_BENCHMARKER_CLASSES)
    def test_benchmark_safely_infers_device_no_devices(
        self, benchmarker_cls, device="cpu" if HAS_CPU else GPU_TYPE
    ):
        benchmarker = benchmarker_cls()
        (fn, _, _), _ = self.make_params(device)
        benchmarker.benchmark(fn, (), {})
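
    # Conversely, arguments spread across multiple devices make the inferred
    # device ambiguous, so this call is also expected to raise.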
    @unittest.skipIf(not HAS_CPU or not HAS_GPU, "requires CPU and GPU")
    @unittest.expectedFailure
    @parametrize("benchmarker_cls", ALL_BENCHMARKER_CLASSES)
    def test_benchmark_safely_infers_device_many_devices(self, benchmarker_cls):
        benchmarker = benchmarker_cls()
        (fn, cpu_args, cpu_kwargs), _ = self.make_params("cpu")
        (_, gpu_args, gpu_kwargs), _ = self.make_params(GPU_TYPE)
        many_devices_args = cpu_args + gpu_args
        many_devices_kwargs = cpu_kwargs
        many_devices_kwargs.update(gpu_kwargs)
        benchmarker.benchmark(fn, many_devices_args, many_devices_kwargs)


if __name__ == "__main__":
    run_tests()