import operator_benchmark as op_bench
import torch
import torch.nn.functional as F
"""Microbenchmarks for batchnorm operator."""

# Benchmark cudnn if available
if torch.backends.cudnn.is_available():

    def cudnn_benchmark_configs(configs):
        # Duplicate every CUDA config so it runs both with and without cuDNN;
        # CPU configs only get cudnn=False.
        result = []
        for config in configs:
            is_cuda = any("cuda" in attr.values() for attr in config)
            if is_cuda:
                result.append((*config, dict(cudnn=True)))
            result.append((*config, dict(cudnn=False)))
        return result

else:

    def cudnn_benchmark_configs(configs):
        return [(*config, dict(cudnn=False)) for config in configs]
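
# Each config produced by op_bench is a sequence of attribute dicts; the helper
# above appends a {"cudnn": ...} dict so the flag is forwarded to each init().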
batchnorm_configs_short = cudnn_benchmark_configs(
    op_bench.config_list(
        attr_names=["M", "N", "K"],
        attrs=[
            [1, 256, 3136],
        ],
        cross_product_configs={
            "device": ["cpu", "cuda"],
            "training": [True, False],
        },
        tags=["short"],
    )
)

batchnorm_configs_long = cudnn_benchmark_configs(
    op_bench.cross_product_configs(
        M=[2, 128],
        N=[8192, 2048],
        K=[1],
        device=["cpu", "cuda"],
        training=[True, False],
        tags=["long"],
    )
)
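
# Benchmarks F.batch_norm on a 3D (M, N, K) input where N is the channel
# dimension; `training` toggles batch-statistics mode and `cudnn` toggles the
# cuDNN backend through torch.backends.cudnn.flags.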
class BatchNormBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, device, training, cudnn):
        self.inputs = {
            "input_one": torch.rand(
                M, N, K, device=device, requires_grad=self.auto_set()
            ),
            "mean": torch.rand(N, device=device),
            "var": torch.rand(N, device=device),
            "weight": torch.rand(N, device=device),
            "bias": torch.rand(N, device=device),
            "training": training,
            "cudnn": cudnn,
        }
        self.set_module_name("batchnorm")

    def forward(self, input_one, mean, var, weight, bias, training, cudnn):
        with torch.backends.cudnn.flags(enabled=cudnn):
            return F.batch_norm(input_one, mean, var, weight, bias, training)

op_bench.generate_pt_test(
    batchnorm_configs_short + batchnorm_configs_long, BatchNormBenchmark
)
op_bench.generate_pt_gradient_test(
    batchnorm_configs_short + batchnorm_configs_long, BatchNormBenchmark
)

batchnorm1d_configs_short = cudnn_benchmark_configs(
    op_bench.config_list(
        attr_names=["N", "C"],
        attrs=[
            [3136, 256],
        ],
        cross_product_configs={
            "device": ["cpu", "cuda"],
            "training": [True, False],
        },
        tags=["short"],
    )
)

batchnorm1d_configs_long = cudnn_benchmark_configs(
    op_bench.cross_product_configs(
        N=[2, 128],
        C=[8192, 2048],
        device=["cpu", "cuda"],
        training=[True, False],
        tags=["long"],
    )
)
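
# Same benchmark for the 2D (N, C) input case, where C is the channel dimension.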
class BatchNorm1dBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, C, device, training, cudnn):
        self.inputs = {
            "input_one": torch.rand(N, C, device=device, requires_grad=self.auto_set()),
            "mean": torch.rand(C, device=device),
            "var": torch.rand(C, device=device),
            "weight": torch.rand(C, device=device),
            "bias": torch.rand(C, device=device),
            "training": training,
            "cudnn": cudnn,
        }
        self.set_module_name("batchnorm")

    def forward(self, input_one, mean, var, weight, bias, training, cudnn):
        with torch.backends.cudnn.flags(enabled=cudnn):
            return F.batch_norm(input_one, mean, var, weight, bias, training)

op_bench.generate_pt_test(
    batchnorm1d_configs_short + batchnorm1d_configs_long, BatchNorm1dBenchmark
)
op_bench.generate_pt_gradient_test(
    batchnorm1d_configs_short + batchnorm1d_configs_long, BatchNorm1dBenchmark
)
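
# Entry point: benchmark_runner parses the operator_benchmark command-line
# options and executes every generated test case.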
if __name__ == "__main__":
    op_bench.benchmark_runner.main()