File: benchmark.py

package info (click to toggle)
nnpack 0.0~git20200414.57616b9-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,496 kB
  • sloc: ansic: 26,263; cpp: 11,470; python: 4,394; asm: 1,116; makefile: 16; sh: 8
file content (184 lines) | stat: -rwxr-xr-x 7,895 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env python

from __future__ import print_function


def extract_time(line, prefix):
	"""Extract the timing value from one line of benchmark output.

	A matching line has the form "<prefix> <value> ms ..."; the text between
	the prefix and the first " ms" marker is returned with surrounding
	whitespace removed.

	Args:
		line: One line of benchmark output. A bytes line (as read from a
			binary subprocess pipe on Python 3) is decoded when the prefix is
			text, so both kinds of input are accepted.
		prefix: Label the line must start with, e.g. "Time:".

	Returns:
		The timing as a string, or None when the line does not start with
		the prefix.

	Raises:
		ValueError: If the line starts with the prefix but contains no
			" ms" marker after it.
	"""
	# On Python 2 `bytes` is `str`, so the prefix is bytes too and nothing is
	# decoded; on Python 3 this avoids a TypeError in startswith().
	if isinstance(line, bytes) and not isinstance(prefix, bytes):
		line = line.decode("utf-8", "replace")
	if not line.startswith(prefix):
		return None
	value = line[len(prefix):].lstrip()
	return value[:value.index(" ms")].rstrip()


def convolution(mode, batch_size, input_channels, output_channels, image_size, kernel_size, padding, algorithm, transform_strategy=None, threads=None, verbose=False, use_selldr=False):
	"""Run bin/convolution-benchmark once and collect the timings it prints.

	Args:
		mode: Value passed to the benchmark's "-m" option.
		batch_size: Value passed as "-b".
		input_channels: Value passed as "-ic".
		output_channels: Value passed as "-oc".
		image_size: Two-element sequence passed as "-is".
		kernel_size: Two-element sequence passed as "-ks".
		padding: Value passed as "-ip".
		algorithm: Value passed as "-a".
		transform_strategy: Value passed as "-ts"; only forwarded when mode
			is "inference" and the value is not None.
		threads: Value passed as "-t"; omitted when None.
		verbose: When true, return the full timing breakdown rather than
			only the total.
		use_selldr: When true, launch the benchmark through sel_ldr.py from
			the Native Client SDK located via NACL_SDK_ROOT (the process exits
			with status 1 if that variable is not set).

	Returns:
		None if the benchmark exits with a non-zero status. Otherwise a tuple
		of timing strings as produced by extract_time (None for any entry the
		benchmark did not print): (total,) by default, or (total, input
		transform, kernel transform, output transform, block multiplication,
		overhead) when verbose is true.
	"""
	import subprocess
	if use_selldr:
		import os
		import sys
		nacl_sdk_dir = os.getenv("NACL_SDK_ROOT")
		if nacl_sdk_dir is None:
			print("Error: can not find Native Client SDK: set NACL_SDK_ROOT envorinment variable and try again", file=sys.stderr)
			sys.exit(1)
		benchmark_args = [os.path.join(nacl_sdk_dir, "tools", "sel_ldr.py"), "--",
			"bin/convolution-benchmark"]
	else:
		benchmark_args = ["bin/convolution-benchmark"]
	benchmark_args += [
		"-m", mode,
		"-b", str(batch_size),
		"-ic", str(input_channels),
		"-oc", str(output_channels),
		"-is", str(image_size[0]), str(image_size[1]),
		"-ip", str(padding),
		"-ks", str(kernel_size[0]), str(kernel_size[1]),
		"-a", algorithm
	]
	if mode == "inference" and transform_strategy is not None:
		benchmark_args += ["-ts", transform_strategy]
	if threads is not None:
		benchmark_args += ["-t", str(threads)]
	# universal_newlines=True makes communicate() return text on Python 3 as
	# well as Python 2, so the str prefixes below match the output lines.
	benchmark = subprocess.Popen(benchmark_args, stdout=subprocess.PIPE, universal_newlines=True)
	benchmark_stdout, _ = benchmark.communicate()
	if benchmark.returncode != 0:
		# A failed run yields no measurements; callers must check for None.
		return None

	# Labels in the order of the verbose result tuple.
	prefixes = (
		"Time:",
		"Input transform:",
		"Kernel transform:",
		"Output transform:",
		"Block multiplication:",
		"Overhead:",
	)
	timings = [None] * len(prefixes)
	for output_line in benchmark_stdout.splitlines():
		if not output_line:
			continue
		for index, prefix in enumerate(prefixes):
			# Keep the first reading found for each label.
			timings[index] = timings[index] or extract_time(output_line, prefix)
	if verbose:
		return tuple(timings)
	return (timings[0],)

def fully_connected(mode, batch_size, input_channels, output_channels, threads=None, verbose=False, use_selldr=False):
	"""Run bin/fully-connected-benchmark once and collect the timings it prints.

	Args:
		mode: Value passed to the benchmark's "-m" option.
		batch_size: Value passed as "-b".
		input_channels: Value passed as "-ic".
		output_channels: Value passed as "-oc".
		threads: Value passed as "-t"; omitted when None.
		verbose: When true, return the full timing breakdown rather than
			only the total.
		use_selldr: When true, launch the benchmark through sel_ldr.py from
			the Native Client SDK located via NACL_SDK_ROOT (the process exits
			with status 1 if that variable is not set).

	Returns:
		None if the benchmark exits with a non-zero status. Otherwise a tuple
		of timing strings as produced by extract_time (None for any entry the
		benchmark did not print): (total,) by default, or (total, input
		packing, kernel packing, block multiplication, overhead) when verbose
		is true.
	"""
	import subprocess
	if use_selldr:
		import os
		import sys
		nacl_sdk_dir = os.getenv("NACL_SDK_ROOT")
		if nacl_sdk_dir is None:
			print("Error: can not find Native Client SDK: set NACL_SDK_ROOT envorinment variable and try again", file=sys.stderr)
			sys.exit(1)
		benchmark_args = [os.path.join(nacl_sdk_dir, "tools", "sel_ldr.py"), "--",
			"bin/fully-connected-benchmark"]
	else:
		benchmark_args = ["bin/fully-connected-benchmark"]
	benchmark_args += [
		"-m", mode,
		"-b", str(batch_size),
		"-ic", str(input_channels),
		"-oc", str(output_channels)
	]
	if threads is not None:
		benchmark_args += ["-t", str(threads)]
	# universal_newlines=True makes communicate() return text on Python 3 as
	# well as Python 2, so the str prefixes below match the output lines.
	benchmark = subprocess.Popen(benchmark_args, stdout=subprocess.PIPE, universal_newlines=True)
	benchmark_stdout, _ = benchmark.communicate()
	if benchmark.returncode != 0:
		# A failed run yields no measurements; callers must check for None.
		return None

	# Labels in the order of the verbose result tuple.
	prefixes = (
		"Time:",
		"Input packing:",
		"Kernel packing:",
		"Block multiplication:",
		"Overhead:",
	)
	timings = [None] * len(prefixes)
	for output_line in benchmark_stdout.splitlines():
		if not output_line:
			continue
		for index, prefix in enumerate(prefixes):
			# Keep the first reading found for each label.
			timings[index] = timings[index] or extract_time(output_line, prefix)
	if verbose:
		return tuple(timings)
	return (timings[0],)

# Layer table selected by the "overfeat-fast" network option.
# Convolutional entries: (name, input channels, output channels, image size, kernel size, padding).
# Fully-connected entries: (name, input channels, output channels).
overfeat_fast_layers = [
	("conv2", 96, 256, (24, 24), (5, 5), 0),
	("conv3", 256, 512, (12, 12), (3, 3), 1),
	("conv4", 512, 1024, (12, 12), (3, 3), 1),
	("conv5", 1024, 1024, (12, 12), (3, 3), 1),
] + [
	("fc6", 36864, 3072),
	("fc7", 3072, 4096),
	("fc8", 4096, 1000),
]

# Layer table selected by the "alexnet" network option.
# Convolutional entries: (name, input channels, output channels, image size, kernel size, padding).
# Fully-connected entries: (name, input channels, output channels).
alexnet_layers = [
	("conv2", 64, 192, (27, 27), (5, 5), 2),
	("conv3", 192, 384, (13, 13), (3, 3), 1),
	("conv4", 384, 256, (13, 13), (3, 3), 1),
	("conv5", 256, 256, (13, 13), (3, 3), 1),
] + [
	("fc6", 12544, 4096),
	("fc7", 4096, 4096),
	("fc8", 4096, 1000),
]

# Layer table selected by the "vgg-a" network option.
# Convolutional entries: (name, input channels, output channels, image size, kernel size, padding).
# Fully-connected entries: (name, input channels, output channels).
vgg_a_layers = [
	("conv1", 3, 64, (224, 224), (3, 3), 1),
	("conv2", 64, 128, (112, 112), (3, 3), 1),
	("conv3.1", 128, 256, (56, 56), (3, 3), 1),
	("conv3.2", 256, 256, (56, 56), (3, 3), 1),
	("conv4.1", 256, 512, (28, 28), (3, 3), 1),
	("conv4.2", 512, 512, (28, 28), (3, 3), 1),
	("conv5", 512, 512, (14, 14), (3, 3), 1),
] + [
	("fc6", 25088, 4096),
	("fc7", 4096, 4096),
	("fc8", 4096, 1000),
]
		
if __name__ == "__main__":
	# Command-line driver: benchmarks every layer of the requested kind in the
	# chosen network and prints one tab-separated row of timings per layer.
	import argparse
	import sys

	parser = argparse.ArgumentParser(
		description="NNPACK benchmarking script")
	parser.add_argument("--enable-selldr", dest="use_selldr", action="store_true")
	parser.add_argument("-l", "--layer", dest="layer", required=True, choices=["convolution", "fully-connected", "pooling"])
	parser.add_argument("-n", "--network", dest="network", required=True, choices=["vgg-a", "alexnet", "overfeat-fast"])
	parser.add_argument("-m", "--mode", dest="mode", required=True, choices=["inference", "output", "input-gradient", "kernel-gradient"])
	# No argparse default: None means "not given on the command line", which
	# the validation below relies on. With a "compute" default every run other
	# than convolution inference was rejected.
	parser.add_argument("--transform-strategy", dest="transform_strategy", default=None, choices=["compute", "precompute"])
	parser.add_argument("-b", "--batch", dest="batch", type=int)
	parser.add_argument("-t", "--threads", dest="threads")
	parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False)

	options = parser.parse_args()

	if options.layer == "pooling":
		# Accepted by the parser, but there are no pooling layer tables and
		# no pooling benchmark wrapper in this script.
		raise NotImplementedError("Benchmarking of pooling layers is not supported")

	network_layers, default_batch = {
		"vgg-a": (vgg_a_layers, 64),
		"alexnet": (alexnet_layers, 128),
		"overfeat-fast": (overfeat_fast_layers, 128)
	}[options.network]
	layer_prefix = {
		"convolution": "conv",
		"fully-connected": "fc",
		"pooling": "pool"
	}[options.layer]
	network_layers = [layer for layer in network_layers if layer[0].startswith(layer_prefix)]

	batch = default_batch
	if options.batch is not None:
		batch = options.batch
		if batch != 1 and options.mode == "inference":
			raise ValueError("Non-unit batch {batch} is not allowed in inference mode".format(batch=batch))
	elif options.mode == "inference":
		batch = 1

	transform_strategy = options.transform_strategy
	if transform_strategy is not None:
		if options.layer != "convolution":
			raise ValueError("Transform strategy {transform_strategy} is meaningless for non-convolutional layers".format(transform_strategy=transform_strategy))
		elif options.mode != "inference":
			raise ValueError("Transform strategy {transform_strategy} is meaningless in non-inference mode".format(transform_strategy=transform_strategy))
	elif options.layer == "convolution" and options.mode == "inference":
		# Keep passing the former default explicitly for convolution inference.
		transform_strategy = "compute"

	if options.layer == "convolution":
		for name, input_channels, output_channels, image_size, kernel_size, padding in network_layers:
			measurements = [name]
			for algorithm in ["implicit-gemm", "ft8x8", "ft16x16", "wt8x8"]:
				# The "wt" algorithm is only benchmarked on 3x3 kernels.
				if algorithm.startswith("wt") and kernel_size != (3, 3):
					continue

				timings = convolution(options.mode, batch, input_channels, output_channels,
					image_size, kernel_size, padding, algorithm,
					transform_strategy=transform_strategy,
					threads=options.threads, verbose=options.verbose, use_selldr=options.use_selldr)
				if timings is None:
					# convolution() returns None when the benchmark exits with an error.
					sys.exit("Error: convolution benchmark failed for layer {name} with algorithm {algorithm}".format(
						name=name, algorithm=algorithm))
				measurements += list(timings)
			print("\t".join(map(str, measurements)))
	elif options.layer == "fully-connected":
		for name, input_channels, output_channels in network_layers:
			timings = fully_connected(options.mode, batch, input_channels, output_channels,
				threads=options.threads, verbose=options.verbose, use_selldr=options.use_selldr)
			if timings is None:
				# fully_connected() returns None when the benchmark exits with an error.
				sys.exit("Error: fully-connected benchmark failed for layer {name}".format(name=name))
			# map(str, ...) keeps missing (None) timings printable, as in the
			# convolution branch.
			print("\t".join(map(str, [name] + list(timings))))