File: framework_overhead_benchmark.py

from utils import ms_to_us, benchmark_module, BenchmarkConfig, ModuleConfig
import argparse
from C2Module import C2SimpleNet

from SimpleAddModule import SimpleAddModule, add_tensors_loop
from pt_wrapper_module import WrapperModule

""" Framework overhead benchmark script.
Benchmark framework overhead.
Currently supported ops: add.
As of now runs only forward pass.
Supports both graph mode and eager mode. In graph mode the module is traced via JIT tracing.
Debug option prints the traced graph is graph_mode is enabled.
Graph can be saved via save option. Saved in the directory where benchmark is run.
Example build/run:
To run PT benchmark:
buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_benchmark --
 --add_op --graph_mode --eager_mode (Runs both graph mode and eager mode)
buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_benchmark --
 --add_op --graph_mode (Runs only graph mode)
To run C2 benchmark:
buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_benchmark --
 --add_op --benchmark_c2_net
"""

SUPPORTED_OPS = {"add_op"}

def parse_op_args(op):
    """Splits a comma-separated op string into a list of op names."""
    op_list = op.split(",")
    return op_list

def print_results(result):
    print("===================================")
    for key, value in result.items():
        print("{}, latency per iter (us):{}".format(key, ms_to_us(value)))
    print("===================================")

def benchmark_simple_fn(args, config, module_config, module_type, result):
    """ Benchmarks a PyTorch traceable function specified in the config.
    Instantiates a wrapper object that wraps the object of module_type and runs the forward
    method using benchmark_module.
    Args:
        config:         contains number of warmup and benchmark iterations.
        module_config:  module_config which contains op, number of parameters that op takes
                    and whether graph mode is enabled or not.
        module_type:    Type of the module to be wrapped. e.g. SimpleAddModule for add op.
        result:         dictionary instance to be populated with the benchmark result (latency per iter).
    """
    benchmark_c2_net = args.benchmark_c2_net
    print("Benchmarking {}".format(module_type.__name__))
    if benchmark_c2_net:
        op_name = module_config.c2_op
        num_inputs = module_config.num_params
        module = C2SimpleNet(op_name, num_inputs=num_inputs, debug=args.debug)
        latency_per_iter_ms = benchmark_module(config, module)
        result[op_name] = latency_per_iter_ms
    else:
        f_name = module_config.pt_fn.__name__ + ":Num Operands=" + str(module_config.num_params)
        graph_mode_str = "Graph mode" + ":" + str(module_config.graph_mode)
        result_key = ','.join((f_name, graph_mode_str))
        module = WrapperModule(module_type, module_config, args.debug, args.save)
        latency_per_iter_ms = benchmark_module(config, module, args.use_throughput_benchmark)
        result[result_key] = latency_per_iter_ms
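
# A minimal sketch of driving benchmark_simple_fn directly, mirroring what main() does below.
# argparse.Namespace here only stands in for parsed command-line arguments; the field order of
# BenchmarkConfig/ModuleConfig is inferred from their call sites in this file:
#
#   args = argparse.Namespace(benchmark_c2_net=False, use_throughput_benchmark=False,
#                             debug=False, save=False)
#   config = BenchmarkConfig(100, 1000)                            # warmup iters, benchmark iters
#   module_config = ModuleConfig(add_tensors_loop, None, 2, True)  # PT add op, 2 operands, graph mode
#   result = {}
#   benchmark_simple_fn(args, config, module_config, SimpleAddModule, result)
#   print_results(result)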

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--op", default="add_op", dest="op", type=str)
    parser.add_argument("--benchmark_c2_net", default=False, dest="benchmark_c2_net", action="store_true")
    parser.add_argument("--use_throughput_benchmark", default=False, dest="use_throughput_benchmark", action="store_true")
    parser.add_argument("--debug", default=False, dest="debug", action="store_true")
    parser.add_argument("--save", default=False, dest="save", action="store_true")
    parser.add_argument("--eager_mode", default=False, dest="eager_mode", action="store_true")
    parser.add_argument("--num_warmup_iters", type=int, default=100)
    parser.add_argument("--num_iters", type=int, default=1000)
    args = parser.parse_args()

    if args.op not in SUPPORTED_OPS:
        print("Op {} is not supported: Supported ops are:{}".format(args.op, SUPPORTED_OPS))
        return
    assert not (args.benchmark_c2_net and args.use_throughput_benchmark), \
        "Benchmarking of C2 net via throughput benchmarking is not yet supported"

    num_warmup_iters = args.num_warmup_iters
    num_iters = args.num_iters
    config = BenchmarkConfig(num_warmup_iters, num_iters)
    graph_mode = True
    if args.eager_mode:
        graph_mode = False
    result = {}
    if args.op == "add_op":
        num_params = 2
        if args.benchmark_c2_net:
            module_config = ModuleConfig(None, 'Sum', num_params, None)
        else:
            module_config = ModuleConfig(add_tensors_loop, None, num_params, graph_mode)
        benchmark_simple_fn(args, config, module_config, SimpleAddModule, result)
    print_results(result)

if __name__ == "__main__":
    main()