File: example_operator_perf_test.py

package info (click to toggle)
onnxruntime 1.23.2%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 340,756 kB
  • sloc: cpp: 3,222,136; python: 188,267; ansic: 114,318; asm: 37,927; cs: 36,849; java: 10,962; javascript: 6,811; pascal: 4,126; sh: 2,996; xml: 705; objc: 281; makefile: 67
file content (149 lines) | stat: -rw-r--r-- 5,439 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""
Example python code for creating a model with a single operator and performance testing it with various
input combinations.
"""

import time
import timeit

import numpy as np
import onnx

# if you copy this script elsewhere you may need to add the tools\python dir to the sys.path for this
# import to work.
# e.g. sys.path.append(r'<path to onnxruntime source>\tools\python')
import ort_test_dir_utils
from onnx import TensorProto, helper

import onnxruntime as rt

# make input deterministic
np.random.seed(123)


#
# Example code to create a model with just the operator to test. Adjust as necessary for what you want to test.
#
def create_model(model_name):
    """Build a single-node TopK model with symbolic input dims and save it to `model_name`.

    The graph has runtime inputs X (float, ["batch", "items"]) and K (int64, [1]),
    and outputs Values/Indices shaped ["batch", "k"].
    """
    # One TopK node; operator attributes are passed as keyword args (name=value).
    topk_node = helper.make_node(
        op_type="TopK",
        inputs=["X", "K"],
        outputs=["Values", "Indices"],
        name="topk",
        largest=1,
    )

    # Symbolic dims ("batch", "items", "k") allow any input sizes at run time.
    graph_inputs = [
        helper.make_tensor_value_info("X", TensorProto.FLOAT, ["batch", "items"]),
        helper.make_tensor_value_info("K", TensorProto.INT64, [1]),
    ]
    graph_outputs = [
        helper.make_tensor_value_info("Values", TensorProto.FLOAT, ["batch", "k"]),
        helper.make_tensor_value_info("Indices", TensorProto.INT64, ["batch", "k"]),
    ]

    graph_def = helper.make_graph(
        nodes=[topk_node],
        name="test-model",
        inputs=graph_inputs,
        outputs=graph_outputs,
        initializer=[],
    )

    # Target opset 11 for the default ("") domain; validate before saving.
    model = helper.make_model(graph_def, opset_imports=[helper.make_operatorsetid("", 11)])
    onnx.checker.check_model(model)
    onnx.save_model(model, model_name)


#
# Example code to create random input. Adjust as necessary for the input your model requires
#
def create_test_input(n, num_items, k):
    """Return a feed dict for the TopK model.

    "X" is random float32 of shape (n, num_items); "K" is an int64 array holding [k].
    """
    return {
        "X": np.random.randn(n, num_items).astype(np.float32),
        "K": np.asarray([k]).astype(np.int64),
    }


#
# Example code that tests various combinations of input sizes.
#
def run_perf_tests(model_path, num_threads=1):
    """Benchmark the model at `model_path` across input-size combinations.

    For each (batch, items, k) combination, runs the session repeatedly for at
    least one second of measured time and prints the average per-run time in ns.

    :param model_path: Path of the ONNX model file to load.
    :param num_threads: Value used for SessionOptions.intra_op_num_threads.
    """
    so = rt.SessionOptions()
    so.intra_op_num_threads = num_threads
    sess = rt.InferenceSession(model_path, sess_options=so)

    # Input-size combinations to sweep over in the nested loops below.
    batches = [10, 25, 50]
    batch_size = [8, 16, 32, 64, 128, 256, 512, 1024, 2048]
    k_vals = [1, 2, 4, 6, 8, 16, 24, 32, 48, 64, 128]

    # exploit scope to access variables from below for each iteration
    def run_test():
        # Reads `inputs`, `n`, `num_items` and `k` from the enclosing loop via closure.
        num_seconds = 1 * 1000 * 1000 * 1000  # seconds in ns
        iters = 0  # calibrated number of sess.run calls per timed measurement
        total = 0  # accumulated measured time in ns
        total_iters = 0  # total sess.run calls included in `total`

        # For a simple model execution can be faster than time.time_ns() updates. Due to this we want to estimate
        # a number of iterations per measurement.
        # Estimate based on iterations in 5ms, but note that 5ms includes all the time_ns calls
        # which are excluded in the real measurement. The actual time that many iterations
        # takes will be much lower if the individual execution time is very small.
        start = time.time_ns()
        while time.time_ns() - start < 5 * 1000 * 1000:  # 5 ms
            sess.run(None, inputs)
            iters += 1

        # run the model and measure time after 'iters' calls
        while total < num_seconds:
            start = time.time_ns()
            for _i in range(iters):
                # ignore the outputs as we're not validating them in a performance test
                sess.run(None, inputs)
            end = time.time_ns()
            # Guard against a clock resolution too coarse to see `iters` runs
            # (would make the average meaningless). NOTE: stripped under -O.
            assert end - start > 0
            total += end - start
            total_iters += iters

        # Adjust the output you want as needed
        print(f"n={n},items={num_items},k={k},avg:{total / total_iters:.4f}")

    # combine the various input parameters and create input for each test
    for n in batches:
        for num_items in batch_size:
            for k in k_vals:
                # TopK only makes sense when k is smaller than the number of items.
                if k < num_items:
                    # adjust as necessary for the inputs your model requires
                    inputs = create_test_input(n, num_items, k)

                    # use timeit to disable gc etc. but let each test measure total time and average time
                    # as multiple iterations may be required between each measurement
                    timeit.timeit(lambda: run_test(), number=1)


#
# example for creating a test directory for use with onnx_test_runner or onnxruntime_perf_test
# so that the model can be easily run directly or from a debugger.
#
def create_example_test_directory():
    """Create PerfTests/test1 for use with onnx_test_runner or onnxruntime_perf_test.

    Pins the 'K' input to 64; random data is generated for any inputs not listed.
    The model is run once and its output saved as the expected output for future runs.
    """
    # inputs we want specific values for; anything missing gets random data
    fixed_inputs = {"K": np.asarray([64]).astype(np.int64)}

    # concrete values for the model's symbolic dimensions
    dim_values = {"batch": 25, "items": 256}

    ort_test_dir_utils.create_test_dir("topk.onnx", "PerfTests", "test1", fixed_inputs, dim_values)


# Guard the script entry point so importing this module (e.g. to reuse
# create_model/create_test_input elsewhere) does not trigger model creation,
# test-directory creation and a lengthy benchmark run as import side effects.
if __name__ == "__main__":
    # this will create the model file in the current directory
    create_model("topk.onnx")

    # this will create a test directory that can be used with onnx_test_runner or onnxruntime_perf_test
    create_example_test_directory()

    # this can loop over various combinations of input, using the specified number of threads
    run_perf_tests("topk.onnx", 1)