File: dump-performance.py

package info (click to toggle)
pyopencl 2025.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,336 kB
  • sloc: python: 21,012; cpp: 7,986; lisp: 3,539; ansic: 523; makefile: 45; sh: 37
file content (38 lines) | stat: -rw-r--r-- 1,332 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pyopencl as cl
import pyopencl.characterize.performance as perf


def main():
    ctx = cl.create_some_context()

    prof_overhead, latency = perf.get_profiling_overhead(ctx)
    print("command latency: %g s" % latency)
    print("profiling overhead: {:g} s -> {:.1f} %".format(
            prof_overhead, 100*prof_overhead/latency))
    queue = cl.CommandQueue(
            ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

    print("empty kernel: %g s" % perf.get_empty_kernel_time(queue))
    print("float32 add: %g GOps/s" % (perf.get_add_rate(queue)/1e9))

    for tx_type in [
            perf.HostToDeviceTransfer,
            perf.DeviceToHostTransfer,
            perf.DeviceToDeviceTransfer]:
        print("----------------------------------------")
        print(tx_type.__name__)
        print("----------------------------------------")

        print("latency: %g s" % perf.transfer_latency(queue, tx_type))
        for i in range(6, 31, 2):
            bs = 1 << i
            try:
                result = "%g GB/s" % (
                        perf.transfer_bandwidth(queue, tx_type, bs)/1e9)
            except Exception as e:
                result = "exception: %s" % e.__class__.__name__
            print("bandwidth @ %d bytes: %s" % (bs, result))


if __name__ == "__main__":
    main()