File: overheads.py

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (41 lines) | stat: -rw-r--r-- 923 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import time
import timeit

import numpy as np

import torch


def add1(x):
    """Return ``x + 1``.

    This deliberately tiny function is the workload that ``main`` runs
    both directly and through ``torch.compile``.
    """
    incremented = x + 1
    return incremented


def bench(name, fn, requires_grad):
    torch._dynamo.reset()
    x = torch.randn(1, requires_grad=requires_grad)
    start = time.perf_counter()
    for _ in range(3):
        fn(x)
    end = time.perf_counter()

    results = timeit.repeat(lambda: fn(x), number=1000, repeat=1000)
    print(f"{name} {np.median(results)*1000:.1f}us (warmup={end-start:.1f}s)")


def main():
    """Benchmark eager vs. ``torch.compile`` call overhead of ``add1``.

    Three sections are printed in order: inputs created with
    ``requires_grad=False``, inputs created with ``requires_grad=True``,
    and a run inside ``torch.inference_mode()``.
    """

    def run_pair(grad_flag):
        # Eager first, then a freshly compiled wrapper of the same function.
        bench("eager   ", add1, grad_flag)
        bench("compiled", torch.compile(add1), grad_flag)

    sections = (
        ("requires_grad=False", False),
        ("requires_grad=True", True),
    )
    for heading, grad_flag in sections:
        print(heading)
        run_pair(grad_flag)
        print()

    print("inference_mode()")
    with torch.inference_mode():
        run_pair(False)


# Run the benchmarks only when this file is executed as a script.
if __name__ == "__main__":
    main()