File: numa_benchmark.py

package info (click to toggle)
pytorch 1.13.1+dfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (69 lines) | stat: -rw-r--r-- 2,230 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69




from caffe2.python import core, workspace
from caffe2.proto import caffe2_pb2
import time

# Benchmark configuration.
SHAPE_LEN = 4096  # side length of each square blob (shape is SHAPE_LEN x SHAPE_LEN)
NUM_ITER = 1000  # iteration count handed to every timed workspace.RunNet call
NUM_REPLICAS = 48  # number of input/output blob pairs (one op each) per net
GB = 1 << 30  # divisor used to express the measured byte rate in GB/s


def build_net(net_name, cross_socket):
    """Build the fill net and the copy net for one benchmark configuration.

    The init net gets NUM_REPLICAS ``XavierFill`` ops, each producing a
    SHAPE_LEN x SHAPE_LEN blob named ``<net_name>/input_blob_<i>`` and tagged
    with a CPU device option whose ``numa_node_id`` is 0.  The main net gets
    one ``Copy`` op per replica, from the input blob to
    ``<net_name>/output_blob_<i>``, tagged with NUMA node 1 when
    ``cross_socket`` is truthy and NUMA node 0 otherwise.

    Args:
        net_name: Name of the main net; also the prefix of every blob name
            and, with ``"_init"`` appended, the name of the init net.
        cross_socket: Whether the copy ops should be assigned to NUMA node 1
            instead of node 0.

    Returns:
        A ``(init_net, net)`` tuple of ``core.Net`` objects, both with their
        proto type set to ``"async_scheduling"``.
    """
    init_net = core.Net(net_name + "_init")
    init_net.Proto().type = "async_scheduling"

    replica_ids = range(NUM_REPLICAS)
    sources = ["{}/input_blob_{}".format(net_name, idx) for idx in replica_ids]
    targets = ["{}/output_blob_{}".format(net_name, idx) for idx in replica_ids]

    device_opt = caffe2_pb2.DeviceOption()
    device_opt.device_type = caffe2_pb2.CPU
    device_opt.numa_node_id = 0
    for blob in sources:
        init_net.XavierFill(
            [],
            blob,
            shape=[SHAPE_LEN, SHAPE_LEN],
            device_option=device_opt,
        )

    net = core.Net(net_name)
    net.Proto().type = "async_scheduling"
    # The same DeviceOption object is retargeted here, after the fill ops were
    # added.  NOTE(review): this relies on each op having captured its own copy
    # of the option when it was created -- confirm.
    device_opt.numa_node_id = 1 if cross_socket else 0
    for src, dst in zip(sources, targets):
        net.Copy(src, dst, device_option=device_opt)

    return init_net, net


def _run_and_report(net, label):
    """Time NUM_ITER runs of *net*, print the results, and return the GB/s.

    Args:
        net: A net already created in the workspace; it is run by name.
        label: Prefix for the printed lines (e.g. ``"Single"`` or ``"Cross"``).

    Returns:
        The measured bandwidth in GB/s as a float.
    """
    # Same factor of 4 the benchmark has always used per blob element;
    # presumably the byte size of a float32 value -- TODO confirm.
    bytes_per_element = 4
    bytes_moved = (
        bytes_per_element * SHAPE_LEN * SHAPE_LEN * NUM_REPLICAS * NUM_ITER
    )

    # perf_counter() is a monotonic, high-resolution clock; time.time() can
    # jump if the system clock is adjusted while the benchmark is running.
    start = time.perf_counter()
    workspace.RunNet(net.Name(), NUM_ITER)
    elapsed = time.perf_counter() - start

    print("{} socket time:".format(label), elapsed)
    bandwidth = bytes_moved / elapsed / GB
    print("{} socket BW: {} GB/s".format(label, bandwidth))
    return bandwidth


def main():
    """Compare copy bandwidth of the single-socket and cross-socket nets.

    Builds both configurations with ``build_net``, runs their init nets once,
    then performs four rounds in which each copy net is run for NUM_ITER
    iterations.  Every round prints the elapsed time and bandwidth of both
    nets followed by the single/cross bandwidth ratio.

    Raises:
        RuntimeError: If NUMA support is not enabled or fewer than two NUMA
            nodes are reported by the workspace.
    """
    # Explicit check instead of `assert`, which is removed under `python -O`
    # and would let the benchmark run in an unsuitable environment.
    if not (workspace.IsNUMAEnabled() and workspace.GetNumNUMANodes() >= 2):
        raise RuntimeError(
            "NUMA benchmark requires NUMA support to be enabled and at least "
            "two NUMA nodes"
        )

    single_init, single_net = build_net("single_net", False)
    cross_init, cross_net = build_net("cross_net", True)

    # Run both init nets once so the input blobs exist before timing starts.
    for init_net in (single_init, cross_init):
        workspace.CreateNet(init_net)
        workspace.RunNet(init_net.Name())

    workspace.CreateNet(single_net)
    workspace.CreateNet(cross_net)

    for _ in range(4):
        single_bw = _run_and_report(single_net, "Single")
        cross_bw = _run_and_report(cross_net, "Cross")
        print("Single BW / Cross BW: {}".format(single_bw / cross_bw))


if __name__ == '__main__':
    # Initialise Caffe2 with the CPU NUMA flag switched on before main() runs;
    # main() checks workspace.IsNUMAEnabled() as its first step.
    core.GlobalInit(["caffe2", "--caffe2_cpu_numa_enabled=1"])
    main()