File: at_launch_benchmark.cc

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (94 lines) | stat: -rw-r--r-- 2,548 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#include "ATen/Parallel.h"

#include "c10/util/Flags.h"
#include "caffe2/core/init.h"

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <ctime>

// Command-line flags controlling the benchmark.
//   iter             - tasks launched in every timed round.
//   warmup_iter      - tasks launched in the single untimed-in-summary warmup
//                      round that precedes the timed rounds.
//   inter_op_threads - inter-op thread count to request; values <= 0 leave the
//                      library's current setting untouched.
//   benchmark_iter   - number of timed rounds.
// The default for `iter` is spelled as an integer literal (it is an int flag),
// and every definition is terminated with ';' for consistency.
C10_DEFINE_int(iter, 100000, "Number of at::launch iterations (tasks)");
C10_DEFINE_int(warmup_iter, 10, "Number of warmup iterations");
C10_DEFINE_int(inter_op_threads, 0, "Number of inter-op threads");
C10_DEFINE_int(benchmark_iter, 3, "Number of times to run benchmark");

namespace {
// Number of completed tasks the current round waits for. Written by
// launch_tasks_and_wait() and read by the task callbacks. It is atomic because
// a callback that has already bumped `counter` may still be comparing against
// this value when the next round stores a new target; with a plain int that
// overlap would be a data race.
std::atomic<int> iter{0};
// Number of innermost task callbacks that have run in the current round.
std::atomic<int> counter{0};
// Signalled by the callback whose increment makes `counter` equal `iter`.
std::condition_variable cv;
// Held while waiting on / notifying `cv`.
std::mutex mutex;
}

 // Submits one benchmark task: three at::launch calls nested inside each
 // other. Only the innermost callback does any work: it increments the shared
 // counter and, if the incremented value equals `iter`, takes the mutex and
 // signals the condition variable.
 void launch_tasks() {
  // Innermost stage: count this task and signal on reaching the target.
  const auto count_and_signal = []() {
    const int seen = ++counter;
    if (seen != iter) {
      return;
    }
    std::unique_lock<std::mutex> guard(mutex);
    cv.notify_one();
  };
  // Middle stage: hands the innermost stage to at::launch.
  const auto middle_stage = [count_and_signal]() {
    at::launch(count_and_signal);
  };
  // Outer stage: hands the middle stage to at::launch.
  at::launch([middle_stage]() { at::launch(middle_stage); });
}

void launch_tasks_and_wait(int tasks_num) {
  iter = tasks_num;
  counter = 0;
  for (auto idx = 0; idx < iter; ++idx) {
    launch_tasks();
  }
  {
    std::unique_lock<std::mutex> lk(mutex);
    while (counter < iter) {
      cv.wait(lk);
    }
  }
}

// Entry point of the at::launch benchmark.
//
// Parses the command-line flags, runs the "registerThreadPools" caffe2 init
// function, optionally sets the inter-op thread count, runs one warmup round
// of FLAGS_warmup_iter tasks, and then FLAGS_benchmark_iter timed rounds of
// FLAGS_iter tasks each. The elapsed time of every round is written to stdout.
//
// Returns 0 on success, -1 if the command-line flags cannot be parsed.
int main(int argc, char** argv) {
  if (!c10::ParseCommandLineFlags(&argc, &argv)) {
    std::cout << "Failed to parse command line flags" << std::endl;
    return -1;
  }
  caffe2::unsafeRunCaffe2InitFunction("registerThreadPools");
  at::init_num_threads();

  // Only override the inter-op thread count when a positive value was given.
  if (FLAGS_inter_op_threads > 0) {
    at::set_num_interop_threads(FLAGS_inter_op_threads);
  }

  // steady_clock is monotonic, which is what interval measurement needs;
  // high_resolution_clock is allowed to be an alias of the adjustable system
  // clock.
  using clock = std::chrono::steady_clock;
  // Floating-point milliseconds: keeps sub-millisecond precision instead of
  // truncating short rounds (e.g. the warmup) to 0 ms.
  using float_ms = std::chrono::duration<float, std::milli>;

  std::cout << "Launching " << FLAGS_warmup_iter << " warmup tasks using "
            << at::get_num_interop_threads() << " threads "
            << std::endl;

  std::chrono::time_point<clock> start_time = clock::now();
  launch_tasks_and_wait(FLAGS_warmup_iter);
  float duration = float_ms(clock::now() - start_time).count();

  std::cout << "Warmup time: " << duration << " ms." << std::endl;

  std::cout << "Launching " << FLAGS_iter << " tasks using "
            << at::get_num_interop_threads() << " threads "
            << std::endl;

  for (int bench_iter = 0; bench_iter < FLAGS_benchmark_iter; ++bench_iter) {
    start_time = clock::now();
    launch_tasks_and_wait(FLAGS_iter);
    duration = float_ms(clock::now() - start_time).count();

    // `iter` holds the task count of the round that just finished.
    std::cout << "Time to run " << iter << " iterations "
              << (duration/1000.0) << " s." << std::endl;
  }

  return 0;
}