File: benchmark.cc

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (68 lines) | stat: -rw-r--r-- 2,379 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include "caffe2/core/init.h"
#include "caffe2/predictor/emulator/std_output_formatter.h"

#include "benchmark.h"

// Basic benchmark params
// As used by BenchmarkRunner::benchmark in this file: each of the `runs`
// independent runs first executes `warmup` iterations outside the profiler,
// then executes `iter` iterations under the profiler. `threads` is only
// logged and forwarded to the output formatter here; it is not used in this
// file to spawn threads.
C10_DEFINE_int(warmup, 10000, "The number of iterations to warm up.");
C10_DEFINE_int(iter, 10000000, "The number of iterations to run.");
C10_DEFINE_int(threads, 32, "The number of threads to run.");
C10_DEFINE_int(runs, 10, "The number of independent runs.");

// Benchmark setup params
// NOTE(review): this flag is not referenced in the visible part of this file;
// presumably it is consumed by the code that constructs predictors — confirm.
C10_DEFINE_int(
    num_loading_threads,
    56,
    "The number of threads to build predictors.");

// Benchmark model params
// All of these default to the empty string. None of them is read in the
// visible part of this file; presumably they are consumed by the code that
// sets up the emulator/benchmark parameters — confirm against callers.
C10_DEFINE_string(run_net, "", "The given net to benchmark.");
C10_DEFINE_string(init_net, "", "The given net to initialize.");
C10_DEFINE_string(data_net, "", "The given net to get input data.");
// Per the help text, `input_dims` is a path to a file (not inline data)
// describing the input dimensions of the operators in the run net.
C10_DEFINE_string(
    input_dims,
    "",
    "The path of the file that "
    "stores input dimensions of all the operators in the run net. "
    "Each element of the array is a mapping from "
    "operator index to its input dimension.");
// Per the help text, `input_types` is a path to a file (not inline data)
// describing the input types of the operators in the run net.
C10_DEFINE_string(
    input_types,
    "",
    "The path of the file that "
    "stores input types of all the operators in the run net. "
    "Each element of the array is a mapping from "
    "operator index to its input types.");

namespace caffe2 {
namespace emulator {

// Runs the benchmark FLAGS_runs times and logs a formatted summary.
//
// The emulator is initialized once up front. Each run then:
//   1. executes FLAGS_warmup iterations outside the profiler (warm-up),
//   2. calls pre_benchmark_setup(),
//   3. executes FLAGS_iter iterations under param.profiler and records the
//      value it returns (treated here as a duration in milliseconds),
//   4. logs the duration and throughput, then calls post_benchmark_cleanup().
// After the last run, the recorded per-run durations are passed to
// param.formatter together with FLAGS_threads and FLAGS_iter, and the result
// is logged at WARNING level.
//
// If FLAGS_runs is zero or negative, no run is executed and the formatter is
// invoked with an empty list of durations.
void BenchmarkRunner::benchmark(const BenchmarkParam& param) {
  param.emulator->init();
  std::vector<float> durations_ms;

  // Keep the counter signed like the flag itself. Comparing an unsigned
  // counter against a negative FLAGS_runs would convert the flag to a huge
  // unsigned value and loop (practically) forever instead of not at all.
  const int total_runs = FLAGS_runs;
  for (int run = 0; run < total_runs; ++run) {
    LOG(WARNING) << "Starting run " << run + 1;
    LOG(INFO) << "Warming up " << FLAGS_threads << " threads with "
              << FLAGS_warmup << " iterations...";
    param.emulator->run(FLAGS_warmup);

    LOG(INFO) << "Starting benchmark with " << FLAGS_iter << " iterations...";
    pre_benchmark_setup();
    // Only the measured iterations are executed under the profiler; the
    // warm-up above is deliberately excluded.
    const auto duration_ms =
        param.profiler->profile([&]() { param.emulator->run(FLAGS_iter); });

    durations_ms.emplace_back(duration_ms);
    auto throughput = FLAGS_iter / (duration_ms / MS_IN_SECOND);
    LOG(INFO) << "Benchmark run finished in " << duration_ms / MS_IN_SECOND
              << " seconds.\n"
              << "Throughput:\t\t" << throughput << " iterations/s\n";

    post_benchmark_cleanup();
    LOG(INFO) << "Run " << run + 1 << " finished";
  }
  LOG(WARNING) << param.formatter->format(
      durations_ms, FLAGS_threads, FLAGS_iter);
}

} // namespace emulator
} // namespace caffe2