File: benchmark.cc

package info (click to toggle)
pytorch 1.7.1-7
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 80,340 kB
  • sloc: cpp: 670,830; python: 343,991; ansic: 67,845; asm: 5,503; sh: 2,924; java: 2,888; xml: 266; makefile: 244; ruby: 148; yacc: 144; objc: 51; lex: 44
file content (68 lines) | stat: -rw-r--r-- 2,379 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include "caffe2/core/init.h"
#include "caffe2/predictor/emulator/std_output_formatter.h"

#include "benchmark.h"

// Basic benchmark params: these drive the warm-up / measure loop in
// BenchmarkRunner::benchmark() below.
// Iterations handed to the emulator before each timed run; not measured.
C10_DEFINE_int(warmup, 10000, "The number of iterations to warm up.");
// Iterations executed inside the profiled section of each run; also the
// numerator of the throughput that is logged per run.
C10_DEFINE_int(iter, 10000000, "The number of iterations to run.");
// In this file the value is only logged and forwarded to the output formatter.
C10_DEFINE_int(threads, 32, "The number of threads to run.");
// Number of warm-up + timed-run cycles performed by benchmark().
C10_DEFINE_int(runs, 10, "The number of independent runs.");

// Benchmark setup params
// NOTE(review): this flag is not read anywhere in this file; presumably it is
// consumed by the code that builds the predictors -- confirm against callers.
C10_DEFINE_int(
    num_loading_threads,
    56,
    "The number of threads to build predictors.");

// Benchmark model params
// NOTE(review): none of the flags below are read in this file; presumably they
// are consumed where the emulator / input filler is constructed -- confirm.
// All of them default to the empty string.
C10_DEFINE_string(run_net, "", "The given net to benchmark.");
C10_DEFINE_string(init_net, "", "The given net to initialize.");
C10_DEFINE_string(data_net, "", "The given net to get input data.");
// Path to a file describing the input dimensions of the run net's operators.
C10_DEFINE_string(
    input_dims,
    "",
    "The path of the file that "
    "stores input dimensions of all the operators in the run net. "
    "Each element of the array is a mapping from "
    "operator index to its input dimension.");
// Path to a file describing the input types of the run net's operators.
C10_DEFINE_string(
    input_types,
    "",
    "The path of the file that "
    "stores input types of all the operators in the run net. "
    "Each element of the array is a mapping from "
    "operator index to its input types.");

namespace caffe2 {
namespace emulator {

/**
 * Runs the benchmark described by `param` FLAGS_runs times and logs a summary.
 *
 * The emulator is initialized once. Each run then:
 *   1. warms up with FLAGS_warmup iterations (outside the profiled section),
 *   2. calls pre_benchmark_setup(),
 *   3. executes FLAGS_iter iterations inside param.profiler->profile(),
 *   4. logs the elapsed time and the resulting throughput, and
 *   5. calls post_benchmark_cleanup().
 * After the last run the collected durations are handed to param.formatter
 * together with FLAGS_threads and FLAGS_iter, and the result is logged.
 *
 * The value returned by profile() is treated as a duration in milliseconds
 * (it is divided by MS_IN_SECOND before being reported as seconds).
 *
 * A non-positive FLAGS_runs performs no runs; the formatter is then invoked
 * with an empty list of durations.
 *
 * @param param Bundle providing the emulator, profiler and formatter to use.
 */
void BenchmarkRunner::benchmark(const BenchmarkParam& param) {
  param.emulator->init();
  std::vector<float> durations_ms;
  // Use a signed index: FLAGS_runs is a signed integer flag, and comparing it
  // against an unsigned counter would convert a negative --runs into a huge
  // unsigned bound, making the loop run (practically) forever.
  for (int run = 0; run < FLAGS_runs; ++run) {
    LOG(WARNING) << "Starting run " << run + 1;
    LOG(INFO) << "Warming up " << FLAGS_threads << " threads with "
              << FLAGS_warmup << " iterations...";
    // Warm-up happens before profiling starts, so it is not part of the
    // measured duration.
    param.emulator->run(FLAGS_warmup);

    LOG(INFO) << "Starting benchmark with " << FLAGS_iter << " iterations...";
    pre_benchmark_setup();
    const auto duration_ms =
        param.profiler->profile([&]() { param.emulator->run(FLAGS_iter); });

    durations_ms.emplace_back(duration_ms);
    // Iterations per second for this run.
    const auto throughput = FLAGS_iter / (duration_ms / MS_IN_SECOND);
    LOG(INFO) << "Benchmark run finished in " << duration_ms / MS_IN_SECOND
              << " seconds.\n"
              << "Throughput:\t\t" << throughput << " iterations/s\n";

    post_benchmark_cleanup();
    LOG(INFO) << "Run " << run + 1 << " finished";
  }
  // The summary is logged at WARNING severity, like the per-run start banner.
  LOG(WARNING) << param.formatter->format(
      durations_ms, FLAGS_threads, FLAGS_iter);
}

} // namespace emulator
} // namespace caffe2