File: elementwise_sum_benchmark.cc

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (36 lines) | stat: -rw-r--r-- 820 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <vector>

#include "utility_dnnlowp_ops.h"

using namespace std;

// Times a single call of caffe2::internal::ElementWiseSumAVX2<uint8_t, false>
// on uint8_t buffers and prints the resulting throughput in GB/s.
//
// Usage: <binary> [LEN]
//   LEN  number of elements in each buffer; must be a positive decimal
//        integer that fits in int. Defaults to 65536 when omitted.
//
// Returns 0 on success, or 1 (after printing a usage message to stderr) when
// LEN is not a valid positive integer.
int main(int argc, const char* argv[]) {
  int LEN = 65536;
  if (argc > 1) {
    // strtoll reports both trailing garbage (via `end`) and out-of-range
    // input (long long is wider than int, so the range check below catches
    // it). This rejects a bad argument up front instead of letting a zero or
    // negative length reach the vector constructors.
    char* end = nullptr;
    const long long parsed = std::strtoll(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || parsed <= 0 ||
        parsed > std::numeric_limits<int>::max()) {
      std::cerr << "usage: " << argv[0]
                << " [LEN]  (LEN must be a positive integer)" << std::endl;
      return 1;
    }
    LEN = static_cast<int>(parsed);
  }

  // Two inputs and one output, each LEN bytes long.
  std::vector<uint8_t> a(LEN), b(LEN), c_avx2(LEN);
  // Fill with i % 256 and (2 * i) % 256. The index is unsigned so that the
  // doubling cannot overflow a signed int for large LEN; the narrowing cast
  // performs the modulo-256 reduction.
  for (std::size_t i = 0; i < a.size(); ++i) {
    a[i] = static_cast<uint8_t>(i);
    b[i] = static_cast<uint8_t>(i * 2);
  }

  // steady_clock is monotonic, so the measured interval cannot be distorted
  // by wall-clock adjustments the way a system_clock interval can.
  const auto start = std::chrono::steady_clock::now();
  // NOTE(review): the trailing arguments look like (scale, zero point) pairs
  // for a, b and the output, in that order -- confirm against
  // utility_dnnlowp_ops.h.
  caffe2::internal::ElementWiseSumAVX2<uint8_t, false>(
      a.data(),
      b.data(),
      c_avx2.data(),
      a.size(),
      1.0f,
      11,
      2.0f,
      22,
      3.0f,
      33);
  const double dt =
      std::chrono::duration<double>(std::chrono::steady_clock::now() - start)
          .count();
  // Count all three LEN-element buffers handed to the kernel.
  const double bytes = 3. * LEN * sizeof(a[0]);
  std::cout << bytes / dt / 1e9 << " GB/s" << std::endl;

  return 0;
}