File: fp16_fma_test.cc

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (41 lines) | stat: -rw-r--r-- 830 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <cmath>
#include <vector>
#include "fp16_fma.h"

using namespace std;
using namespace fake_fp16;

// Smoke test for fma_fp16 on a small buffer of constant inputs.
//
// Every element of A, B and C is initialised to the same value, fma_fp16 is
// invoked once over all N elements, and each element of C is then compared
// against a pinned reference value.
// NOTE(review): presumably fma_fp16 computes C = A * B + C in place with fp16
// rounding (the reference 6.32812 is close to 1.23 * 2.34 + 3.45 = 6.3282) --
// confirm against fp16_fma.h.
TEST(FP16_FMA, Simple) {
  const int N = 6;

  // Reference output and absolute tolerance pinned by this test.
  const double kExpected = 6.32812;
  const double kTolerance = 1e-3;

  std::vector<float> A(N, 1.23f);
  std::vector<float> B(N, 2.34f);
  std::vector<float> C(N, 3.45f);
  fma_fp16(N, A.data(), B.data(), C.data());

  for (int i = 0; i < N; i++) {
    LOG(INFO) << C[i] << " ";
    // ASSERT_NEAR prints both values and the difference on failure, and does
    // not depend on which abs() overload an unqualified call would pick.
    ASSERT_NEAR(C[i], kExpected, kTolerance) << "mismatch at index " << i;
  }
}

// Placeholder for an exhaustive sweep over 16-bit input patterns.
//
// The entire body sits inside `#if 0`, so the preprocessor removes it: this
// test currently compiles to an empty body and always passes without
// checking anything. The disabled code below is a sketch only; see the
// review notes before enabling it.
TEST(FP16_FMA, Comprehensive) {
#if 0
// NOTE(review): this is `omp parallel`, not `omp parallel for` -- as written
// the loop nest would presumably be executed in full by every thread rather
// than being divided among them; confirm intent before enabling.
#pragma omp parallel num_threads(30)
  // Each counter walks the values 0 .. (1 << 15) - 1.
  for (uint16_t a = 0; a < 1 << 15; a++) {
    for (uint16_t b = 0; b < 1 << 15; b++) {
      for (uint16_t c = 0; c < 1 << 15; c++) {
        // Plain integer arithmetic on the counters; z is never read.
        uint16_t z = a + b * c;

        //       fake_fma_fp16_slow(A[0], B[0], C[0]);
      }
    }
  }

  // NOTE(review): A, B and C are not declared anywhere in this test body, so
  // this call would presumably fail to compile if the block were enabled.
  fake_fma_fp16_slow(A[0], B[0], C[0]);
#endif
}