File: fp16_fma.h

#pragma once
#include <glog/logging.h>

namespace fake_fp16 {

// Element-wise fused multiply-add with fp16 accumulation:
// Out[i] = A[i] * B[i] + Out[i] for i in [0, N)
void fma_fp16(int N, const float* A, const float* B, float* Out);

// Scalar reference implementation of the same element-wise FMA.
void fma_fp16_slow(int N, const float* A, const float* B, float* Out);

// Single-element variant: returns A * B + Out with fp16 accumulation.
float fma_fp16_slow(const float A, const float B, float Out);

// Emulates an fp32 fused multiply-add (v1 * v2 + v3) using AVX.
float fmafp32_avx_emulation(float v1, float v2, float v3);

} // namespace fake_fp16
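
A minimal usage sketch (illustrative values; it assumes the declarations above are linked against their definitions elsewhere in the tree):

    #include <cstdio>
    #include <vector>
    #include "fp16_fma.h"

    int main() {
      // Accumulate A[i] * B[i] into Out[i] with fp16 accumulation semantics.
      std::vector<float> A{1.0f, 2.0f, 3.0f};
      std::vector<float> B{0.5f, 0.25f, 0.125f};
      std::vector<float> Out{1.0f, 1.0f, 1.0f};

      fake_fp16::fma_fp16(static_cast<int>(A.size()),
                          A.data(), B.data(), Out.data());

      // All products and sums here are exactly representable in fp16,
      // so the expected output is 1.5, 1.5, 1.375.
      for (float v : Out) std::printf("%f\n", v);
      return 0;
    }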