// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
#include "caffe2/core/context.h"
#include "caffe2/utils/math.h"
namespace caffe2 {
// Declares a matrix-multiply (GEMM) routine; only the prototype is visible in
// this header, so everything below is derived from the parameter names and
// types alone.
// NOTE(review): confirm all semantics against the definition.
//   m, k, n              - integer dimensions. Presumably A is m x k, B is
//                          k x n and C is m x n -- TODO confirm. Note the
//                          order here is (m, k, n), while the batched
//                          declarations in this header take (M, N, K).
//   A_fp16, B_fp16       - read-only float buffers. The names suggest the
//                          values are already rounded to fp16 precision while
//                          stored as float -- TODO confirm.
//   beta                 - presumably the scale applied to the existing
//                          contents of C before accumulation -- confirm.
//   C                    - non-const float buffer (the output).
//   use_acc_fp16         - presumably selects fp16 accumulation -- confirm.
//   use_temp_accumulator - presumably selects accumulation through a
//                          temporary buffer -- confirm.
void custom_fp16_gemm(
const int m,
const int k,
const int n,
const float* A_fp16,
const float* B_fp16,
const float beta,
float* C,
const bool use_acc_fp16,
const bool use_temp_accumulator);
// Declares a GEMM routine that additionally takes a CBLAS_TRANSPOSE flag for
// each input matrix; the remaining parameters mirror custom_fp16_gemm. Only
// the prototype is visible here.
// NOTE(review): confirm all semantics against the definition.
//   trans_A, trans_B     - CBLAS_TRANSPOSE values, presumably indicating
//                          whether A / B are to be read as transposed
//                          -- confirm.
//   m, k, n              - integer dimensions. Presumably the dimensions of
//                          the (possibly transposed) operands, i.e. op(A) is
//                          m x k and op(B) is k x n -- TODO confirm.
//   A_fp16, B_fp16       - read-only float buffers. The names suggest values
//                          pre-rounded to fp16 precision -- TODO confirm.
//   beta                 - presumably the scale applied to the existing
//                          contents of C -- confirm.
//   C                    - non-const float buffer (the output).
//   use_acc_fp16         - presumably selects fp16 accumulation -- confirm.
//   use_temp_accumulator - presumably selects accumulation through a
//                          temporary buffer -- confirm.
void custom_fp16_gemm_with_trans(
const CBLAS_TRANSPOSE trans_A,
const CBLAS_TRANSPOSE trans_B,
const int m,
const int k,
const int n,
const float* A_fp16,
const float* B_fp16,
const float beta,
float* C,
const bool use_acc_fp16,
const bool use_temp_accumulator);
// Declares a matrix transpose helper: reads from the const buffer A and takes
// a non-const buffer A_trans, with integer dimensions M and N.
// NOTE(review): presumably A is M x N and A_trans receives the N x M result;
// the storage order and whether A and A_trans may alias are not visible
// here -- confirm against the definition.
void transpose(const float* A, float* A_trans, int M, int N);
// Declares a matrix-vector multiply (GEMV) routine. Only the prototype is
// visible in this header.
// NOTE(review): confirm all semantics against the definition.
//   use_acc_fp16         - presumably selects fp16 accumulation -- confirm.
//   use_custom_acc32     - presumably selects a custom 32-bit accumulation
//                          path -- confirm.
//   use_temp_accumulator - presumably selects accumulation through a
//                          temporary buffer -- confirm.
//   trans_A              - CBLAS_TRANSPOSE value, presumably indicating
//                          whether A is to be read as transposed -- confirm.
//   M, N                 - integer dimensions, presumably of A (M x N)
//                          -- TODO confirm.
//   alpha, beta          - float scalars. Presumably follow the BLAS
//                          convention y = alpha * op(A) * x + beta * y
//                          -- TODO confirm (in the strided-batched
//                          declaration in this header alpha is marked
//                          unused).
//   A, x                 - read-only float buffers (matrix and input vector).
//   y                    - non-const float buffer (the output vector).
//   context              - pointer to a CPUContext; how it is used is not
//                          visible here.
void custom_fp16_gemv(
const bool use_acc_fp16,
const bool use_custom_acc32,
const bool use_temp_accumulator,
const CBLAS_TRANSPOSE trans_A,
const int M,
const int N,
const float alpha,
const float* A,
const float* x,
const float beta,
float* y,
CPUContext* context);
// Declares a batched GEMM routine whose matrices are passed as arrays of
// pointers (const float** / float**). Only the prototype is visible in this
// header.
// NOTE(review): confirm all semantics against the definition.
//   use_acc_fp16         - presumably selects fp16 accumulation -- confirm.
//   use_custom_acc32     - presumably selects a custom 32-bit accumulation
//                          path -- confirm.
//   use_temp_accumulator - presumably selects accumulation through a
//                          temporary buffer -- confirm.
//   trans_A, trans_B     - CBLAS_TRANSPOSE values, presumably indicating
//                          whether each A / B is read as transposed
//                          -- confirm.
//   batch_size           - integer count, presumably the number of entries in
//                          each of the A, B and C pointer arrays -- confirm.
//   M, N, K              - integer dimensions. Presumably op(A) is M x K,
//                          op(B) is K x N and C is M x N -- TODO confirm.
//                          Note the order is (M, N, K), unlike the (m, k, n)
//                          order of custom_fp16_gemm.
//   alpha, beta          - float scalars, presumably the usual GEMM scale
//                          factors -- TODO confirm whether alpha is honored.
//   A, B                 - pointers to pointers to const float (inputs).
//   C                    - pointer to pointers to float (outputs).
//   context              - pointer to a CPUContext; how it is used is not
//                          visible here.
void custom_fp16_gemm_batched(
const bool use_acc_fp16,
const bool use_custom_acc32,
const bool use_temp_accumulator,
const CBLAS_TRANSPOSE trans_A,
const CBLAS_TRANSPOSE trans_B,
const int batch_size,
const int M,
const int N,
const int K,
const float alpha,
const float** A,
const float** B,
const float beta,
float** C,
CPUContext* context);
// Declares a batched GEMM routine in which each of A, B and C is a single
// buffer accompanied by an integer stride. Only the prototype is visible in
// this header.
// NOTE(review): confirm all semantics against the definition.
//   use_acc_fp16         - presumably selects fp16 accumulation -- confirm.
//   use_custom_acc32     - presumably selects a custom 32-bit accumulation
//                          path -- confirm.
//   use_temp_accumulator - presumably selects accumulation through a
//                          temporary buffer -- confirm.
//   trans_A, trans_B     - CBLAS_TRANSPOSE values, presumably indicating
//                          whether each A / B is read as transposed
//                          -- confirm.
//   batch_size           - integer count, presumably the number of matrices
//                          packed in each buffer -- confirm.
//   M, N, K              - integer dimensions. Presumably op(A) is M x K,
//                          op(B) is K x N and C is M x N -- TODO confirm.
//   alpha                - marked unused in the declaration below.
//   A, B                 - read-only float buffers (inputs).
//   A_stride, B_stride,
//   C_stride             - integer strides, presumably the element offset
//                          between consecutive matrices in the respective
//                          buffer -- TODO confirm the unit (elements vs.
//                          bytes).
//   beta                 - presumably the scale applied to the existing
//                          contents of C -- confirm.
//   C                    - non-const float buffer (the output).
//   context              - pointer to a CPUContext; how it is used is not
//                          visible here.
void custom_fp16_gemm_strided_batched(
const bool use_acc_fp16,
const bool use_custom_acc32,
const bool use_temp_accumulator,
const CBLAS_TRANSPOSE trans_A,
const CBLAS_TRANSPOSE trans_B,
const int batch_size,
const int M,
const int N,
const int K,
const float alpha /* unused */,
const float* A,
const int A_stride,
const float* B,
const int B_stride,
const float beta,
float* C,
const int C_stride,
CPUContext* context);
} // namespace caffe2