1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
|
#define __NVFUSER_HALF_TO_US(var) *(reinterpret_cast<unsigned short*>(&(var)))
#define __NVFUSER_HALF_TO_CUS(var) \
*(reinterpret_cast<const unsigned short*>(&(var)))
struct __half;
__device__ __half __float2half(const float);
struct __align__(2) __half {
__half() = default;
__device__ __half(const float f) {
__x = __float2half(f).__x;
}
protected:
unsigned short __x;
};
__device__ __half __float2half(const float f) {
__half val;
asm("{ cvt.rn.f16.f32 %0, %1;}\n"
: "=h"(__NVFUSER_HALF_TO_US(val))
: "f"(f));
return val;
}
__device__ float __half2float(const __half h) {
float val;
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(__NVFUSER_HALF_TO_CUS(h)));
return val;
}
__device__ __half __double2half(const double d) {
#if __CUDA_ARCH__ >= 700
__half val;
asm("{ cvt.rn.f16.f64 %0, %1;}\n"
: "=h"(__NVFUSER_HALF_TO_US(val))
: "d"(d));
return val;
#else
return __float2half(static_cast<float>(d));
#endif
}
__device__ double __half2double(const __half h) {
#if __CUDA_ARCH__ >= 700
double val;
asm("{ cvt.f64.f16 %0, %1;}\n" : "=d"(val) : "h"(__NVFUSER_HALF_TO_CUS(h)));
return val;
#else
return static_cast<double>(__half2float(h));
#endif
}
|