1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
|
/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2021 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#include "../../Headers/spirv.h"
#ifndef __EXP_HYPERBOLIC_CL__
#define __EXP_HYPERBOLIC_CL__
// These versions of exp are only used for hyperbolic functions:
// sinh, cosh and tanh.
// There are two main differences between them and the regular old
// version of exp:
// - This version uses a polynomial approximation for the
// fractional part of 2^x, vs. the native version. As such
// it is slightly more accurate.
// - This version accepts an optional "scale factor" that can
// be used to scale the result by 2^scale, without losing
// accuracy.
// This version is used for sinh and cosh, and part of tanh:
float __intel_exp_for_hyper(float x, float scale)
{
// e^x = 2^(log2(e^x)) = 2^(x * log2(e))
// We'll compute 2^(x * log2(e)) by splitting x * log2(e)
// into a whole part and fractional part.
// Compute the whole part of x * log2(e)
// This part is easy!
// Note: floor rounds to negative infinity.
float w = SPIRV_OCL_BUILTIN(floor, _f32, )( x * M_LOG2E_F + 0.5f );
// Compute the fractional part of x * log2(e)
// We have to do this carefully, so we don't lose precision.
// Compute as:
// fract( x * log2(e) ) = ( x - w * C1 - w * C2 ) * log2(e)
// C1 is the "Cephes Constant", and is close to 1/log2(e)
// C2 is the difference between the "Cephes Constant" and 1/log2(e)
const float C1 = as_float( 0x3F317200 ); // 0.693145751953125
const float C2 = as_float( 0x35BFBE8E ); // 0.000001428606765330187
float f = x;
f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )( w, -C1, f );
f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )( w, -C2, f );
// Do a polynomial approximation for 2^fractional.
float f2 = f * f;
f = ((((( 1.9875691500E-4f * f
+ 1.3981999507E-3f) * f
+ 8.3334519073E-3f) * f
+ 4.1665795894E-2f) * f
+ 1.6666665459E-1f) * f
+ 5.0000001201E-1f) * f2
+ f
+ 1.0f;
// By doing this computation as 2^(w - 2) * 2^2 we can avoid an
// overflow case for very large values of w.
w = SPIRV_OCL_BUILTIN(native_exp2, _f32, )( w + scale ); // this should be exact
float res = w * f;
res = ( x < as_float( 0xC2D20000 ) ) ? as_float( 0x00000000 ) : res;
res = ( x > as_float( 0x42D20000 ) ) ? as_float( 0x7F800000 ) : res;
return res;
}
float __intel_exp_for_tanh(float x, float scale)
{
float px = SPIRV_OCL_BUILTIN(fabs, _f32, )(x);
// e^x = 2^(log2(e^x)) = 2^(x * log2(e))
// We'll compute 2^(x * log2(e)) by splitting x * log2(e)
// into a whole part and fractional part.
// Compute the whole part of x * log2(e)
// This part is easy!
// Note: floor rounds to negative infinity.
float w = SPIRV_OCL_BUILTIN(floor, _f32, )( px * M_LOG2E_F + 0.5f );
// Compute the fractional part of x * log2(e)
// We have to do this carefully, so we don't lose precision.
// Compute as:
// fract( x * log2(e) ) = ( x - w * C1 - w * C2 ) * log2(e)
// C1 is the "Cephes Constant", and is close to 1/log2(e)
// C2 is the difference between the "Cephes Constant" and 1/log2(e)
const float C1 = as_float( 0x3F317200 ); // 0.693145751953125
const float C2 = as_float( 0x35BFBE8E ); // 0.000001428606765330187
float f = px;
f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )( w, -C1, f );
f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )( w, -C2, f );
// Do a polynomial approximation for 2^fractional.
float tf =
((((( 1.9875691500E-4f * f
+ 1.3981999507E-3f) * f
+ 8.3334519073E-3f) * f
+ 4.1665795894E-2f) * f
+ 1.6666665459E-1f) * f
+ 5.0000001201E-1f) * f * f
+ f
+ 1.0f;
float ns = -scale;
scale = ( x >= 0.0f ) ? scale : ns;
w = SPIRV_OCL_BUILTIN(native_exp2, _f32, )( w + scale ); // this should be exact
float res = w * tf;
res = ( px < as_float( 0xC2D20000 ) ) ? as_float( 0x00000000 ) : res;
res = ( px > as_float( 0x42D20000 ) ) ? as_float( 0x7F800000 ) : res;
float rx = SPIRV_OCL_BUILTIN(native_recip, _f32, )( res );
res = ( x >= 0.0f ) ? res : rx;
return res;
}
#endif //__EXP_HYPERBOLIC_CL__
|