File: exp_for_hyper.cl

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (123 lines) | stat: -rw-r--r-- 4,489 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "../../Headers/spirv.h"

#ifndef __EXP_HYPERBOLIC_CL__
#define __EXP_HYPERBOLIC_CL__

// These versions of exp are only used for hyperbolic functions:
//      sinh, cosh and tanh.

// There are two main differences between them and the regular old
// version of exp:
//  - This version uses a polynomial approximation for the
//    fractional part of 2^x, vs. the native version.  As such
//    it is slightly more accurate.
//  - This version accepts an optional "scale factor" that can
//    be used to scale the result by 2^scale, without losing
//    accuracy.

// This version is used for sinh and cosh, and part of tanh:
float __intel_exp_for_hyper(float x, float scale)
{
    // e^x = 2^(log2(e^x)) = 2^(x * log2(e))
    // We'll compute 2^(x * log2(e)) by splitting x * log2(e)
    //   into a whole part and fractional part.

    // Compute the whole part of x * log2(e)
    // This part is easy!
    // Note: floor rounds to negative infinity.
    float w = SPIRV_OCL_BUILTIN(floor, _f32, )( x * M_LOG2E_F + 0.5f );

    // Compute the fractional part of x * log2(e)
    // We have to do this carefully, so we don't lose precision.
    // Compute as:
    //   fract( x * log2(e) ) = ( x - w * C1 - w * C2 ) * log2(e)
    // C1 is the "Cephes Constant", and is close to 1/log2(e)
    // C2 is the difference between the "Cephes Constant" and 1/log2(e)
    const float C1 = as_float( 0x3F317200 );    // 0.693145751953125
    const float C2 = as_float( 0x35BFBE8E );    // 0.000001428606765330187
    float f = x;
    f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )( w, -C1, f );
    f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )( w, -C2, f );

    // Do a polynomial approximation for 2^fractional.
    float f2 = f * f;
    f = ((((( 1.9875691500E-4f  * f
            + 1.3981999507E-3f) * f
            + 8.3334519073E-3f) * f
            + 4.1665795894E-2f) * f
            + 1.6666665459E-1f) * f
            + 5.0000001201E-1f) * f2
            + f
            + 1.0f;

    // By doing this computation as 2^(w - 2) * 2^2 we can avoid an
    // overflow case for very large values of w.
    w = SPIRV_OCL_BUILTIN(native_exp2, _f32, )( w + scale );   // this should be exact

    float res = w * f;
    res = ( x < as_float( 0xC2D20000 ) ) ? as_float( 0x00000000 ) : res;
    res = ( x > as_float( 0x42D20000 ) ) ? as_float( 0x7F800000 ) : res;

    return res;
}

float __intel_exp_for_tanh(float x, float scale)
{
    float px = SPIRV_OCL_BUILTIN(fabs, _f32, )(x);

    // e^x = 2^(log2(e^x)) = 2^(x * log2(e))
    // We'll compute 2^(x * log2(e)) by splitting x * log2(e)
    //   into a whole part and fractional part.

    // Compute the whole part of x * log2(e)
    // This part is easy!
    // Note: floor rounds to negative infinity.
    float w = SPIRV_OCL_BUILTIN(floor, _f32, )( px * M_LOG2E_F + 0.5f );

    // Compute the fractional part of x * log2(e)
    // We have to do this carefully, so we don't lose precision.
    // Compute as:
    //   fract( x * log2(e) ) = ( x - w * C1 - w * C2 ) * log2(e)
    // C1 is the "Cephes Constant", and is close to 1/log2(e)
    // C2 is the difference between the "Cephes Constant" and 1/log2(e)

    const float C1 = as_float( 0x3F317200 );    // 0.693145751953125
    const float C2 = as_float( 0x35BFBE8E );    // 0.000001428606765330187
    float f = px;
    f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )( w, -C1, f );
    f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )( w, -C2, f );

    // Do a polynomial approximation for 2^fractional.
    float tf =
        ((((( 1.9875691500E-4f  * f
            + 1.3981999507E-3f) * f
            + 8.3334519073E-3f) * f
            + 4.1665795894E-2f) * f
            + 1.6666665459E-1f) * f
            + 5.0000001201E-1f) * f * f
            + f
            + 1.0f;

    float ns = -scale;
    scale = ( x >= 0.0f ) ? scale : ns;
    w = SPIRV_OCL_BUILTIN(native_exp2, _f32, )( w + scale );  // this should be exact

    float res = w * tf;
    res = ( px < as_float( 0xC2D20000 ) ) ? as_float( 0x00000000 ) : res;
    res = ( px > as_float( 0x42D20000 ) ) ? as_float( 0x7F800000 ) : res;

    float rx = SPIRV_OCL_BUILTIN(native_recip, _f32, )( res );
    res = ( x >= 0.0f ) ? res : rx;

    return res;
}

#endif  //__EXP_HYPERBOLIC_CL__