File: exp.cl

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (85 lines) | stat: -rw-r--r-- 2,727 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "../include/BiF_Definitions.cl"
#include "../../Headers/spirv.h"

#if defined(cl_khr_fp64)
    #include "../IMF/FP64/exp_d_la.cl"
    #include "../IMF/FP64/exp_d_la_noLUT.cl"
#endif // defined(cl_khr_fp64)

// Scalar single-precision exp(x).
//
// When __FastRelaxedMath is set the call is forwarded to native_exp.
// Otherwise the result is assembled as 2^whole * 2^fract, where
// whole + fract ~= x * log2(e), using the identity
//   e^x = 2^(log2(e^x)) = 2^(x * log2(e)).
// Inputs below -105.0f yield +0.0f and inputs above 105.0f yield +infinity.
float SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(exp, _f32, )(float x)
{
    if (__FastRelaxedMath)
    {
        return SPIRV_OCL_BUILTIN(native_exp, _f32, )(x);
    }

    // 1/log2(e) split into two floats for the range reduction below:
    //   kInvLog2eHi is the "Cephes Constant", close to 1/log2(e);
    //   kInvLog2eLo is the difference between that constant and 1/log2(e).
    const float kInvLog2eHi = as_float( 0x3F317200 );    // 0.693145751953125
    const float kInvLog2eLo = as_float( 0x35BFBE8E );    // 0.000001428606765330187

    // Saturation thresholds and the values returned beyond them.
    const float kLowerBound = as_float( 0xC2D20000 );    // -105.0f
    const float kUpperBound = as_float( 0x42D20000 );    //  105.0f
    const float kPosZero    = as_float( 0x00000000 );    // +0.0f
    const float kPosInf     = as_float( 0x7F800000 );    // +infinity

    // Whole part of x * log2(e).
    const float whole = SPIRV_OCL_BUILTIN(trunc, _f32, )( x * M_LOG2E_F );

    // Fractional part of x * log2(e), formed as
    //   ( x - whole * kInvLog2eHi - whole * kInvLog2eLo ) * log2(e)
    // so that precision is not lost. The two fma steps must stay in this
    // order: the large term is removed first, then the small correction.
    float reduced = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )( whole, -kInvLog2eHi, x );
    reduced = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )( whole, -kInvLog2eLo, reduced );
    const float fract = reduced * M_LOG2E_F;

    // 2^whole should be exact; 2^fract should be close enough.
    const float pow2Whole = SPIRV_OCL_BUILTIN(native_exp2, _f32, )( whole );
    const float pow2Fract = SPIRV_OCL_BUILTIN(native_exp2, _f32, )( fract );

    float result = pow2Whole * pow2Fract;

    // Out-of-range inputs override whatever the product produced. A NaN
    // input fails both comparisons and keeps the computed result.
    if ( x < kLowerBound )
    {
        result = kPosZero;
    }
    if ( x > kUpperBound )
    {
        result = kPosInf;
    }

    return result;
}

// Vector overloads of exp for float, emitted by a generator macro.
// NOTE(review): the macro is defined outside this file; presumably it builds
// each vector overload on top of the scalar f32 exp above -- confirm.
GENERATE_SPIRV_OCL_VECTOR_FUNCTIONS_1ARGS( exp, float, float, f32 )

#if defined(cl_khr_fp64)

// Scalar double-precision exp(x).
// Dispatches to one of two implementations pulled in from the IMF/FP64
// includes, chosen by __UseHighAccuracyMath.
// NOTE(review): judging by its name, __ocl_svml_exp_noLUT is the variant
// without a lookup table -- confirm against exp_d_la_noLUT.cl.
INLINE double SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(exp, _f64, )( double x )
{
    if (__UseHighAccuracyMath)
    {
        return __ocl_svml_exp_noLUT(x);
    }

    return __ocl_svml_exp(x);
}

// Vector overloads of exp for double, emitted by a generator macro.
// This uses the _1ARG_LOOP generator, whereas the float and half overloads in
// this file use _1ARGS.
// NOTE(review): the macro is defined outside this file; presumably the _LOOP
// form iterates over components calling the scalar f64 exp -- confirm.
GENERATE_SPIRV_OCL_VECTOR_FUNCTIONS_1ARG_LOOP( exp, double, double, f64 )

#endif // defined(cl_khr_fp64)

#if defined(cl_khr_fp16)

// Scalar half-precision exp(x).
// The argument is widened to float, evaluated with the f32 overload, and the
// float result is converted back to half by the return.
INLINE half SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(exp, _f16, )( half x )
{
    const float widened = (float)x;
    return SPIRV_OCL_BUILTIN(exp, _f32, )(widened);
}

// Vector overloads of exp for half, emitted by a generator macro.
// NOTE(review): the macro is defined outside this file; presumably it builds
// each vector overload on top of the scalar f16 exp above -- confirm.
GENERATE_SPIRV_OCL_VECTOR_FUNCTIONS_1ARGS( exp, half, half, f16 )

#endif // defined(cl_khr_fp16)