File: tgamma.cl

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (96 lines) | stat: -rw-r--r-- 2,874 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "../include/BiF_Definitions.cl"
#include "../../Headers/spirv.h"

// Selects the fp32 tgamma implementation.  A non-zero value makes the scalar
// builtin forward to __ocl_svml_tgammaf() (expected to come from the include
// below -- TODO confirm); zero or undefined compiles the local Lanczos-based
// fallback instead.
#define USE_IMF_TGAMMA_IMPL 1

// Tested with #if rather than #ifdef so that a value of 0 also skips the
// include, matching the #if test that guards the call inside the builtin.
#if USE_IMF_TGAMMA_IMPL
#include "../IMF/FP32/tgamma_s_noFP64.cl"
#endif // USE_IMF_TGAMMA_IMPL

// sqrt(2 * pi) as an exact fp32 bit pattern.  0x40206C99 is the float nearest
// to 2.5066282746310002; the neighbouring pattern 0x40206C98 is one ULP low.
#define SQRT_2PI                (as_float(0x40206C99)) // 2.5066283f

// Computes the gamma functions using a Lanczos approximation:
static float __intel_gamma(float z)
{
    float p0 = as_float(0x3f800000);    // 1.0f
    float p1 = as_float(0x42985c35);    // 76.180092f
    float p2 = as_float(0xc2ad02b9);    // -86.505318f
    float p3 = as_float(0x41c01ce0);    // 24.014099
    float p4 = as_float(0xbf9da9a4);    // -1.2317395
    float p5 = as_float(0x3a9e6b99);    // 1.2086510e-3f
    float p6 = as_float(0xb6b508c1);    // -5.3952394e-6f
    float g = 5.0f; // number of coefficients - 2

    z -= 1;

    float x = p0;
    x += p1 / (z + 1);
    x += p2 / (z + 2);
    x += p3 / (z + 3);
    x += p4 / (z + 4);
    x += p5 / (z + 5);
    x += p6 / (z + 6);

    float t = z + g + 0.5f;
    return SQRT_2PI * SPIRV_OCL_BUILTIN(pow, _f32_f32, )(t, z + 0.5f) * SPIRV_OCL_BUILTIN(exp, _f32, )(-t) * x;
}

// Scalar fp32 tgamma builtin.
//
// When USE_IMF_TGAMMA_IMPL is non-zero the call is forwarded to
// __ocl_svml_tgammaf().  Otherwise the local fallback below is compiled:
//   - a negative x that equals its own floor yields NaN;
//   - x < 0.5 is evaluated with the reflection formula
//         pi / (sinpi(x) * __intel_gamma(1 - x));
//   - x >= 0.5 returns __intel_gamma(x) directly;
//   - an input whose bit pattern is exactly FLOAT_SIGN_MASK (-0.0f) is
//     forced to -INFINITY.
float SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(tgamma, _f32, )( float x )
{
#if USE_IMF_TGAMMA_IMPL
    return __ocl_svml_tgammaf(x);
#else // USE_IMF_TGAMMA_IMPL
    float ret;
    // Bitwise '&' evaluates both comparisons unconditionally.
    if ( (x < 0.0f) & (x == SPIRV_OCL_BUILTIN(floor, _f32, )(x))) {
        ret = SPIRV_OCL_BUILTIN(nan, _i32, )(0);
    } else {
        float y = 1.0f - x;
        float z = ( x < 0.5f ) ? y : x;
        // Note: z >= 0.5f.
        float g = __intel_gamma(z);

        ret = ( x < 0.5f ) ?
            M_PI_F / ( SPIRV_OCL_BUILTIN(sinpi, _f32, )(x) * g ) :
            g;

        // Special handling for -0.0f.
        // It may be possible to restrict this to renderscript only,
        // but for now we'll apply it across the board to stay on
        // the safe side, since this built-in is used infrequently.
        ret = ( as_uint(x) == FLOAT_SIGN_MASK ) ? -INFINITY : ret;
    }
    return ret;
#endif // USE_IMF_TGAMMA_IMPL
}

// Invokes the vector-function generator for tgamma with float return and
// argument types and the f32 suffix.  The macro is defined outside this file;
// presumably it emits the floatN overloads by applying the scalar builtin per
// component -- TODO confirm against its definition.
GENERATE_SPIRV_OCL_VECTOR_FUNCTIONS_1ARG_LOOP( tgamma, float, float, f32 )

#if defined(cl_khr_fp64)

// Scalar fp64 tgamma builtin: delegates the whole computation to
// libclc_tgamma_f64() and returns its result unchanged.
INLINE double SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(tgamma, _f64, )( double x )
{
    const double result = libclc_tgamma_f64(x);
    return result;
}

// Invokes the vector-function generator for tgamma with double return and
// argument types and the f64 suffix.  The macro is defined outside this file;
// presumably it emits the doubleN overloads from the scalar builtin -- TODO
// confirm against its definition.
GENERATE_SPIRV_OCL_VECTOR_FUNCTIONS_1ARG_LOOP( tgamma, double, double, f64 )

#endif // defined(cl_khr_fp64)

#if defined(cl_khr_fp16)

// Scalar fp16 tgamma builtin: widens the argument to float, evaluates the
// fp32 builtin, and lets the return statement narrow the result to half.
INLINE half SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(tgamma, _f16, )( half x )
{
    const float widened = (float)x;
    const float result = SPIRV_OCL_BUILTIN(tgamma, _f32, )(widened);
    return result;
}

// Invokes the vector-function generator for tgamma with half return and
// argument types and the f16 suffix.  The macro is defined outside this file;
// presumably it emits the halfN overloads from the scalar builtin -- TODO
// confirm against its definition.
GENERATE_SPIRV_OCL_VECTOR_FUNCTIONS_1ARG_LOOP( tgamma, half, half, f16 )

#endif // defined(cl_khr_fp16)