File: sqrt_cr.cl

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (120 lines) | stat: -rw-r--r-- 3,967 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "../include/BiF_Definitions.cl"
#include "../../Headers/spirv.h"

extern __constant int __Native64Bit;

#if defined(cl_khr_fp16)

INLINE
half SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(sqrt_cr, _f16, )( half a )
{
    return (half)SPIRV_OCL_BUILTIN(sqrt_cr, _f32, )((float)a);
}

GENERATE_SPIRV_OCL_VECTOR_FUNCTIONS_1ARGS( sqrt_cr, half, half, f16 )

#endif // define(cl_khr_fp16)

float SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(sqrt_cr, _f32, )( float a )
{
    if (!__CRMacros)
    {
        typedef union binary32
        {
            uint u;
            int  s;
            float f;
        } binary32;

        binary32 fa, y0, onehalf, H0, H1, S0, S1, S, d0, e0;
        int aExp, sExp;
        fa.f = a;
        aExp = (fa.u >> 23) & 0xff;
        uint significand = fa.u & 0x7fffff;

        if (aExp == 0 & significand == 0) {
            /* return +/-zero for +/-zero */
            S.u = fa.u;
        }
        else if (aExp == 0xff) { /* NaN and Inf */
            if ((fa.u & 0x7fffff) == 0) { /* Inf */
                S.u = (fa.u & 0x80000000) ? 0xffc00000 : 0x7f800000;
            } else { /* NaN */
                S.u = fa.u | 0x400000; /* Quiet signalling NaN */
            }
        } else {
            if (fa.u & 0x80000000) { /* Negative normals/denormals */
                if (__FlushDenormals & (aExp == 0))
                    S.u = fa.u & 0x80000000;
                else
                    /* return qNaN for negative normal/denormal values */
                    S.u = 0xffc00000;
            } else if (__FlushDenormals & (aExp == 0)) {
                S.u = 0; // positive denorms
            } else { /* Positive normals/denormals */
                bool denorm = (aExp == 0);

                if (denorm & !__FlushDenormals) {
                    fa.f = SPIRV_OCL_BUILTIN(ldexp, _f32_i32, )(fa.f, 126);
                }
                else {
                    // Scale a to [1/2, 2)
                    fa.u = (fa.u & 0x00ffffff) | 0x3f000000;
                }

                // Initial approximation
                y0.f = SPIRV_OCL_BUILTIN(rsqrt, _f32, )(fa.f);
                onehalf.u = 0x3f000000;
                // Step(1), H0 = 1/2y0
                H0.f = onehalf.f * y0.f;
                // Step(2), S0 = a*y0
                S0.f = fa.f * y0.f;
                // Step(3), d0 = 1/2 - S0*H0
                d0.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(-S0.f, H0.f, onehalf.f);
                // Step(4), H1 = H0 + d0*H0
                H1.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(d0.f, H0.f, H0.f);
                // Step(5), S1 = S0 + d0*S0
                S1.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(d0.f, S0.f, S0.f);
                // Step(6), e0 = a - S1*S1
                e0.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(-S1.f, S1.f, fa.f);
                // Step(7), S = S1 + e0*H1
                S.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(e0.f, H1.f, S1.f);

                if (denorm & !__FlushDenormals) {
                    S.f = SPIRV_OCL_BUILTIN(ldexp, _f32_i32, )(S.f, -126/2);
                }
                else {
                    // Adjust exponent
                    sExp = ((aExp - FLOAT_BIAS) >> 1) + FLOAT_BIAS;
                    S.u = (S.u & 0x007fffff) | (sExp << 23);
                }
            }
        }
        return S.f;
    }
    else
    {
        return FSQRT_IEEE(a);
    }
}

#ifdef cl_fp64_basic_ops

INLINE double SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(sqrt_cr, _f64, )( double x )
{
        return SPIRV_OCL_BUILTIN(sqrt, _f64, )(x);
}

GENERATE_SPIRV_OCL_VECTOR_FUNCTIONS_1ARGS( sqrt_cr, double, double, f64 )

#endif // cl_fp64_basic_ops

GENERATE_SPIRV_OCL_VECTOR_FUNCTIONS_1ARGS( sqrt_cr, float, float, f32 )