File: tgamma_s_ep.cl

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (597 lines) | stat: -rw-r--r-- 21,555 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
/*========================== begin_copyright_notice ============================

Copyright (C) 2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "../imf.h"
#pragma OPENCL FP_CONTRACT OFF

/*
//++
//
//  ALGORITHM DESCRIPTION
//  ---------------------
// The method consists of three cases.
//
// If       2 <= x < OVERFLOW_BOUNDARY
// else if  0 < x < 2
// else if  -(i+1) <  x < -i, i = 0...43
//
// Case 2 <= x < OVERFLOW_BOUNDARY
// -------------------------------
//   Here we use algorithm based on the recursive formula
//   GAMMA(x+1) = x*GAMMA(x). For that we subdivide interval
//   [2; OVERFLOW_BOUNDARY] into intervals [8*n; 8*(n+1)] and
//   approximate GAMMA(x) by polynomial of 22th degree on each
//   [8*n; 8*n+1], recursive formula is used to expand GAMMA(x)
//   to [8*n; 8*n+1]. In other words we need to find n, i and r
//   such that x = 8 * n + i + r where n and i are integer numbers
//   and r is fractional part of x. So GAMMA(x) = GAMMA(8*n+i+r) =
//   = (x-1)*(x-2)*...*(x-i)*GAMMA(x-i) =
//   = (x-1)*(x-2)*...*(x-i)*GAMMA(8*n+r) ~
//   ~ (x-1)*(x-2)*...*(x-i)*P12n(r).
//
//   Step 1: Reduction
//   -----------------
//    N = [x] with truncate
//    r = x - N, note 0 <= r < 1
//
//    n = N & ~0x7 - index of table that contains coefficient of
//                   polynomial approximation
//    i = N & 0x7  - is used in recursive formula
//
//
//   Step 2: Approximation
//   ---------------------
//    We use factorized minimax approximation polynomials
//    P12n(r) = A12*(r^2+C01(n)*r+C00(n))*
//              *(r^2+C11(n)*r+C10(n))*...*(r^2+C51(n)*r+C50(n))
//
//   Step 3: Recursion
//   -----------------
//    In case when i > 0 we need to multiply P12n(r) by product
//    R(i,x)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions
//    we can calculate R as follow:
//    R(i,x) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is
//    even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))*
//    *(i-1) if i is odd. In both cases we need to calculate
//    R2(i,x) = (x^2-3*x+2)*(x^2-7*x+12)*...*(x^2+x+2*j*(2*j-1)) =
//    = ((x^2-x)+2*(1-x))*((x^2-x)+6*(2-x))*...*((x^2-x)+2*(2*j-1)*(j-x)) =
//    = (RA+2*RB)*(RA+6*(1-RB))*...*(RA+2*(2*j-1)*(j-1+RB))
//    where j = 1..[i/2], RA = x^2-x, RB = 1-x.
//
//   Step 4: Reconstruction
//   ----------------------
//    Reconstruction is just simple multiplication i.e.
//    GAMMA(x) = P12n(r)*R(i,x)
//
// Case 0 < x < 2
// --------------
//    To calculate GAMMA(x) on this interval we do following
//        if 1.0  <= x < 1.25  than  GAMMA(x) = P7(x-1)
//        if 1.25 <= x < 1.5   than  GAMMA(x) = P7(x-x_min) where
//              x_min is point of local minimum on [1; 2] interval.
//        if 1.5  <= x < 1.75  than  GAMMA(x) = P7(x-1)
//        if 1.75 <= x < 2.0   than  GAMMA(x) = P7(x-1)
//    and
//        if 0 < x < 1 than GAMMA(x) = GAMMA(x+1)/x
//
// Case -(i+1) <  x < -i, i = 0...43
// ----------------------------------
//    Here we use the fact that GAMMA(-x) = PI/(x*GAMMA(x)*sin(PI*x)) and
//    so we need to calculate GAMMA(x), sin(PI*x)/PI. Calculation of
//    GAMMA(x) is described above.
//
//   Step 1: Reduction
//   -----------------
//    Note that period of sin(PI*x) is 2 and range reduction for
//    sin(PI*x) is like to range reduction for GAMMA(x)
//    i.e rs = x - round(x) and |rs| <= 0.5.
//
//   Step 2: Approximation
//   ---------------------
//    To approximate sin(PI*x)/PI = sin(PI*(2*n+rs))/PI =
//    = (-1)^n*sin(PI*rs)/PI Taylor series is used.
//    sin(PI*rs)/PI ~ S17(rs).
//
//   Step 3: Division
//   ----------------
//    1/x and 1/(GAMMA(x)*S12(rs))
//--
*/

//
// Static data section:
//

// negative values underflow range
static __constant float __stgamma_ep__neg_underflow[2] = { 43.0, -43.0 };

// negative values "half" overflow range - multiply by 1/An
static __constant float __stgamma_ep__neg_half_overflow[2] = { 40.0, -40.0 };

// overflow boundary (35.04010009765625)
static __constant unsigned int __stgamma_ep__overflowf_boundary[] = {
    0x420c2910, //       35.0401001
};

// point of local minium (0.461632144968362356785)
static __constant unsigned int __stgamma_ep__local_minimumf[] = {
    0x3eec5b0c, //      0.461632133
};

static __constant unsigned int __stgamma_ep__tgammaf_A_table[] = {
    0x404f7e8e, //       3.24209929
    0x3ffd9a9c, //        1.9812808
    0x3ece7348, //      0.403223276
    0xbfc61856, //      -1.54761767
    0xc0819138, //       -4.0489769
    0xc0f8b8d2, //      -7.77256107
    0x4070be98, //       3.76163292
    0x40834492, //        4.1021204
    0x40a47c71, //        5.1401906
    0x40e300cb, //        7.0938468
    0x412b18a4, //       10.6935158
    0x419a0368, //       19.2516632
    0x3710d6c4, //   8.63307287e-06
    //
    0x4031115d, //       2.76668477
    0x4012293a, //       2.28376627
    0x3faa2c14, //       1.32947016
    0xbdde9083, //     -0.108674072
    0xc008779a, //       -2.1322999
    0xc0a495c1, //      -5.14328051
    0x4002a105, //       2.04107785
    0x40196098, //       2.39652061
    0x404ce483, //       3.20144725
    0x4095cb95, //       4.68110132
    0x40eb998f, //       7.36249495
    0x41503404, //       13.0126991
    0x3f37b6dd, //      0.717634022
    //
    0x40009880, //       2.00930786
    0x3fd51b17, //       1.66488922
    0x3f758f9a, //      0.959222436
    0xbe1ddf37, //      -0.15417181
    0xbfe5b4c3, //      -1.79457891
    0xc08adf72, //      -4.33977604
    0x3f8a703f, //       1.08155048
    0x3fa9dbb6, //       1.32701755
    0x3ff317fc, //       1.89916945
    0x403f19a4, //       2.98593998
    0x40a028f0, //       5.00499725
    0x4114ea11, //       9.30714512
    0x4f4d95ba, //   3.44914176e+09
    //
    0x3fd7bdea, //       1.68548322
    0x3fb156c6, //       1.38546062
    0x3f44a1e6, //      0.768095374
    0xbe59cea3, //     -0.212702319
    0xbfd5e56e, //      -1.67106414
    0xc07d6f4c, //      -3.95991802
    0x3f43cd7a, //      0.764854074
    0x3f763ef6, //      0.961898208
    0x3fb6d4f7, //       1.42837417
    0x401514f5, //       2.32940412
    0x4080bdbd, //       4.02316141
    0x40f4a0ee, //       7.64464474
    0x6194eadb, //   3.43380155e+20
    //
    0x3fbf659d, //       1.49528849
    0x3f9c0ad4, //       1.21908045
    0x3f2673b7, //      0.650203168
    0xbe82926e, //      -0.25502342
    0xbfcd5ec8, //      -1.60445499
    0xc06ec4cb, //      -3.73076129
    0x3f1ac297, //      0.604531705
    0x3f463b08, //      0.774338245
    0x3f97218a, //       1.18071103
    0x3ffcb6e2, //       1.97433114
    0x405e7ba8, //       3.47629738
    0x40d60a70, //       6.68877411
    0x757fa655, //   3.24074539e+32
    //
    0x3faee727, //       1.36642921
    0x3f8d86c6, //       1.10567546
    0x3f118ef6, //      0.568587661
    0xbe928f65, //     -0.286250263
    0xbfc7da28, //       -1.5613451
    0xc064b2c7, //       -3.5734117
    0x3f01b457, //      0.506658018
    0x3f289d72, //      0.658652425
    0x3f834429, //       1.02551758
    0x3fdfbe3e, //       1.74799323
    0x4047cb08, //       3.12176704
    0x40c1dd04, //       6.05822945
    0x3f800000, //                1
};

// sin(pi*x)/pi
static __constant unsigned int __stgamma_ep__tgammaf_sin_table[] = {
    0xb60a2594, //  -2.05854758e-06
    0x3487e4c9, //   2.53121726e-07
    0x42aa0ebd, //       85.0287857
    0xc137d86a, //      -11.4903355
    0x43672c7a, //       231.173737
    0x3ffd8afd, //       1.98080409
    0x42229bc3, //       40.6521111
    0xc11eacd2, //      -9.91719246
};

static __constant unsigned int __stgamma_ep__tgammaf_A100_table[] = {
    0x3f800000, //                1
    0xbf13c466, //     -0.577215552
    0x3f7d3247, //      0.989048421
    0xbf684007, //      -0.90722698
    0x3f7a42bd, //      0.977580845
    0xbf71b908, //     -0.944229603
    0x3f4b7ff7, //      0.794921339
    0xbecc84b2, //     -0.399449885
};

static __constant unsigned int __stgamma_ep__tgammaf_A125_table[] = {
    0x3f62b6e4, //      0.885603189
    0xaf74351d, //  -2.22105404e-10
    0x3edb62a3, //      0.428486913
    0xbe05d6f4, //     -0.130702794
    0x3e24b501, //      0.160846725
    0xbdbf3fc2, //    -0.0933833271
    0x3d791ba9, //     0.0608173944
    0xbda1091b, //    -0.0786306486
};

static __constant unsigned int __stgamma_ep__tgammaf_A150_table[] = {
    0x3f7fe30a, //      0.999558091
    0xbf122cb9, //     -0.570994914
    0x3f732aac, //      0.949869871
    0xbf42da14, //     -0.761140108
    0x3f1e4866, //      0.618292212
    0xbeace98e, //     -0.337719381
    0x3dfa101f, //      0.122101061
    0xbca3985b, //    -0.0199701097
};

static __constant unsigned int __stgamma_ep__tgammaf_A175_table[] = {
    0x3f7f70f3, //      0.997817218
    0xbf0e0d77, //     -0.554892957
    0x3f62b540, //      0.885578156
    0xbf1e1256, //     -0.617467284
    0x3ed9316d, //      0.424205214
    0xbe378738, //     -0.179226756
    0x3d4b71bf, //     0.0496690236
    0xbbba3543, //   -0.00568261882
};

// Right shifter

static __constant unsigned __stgamma_ep__two_23h[] = { 0x4b000000 };    // 2^23

// Special values

static __constant unsigned int __stgamma_ep__own_large_value_32[] = { 0x71800000, 0xf1800000 }; // +2^100,-2^100

static __constant unsigned int __stgamma_ep__own_small_value_32[] = { 0x0d800000, 0x8d800000 }; // +2^(-100),-2^(-100)

// constants

static __constant unsigned int __stgamma_ep__zeros[] = { 0x00000000, 0x80000000 };
static __constant unsigned int __stgamma_ep__ones[] = { 0x3f800000, 0xbf800000 };
static __constant unsigned int __stgamma_ep__infs[] = { 0x7f800000, 0xff800000 };
static __constant float __stgamma_ep_twos[] = { 2.0, -2.0 };
static __constant float __stgamma_ep_ones_5[] = { 1.5, -1.5 };
static __constant float __stgamma_ep_ones_25[] = { 1.25, -1.25 };
static __constant float __stgamma_ep_ones_75[] = { 1.75, -1.75 };

#pragma float_control(precise,on)

__attribute__((always_inline))
inline int __internal_stgamma_ep_cout (float *a, float *r)
{
    int nRet = 0;
    int t = 0, i = 0, j = 0, irsign = 0;
    int ix = 0, iabsx_n = 0, iabsx_t = 0, ixsign = 0, ixexp = 0;
    float dix = 0.0f, diabsx_n = 0.0f, diabsx_t = 0.0f;
    float tv = 0.0f;
    float x = 0.0f;
    float absx = 0.0f, res = 0.0f;

    float resf = 0.0f;
    float s = 0.0f, r2 = 0.0f, r3 = 0.0f, rrr = 0.0f;
    float curabsx = 0.0f;
    float p = 0.0f, pr = 0.0f;
    __constant float *A;
    __constant float *Af;
    x = *(a);
    absx = x;
    res = ((__constant float *) __stgamma_ep__zeros)[0];

    // get arg sign
    ixsign = (((_iml_sp_union_t *) & x)->hex[0] >> 31);
    // get arg exponent
    ixexp = ((((_iml_sp_union_t *) & x)->hex[0] >> 23) & 0xFF);

    // normal values
    if (ixexp != 0xFF)
    {
        // create absolute value
        (((_iml_sp_union_t *) & absx)->hex[0] = (((_iml_sp_union_t *) & absx)->hex[0] & 0x7FFFFFFF) | ((_iml_uint32_t) (0) << 31));

        ix = *((int *) (&absx));

        // if x == 0 - zero divide exception
        if (x == ((__constant float *) __stgamma_ep__zeros)[0])
        {
            *r = (((__constant float *) __stgamma_ep__ones)[(ixsign)] / ((__constant float *) __stgamma_ep__zeros)[0]);
            nRet = 2;
            return nRet;
        }

        if (ix <= 0x00200000)   // if |x| < denorm_overflow
        {
            {
                float tz = ((__constant float *) __stgamma_ep__own_large_value_32)[0];
                ((*r)) = (((__constant float *) __stgamma_ep__own_large_value_32)[(ixsign)] * tz);
            };  // raise overflow
            nRet = 3;
            return nRet;
        }

        // singularity at negative integer points
        if (ixsign)
        {
            // if |x| >= 2^23 - only integer values
            if (ixexp >= 0x00000096)
            {
                {
                    float tz = ((__constant float *) __stgamma_ep__zeros)[0];
                    ((*r)) = (((__constant float *) __stgamma_ep__zeros)[(0)] / tz);
                };
                nRet = 1;
                return nRet;
            }   // if(ixexp >= 0x00000096)
            else
            {
                // get integer value of arg (truncated)
                tv = absx + (*(__constant float *) __stgamma_ep__two_23h);
                diabsx_t = tv - (*(__constant float *) __stgamma_ep__two_23h);
                iabsx_t = (0x000fffff) & (*((int *) (&tv)));
                if (diabsx_t > absx)
                {
                    diabsx_t -= ((__constant float *) __stgamma_ep__ones)[0];
                    iabsx_t -= 1;
                }
            }   // else if(ixexp >= 0x00000096)

            // if arg - integer then singularity
            if (absx == diabsx_t)
            {
                {
                    float tz = ((__constant float *) __stgamma_ep__zeros)[0];
                    ((*r)) = (((__constant float *) __stgamma_ep__zeros)[(0)] / tz);
                };
                nRet = 1;
                return nRet;
            }

            // if arg < -185.0 then underflow (values rounded to zero)
            if (x < __stgamma_ep__neg_underflow[1])
            {
                (*r) = (((__constant float *) __stgamma_ep__own_small_value_32)[((~iabsx_t) & 1)] * ((__constant float *) __stgamma_ep__own_small_value_32)[0]);    // raise underflow and inexact
                nRet = 4;
                return nRet;
            }
        }   // if(ixsign)

        // big positive values overflow domain (res rounded to INF)
        if (x >= (*((__constant float *) __stgamma_ep__overflowf_boundary)))
        {
            {
                float tz = ((__constant float *) __stgamma_ep__own_large_value_32)[0];
                ((*r)) = (((__constant float *) __stgamma_ep__own_large_value_32)[(0)] * tz);
            };  // raise overflow and inexact
            nRet = 3;
            return nRet;
        }

        // compute sin(Pi*x)/x for negative values
        if (ixsign)
        {
            // get rounded to nearest abs arg
            tv = absx + (*(__constant float *) __stgamma_ep__two_23h);
            diabsx_n = tv - (*(__constant float *) __stgamma_ep__two_23h);
            iabsx_n = (0x000fffff) & (*((int *) (&tv)));

            rrr = absx - diabsx_n;  // reduced argument

            if (rrr < 0)
                rrr = (-rrr);   //IML_ABS_DP(rrr);  // remove sign

            r2 = rrr * rrr; // rrr^2

            // Tailor series
            s = rrr +
                rrr *
                ((r2 * (((__constant float *) __stgamma_ep__tgammaf_sin_table)[0] + r2 * ((__constant float *) __stgamma_ep__tgammaf_sin_table)[1])) *
                 (((__constant float *) __stgamma_ep__tgammaf_sin_table)[2] +
                  r2 * (r2 +
                        ((__constant float *) __stgamma_ep__tgammaf_sin_table)[3])) * (((__constant float *) __stgamma_ep__tgammaf_sin_table)[4] +
                                                                                       r2 * (r2 +
                                                                                             ((__constant float *)
                                                                                              __stgamma_ep__tgammaf_sin_table)[5])) *
                 (((__constant float *) __stgamma_ep__tgammaf_sin_table)[6] + r2 * (r2 + ((__constant float *) __stgamma_ep__tgammaf_sin_table)[7])));
        }   // if(ixsign)

        // get truncated integer argument
        tv = absx + (*(__constant float *) __stgamma_ep__two_23h);
        diabsx_t = tv - (*(__constant float *) __stgamma_ep__two_23h);
        iabsx_t = (0x000fffff) & (*((int *) (&tv)));

        if (diabsx_t > absx)
        {
            diabsx_t -= ((__constant float *) __stgamma_ep__ones)[0];
            iabsx_t -= 1;
        }
        // get result sign
        irsign = ((iabsx_t + 1) & 1);
        // if x > 2.0 - simple polynomials
        if (absx >= __stgamma_ep_twos[0])
        {
            t = iabsx_t & (~0x7);   // index of table of coefficient
            i = iabsx_t & (0x7);    // used in recursive formula
            // for 2 <= x < 8 - shift index
            if (iabsx_t < 8)
                i = i - 2;

            rrr = absx - diabsx_t;  // reduced argument
            A = &(((__constant float *) __stgamma_ep__tgammaf_A_table)[t + (t >> 1) + (t >> 3)]);   // table address
            r2 = rrr * rrr; // rrr^2

            // factorized polynomial
            p = A[12] * (r2 + A[0] * rrr + A[6 + 0]) *
                (r2 + A[1] * rrr + A[6 + 1]) *
                (r2 + A[2] * rrr + A[6 + 2]) * (r2 + A[3] * rrr + A[6 + 3]) * (r2 + A[4] * rrr + A[6 + 4]) * (r2 + A[5] * rrr + A[6 + 5]);

            // if no recursion - p = 1.0
            pr = ((__constant float *) __stgamma_ep__ones)[0];
            // if i > 0 - recursies
            if (i)
            {
                for (j = 1; j <= i; j++)
                {
                    pr *= (absx - j);
                }
            }

            if (ixsign) // for negatives rrr = 1/(x*s*gamma*recursies)
            {
                //[40; 48] for negatives

                unsigned int __stgamma_ep__tgamma_A40_inv[] = {
                    0xEDBCC440, 0x368954EA  // 1/An
                };

                double resd = (double) ((__constant float *) __stgamma_ep__ones)[0] / ((double) absx * s * p * pr);
                if (x < __stgamma_ep__neg_half_overflow[1])
                {
                    resd *= (*((double *) __stgamma_ep__tgamma_A40_inv));
                }
                // set sign
                if (irsign)
                    resd = -resd;
                (*r) = (float) resd;
            }
            else    // for positives rrr = gamma*recursies
            {
                (*r) = p * pr;
            }

            return nRet;
        }   // if(absx >= _VSTATIC(twos)[0])
        else
        {
            // if |x| < 1 - calculate gamma(x+1)
            if (absx < ((__constant float *) __stgamma_ep__ones)[0])
            {
                curabsx = absx + (((__constant float *) __stgamma_ep__ones)[0]);
            }
            else
            {
                curabsx = absx;
            }

            // splitted intervals:
            // x >= 1.75
            if (curabsx >= __stgamma_ep_ones_75[0])
            {
                rrr = curabsx - (((__constant float *) __stgamma_ep__ones)[0]);
                A = ((__constant float *) __stgamma_ep__tgammaf_A175_table);
            }
            else if (curabsx >= __stgamma_ep_ones_5[0]) // x >= 1.5
            {
                rrr = curabsx - (((__constant float *) __stgamma_ep__ones)[0]);
                A = ((__constant float *) __stgamma_ep__tgammaf_A150_table);
            }
            else if (curabsx >= __stgamma_ep_ones_25[0])    // 1.5 > x >= 1.25
            {
                rrr = curabsx - ((((__constant float *) __stgamma_ep__ones)[0]) + (*((__constant float *) __stgamma_ep__local_minimumf)));
                A = ((__constant float *) __stgamma_ep__tgammaf_A125_table);
            }
            else if (curabsx < __stgamma_ep_ones_25[0]) // 0 < x < 1.25
            {
                rrr = curabsx - (((__constant float *) __stgamma_ep__ones)[0]);
                A = ((__constant float *) __stgamma_ep__tgammaf_A100_table);
            }

            if (ixexp)  // for normal values - compute whole polynomial
            {
                p = A[0] + rrr * (A[1] + rrr * (A[2] + rrr * (A[3] + rrr * (A[4] + rrr * (A[5] + rrr * (A[6] + rrr * (A[7])))))));
            }
            else    // for denormal - return just A[0]
            {
                p = A[0];
            }

            if (absx < ((__constant float *) __stgamma_ep__ones)[0])    // |x| < 1.0
            {
                if (ixsign) // if x < 0 then rrr = 1/(s*p)
                {
                    resf = (((__constant float *) __stgamma_ep__ones)[0]) / (s * p);
                    if (irsign)
                        resf = -resf;
                }
                else    // if x > 0 then rrr = p/x
                {
                    resf = p / (absx);
                }
            }   // if(absx < ones[0]) // |x| < 1.0
            else    // |x| > 1.0
            {
                if (ixsign) // rrr = 1/(x*s*p);
                {
                    resf = (((__constant float *) __stgamma_ep__ones)[0]) / ((absx) * s * p);
                }
                else    // rrr = p
                {
                    resf = p;
                }
            }   // else if(absx < ones[0]) // |x| < 1.0

            (*r) = (float) resf;
            return nRet;
        }   // else if(absx >= _VSTATIC(twos)[0])
    }   // if (ixexp != IML_EXPINF_32)
    else    // INF or NAN
    {
        // Singularity at negative INF
        if (ixsign && (!((((_iml_sp_union_t *) & x)->hex[0] & 0x007FFFFF) != 0)))
        {
            {
                float tz = ((__constant float *) __stgamma_ep__zeros)[0];
                ((*r)) = (((__constant float *) __stgamma_ep__zeros)[(1)] / tz);
            };
            nRet = 1;
            return nRet;
        }
        else
        {
            (*r) = x + x;   // raise invalid on SNaN
            return nRet;
        }
    }   // else if (ixexp != IML_EXPINF_32)
}   // inline int __internal_stgamma_ep_cout (float *a, float *r)

float __ocl_svml_tgammaf (float a)
{
    float va1;
    float vr1;
    float r;

    va1 = a;
    __internal_stgamma_ep_cout (&va1, &vr1);
    r = vr1;

    return r;
}