File: fptoi.cpp

package info (click to toggle)
intel-graphics-compiler 1.0.17791.18-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 102,312 kB
  • sloc: cpp: 935,343; lisp: 286,143; ansic: 16,196; python: 3,279; yacc: 2,487; lex: 1,642; pascal: 300; sh: 174; makefile: 27
file content (113 lines) | stat: -rw-r--r-- 3,709 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/*========================== begin_copyright_notice ============================

Copyright (C) 2021-2023 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include <cm-cl/math.h>
#include <cm-cl/vector.h>

#include "../helpers.h"

using namespace cm;

namespace {
template <bool IsSigned, int N>
CM_NODEBUG CM_INLINE vector<uint64_t, N> __impl_fptoi(vector<half, N> a) {
  vector<uint16_t, N> Val16 = a.template format<uint16_t>();
  vector<uint32_t, N> Val = Val16;
  const vector<uint32_t, N> Zero(0);
  const vector<uint32_t, N> Ones(0xffffffff);
  const vector<uint32_t, N> One(1);
  const vector<uint32_t, N> ExpMask = vector<uint32_t, N>(0x1f);
  const vector<uint32_t, N> MantissaMask = vector<uint32_t, N>(0x3ff);

  vector<uint32_t, N> SignedBitMask(1u << 15);
  vector<uint32_t, N> SignedBit = Val & SignedBitMask;
  vector<uint32_t, N> Exp = (Val >> 10) & ExpMask;
  vector<uint32_t, N> Mant = Val & MantissaMask;
  auto FlagSignSet = (SignedBit != Zero);
  auto FlagNoSignSet = (SignedBit == Zero);

  // check for Exponent overflow (when sign bit set)
  auto FlagExpO = (Exp == vector<uint32_t, N>(0x1f));
  auto FlagExpUO = FlagNoSignSet & FlagExpO;
  auto IsNaN = FlagExpO & (Mant != Zero);
  vector<uint32_t, N> LoRes = a;
  vector<uint32_t, N> HiRes = Zero;
  if constexpr (IsSigned) {
    vector<uint32_t, N> IntNegA = -a;
    LoRes.merge(IntNegA, FlagSignSet);
    // calculate (NOT[Lo, Hi] + 1) (integer sign negation)
    vector<uint32_t, N> NegLo = ~LoRes;
    vector<uint32_t, N> NegHi = ~HiRes;

    auto AddC = cm::math::add_with_carry(NegLo, One);
    auto AddcRes = AddC.first;
    auto AddcResCB = AddC.second;
    NegHi = NegHi + AddcResCB;

    // if sign bit is set, alter the result with negated value
    // if (FlagSignSet)
    LoRes.merge(AddcRes, FlagSignSet);
    HiRes.merge(NegHi, FlagSignSet);

    // if (FlagExpO)
    LoRes.merge(Zero, FlagExpO);
    HiRes.merge(vector<uint32_t, N>(1u << 31), FlagExpO);

    // if (FlagExpUO)
    LoRes.merge(Ones, FlagExpUO);
    HiRes.merge(vector<uint32_t, N>((1u << 31) - 1), FlagExpUO);

    // if (IsNaN)
    LoRes.merge(Zero, IsNaN);
    HiRes.merge(Zero, IsNaN);

  } else {
    LoRes.merge(Zero, FlagSignSet);
    HiRes.merge(Zero, FlagSignSet);

    // if (FlagExpUO)
    LoRes.merge(Ones, FlagExpUO);
    HiRes.merge(Ones, FlagExpUO);

    // if (IsNaN)
    LoRes.merge(Zero, IsNaN);
    HiRes.merge(Zero, IsNaN);
  }

  return __impl_combineLoHi<N>(LoRes, HiRes);
}
} // namespace

CM_NODEBUG CM_NOINLINE extern "C" uint64_t __vc_builtin_fptosi_f16(half a) {
  vector<half, 1> va = a;
  return __impl_fptoi<true>(va)[0];
}

CM_NODEBUG CM_NOINLINE extern "C" uint64_t __vc_builtin_fptoui_f16(half a) {
  vector<half, 1> va = a;
  return __impl_fptoi<false>(va)[0];
}

#define FPTOI(WIDTH)                                                           \
  CM_NODEBUG CM_NOINLINE extern "C" cl_vector<uint64_t, WIDTH>                 \
      __vc_builtin_fptosi_v##WIDTH##f16(cl_vector<half, WIDTH> a) {            \
    vector<half, WIDTH> va{a};                                                 \
    return __impl_fptoi<true>(va).cl_vector();                                 \
  }                                                                            \
  CM_NODEBUG CM_NOINLINE extern "C" cl_vector<uint64_t, WIDTH>                 \
      __vc_builtin_fptoui_v##WIDTH##f16(cl_vector<half, WIDTH> a) {            \
    vector<half, WIDTH> va{a};                                                 \
    return __impl_fptoi<false>(va).cl_vector();                                \
  }

FPTOI(1)
FPTOI(2)
FPTOI(4)
FPTOI(8)
FPTOI(16)
FPTOI(32)