File: F2S.hs

package info (click to toggle)
ghc 9.10.3-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 169,076 kB
  • sloc: haskell: 713,554; ansic: 84,184; cpp: 30,255; javascript: 9,003; sh: 7,870; fortran: 3,527; python: 3,228; asm: 2,523; makefile: 2,324; yacc: 1,570; lisp: 532; xml: 196; perl: 111; csh: 2
file content (304 lines) | stat: -rw-r--r-- 10,348 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
{-# LANGUAGE CPP #-}

-- |
-- Module      : Data.ByteString.Builder.RealFloat.F2S
-- Copyright   : (c) Lawrence Wu 2021
-- License     : BSD-style
-- Maintainer  : lawrencejwu@gmail.com
--
-- Implementation of float-to-string conversion

module Data.ByteString.Builder.RealFloat.F2S
    ( FloatingDecimal(..)
    , f2s
    , f2Intermediate
    ) where

import Control.Arrow (first)
import Data.Bits ((.|.), (.&.), unsafeShiftL, unsafeShiftR)
import Data.ByteString.Builder.Internal (Builder)
import Data.ByteString.Builder.Prim (primBounded)
import Data.ByteString.Builder.RealFloat.Internal
import GHC.Int (Int32(..))
import GHC.Word (Word32(..), Word64(..))

#if !PURE_HASKELL
import GHC.Ptr (Ptr(..))
#endif

-- See Data.ByteString.Builder.RealFloat.TableGenerator for a high-level
-- explanation of the ryu algorithm

#if !PURE_HASKELL
-- | Table of 2^k / 5^q + 1
--
-- > fmap (finv float_pow5_inv_bitcount) [0..float_max_inv_split]
foreign import ccall "&hs_bytestring_float_pow5_inv_split"
  float_pow5_inv_split :: Ptr Word64

-- | Table of 5^(-e2-q) / 2^k + 1
--
-- > fmap (fnorm float_pow5_bitcount) [0..float_max_split]
foreign import ccall "&hs_bytestring_float_pow5_split"
  float_pow5_split :: Ptr Word64
#endif

-- | Number of mantissa bits of a 32-bit float. The number of significant bits
-- (floatDigits (undefined :: Float)) is 24 since we have a leading 1 for
-- normal floats and 0 for subnormal floats
float_mantissa_bits :: Int
float_mantissa_bits = 23

-- | Number of exponent bits of a 32-bit float
float_exponent_bits :: Int
float_exponent_bits = 8

-- | Bias in encoded 32-bit float representation (2^7 - 1)
float_bias :: Int
float_bias = 127

data FloatingDecimal = FloatingDecimal
  { fmantissa :: !Word32
  , fexponent :: !Int32
  } deriving (Show, Eq)

-- | Multiply a 32-bit number with a 64-bit number while keeping the upper 64
-- bits. Then shift by specified amount minus 32
mulShift32 :: Word32 -> Word64 -> Int -> Word32
mulShift32 m factor shift =
  let factorLo = factor .&. mask 32
      factorHi = factor `unsafeShiftR` 32
      bits0 = word32ToWord64 m * factorLo
      bits1 = word32ToWord64 m * factorHi
      total  = (bits0 `unsafeShiftR` 32) + bits1
   in word64ToWord32 $ total `unsafeShiftR` (shift - 32)

-- | Index into the 64-bit word lookup table float_pow5_inv_split
get_float_pow5_inv_split :: Int -> Word64
#if !PURE_HASKELL
get_float_pow5_inv_split = getWord64At float_pow5_inv_split
#else
-- > putStr $ case64 (finv float_pow5_inv_bitcount) [0..float_max_inv_split]
get_float_pow5_inv_split i = case i of
  0  -> 0x800000000000001
  1  -> 0x666666666666667
  2  -> 0x51eb851eb851eb9
  3  -> 0x4189374bc6a7efa
  4  -> 0x68db8bac710cb2a
  5  -> 0x53e2d6238da3c22
  6  -> 0x431bde82d7b634e
  7  -> 0x6b5fca6af2bd216
  8  -> 0x55e63b88c230e78
  9  -> 0x44b82fa09b5a52d
  10 -> 0x6df37f675ef6eae
  11 -> 0x57f5ff85e592558
  12 -> 0x465e6604b7a8447
  13 -> 0x709709a125da071
  14 -> 0x5a126e1a84ae6c1
  15 -> 0x480ebe7b9d58567
  16 -> 0x734aca5f6226f0b
  17 -> 0x5c3bd5191b525a3
  18 -> 0x49c97747490eae9
  19 -> 0x760f253edb4ab0e
  20 -> 0x5e72843249088d8
  21 -> 0x4b8ed0283a6d3e0
  22 -> 0x78e480405d7b966
  23 -> 0x60b6cd004ac9452
  24 -> 0x4d5f0a66a23a9db
  25 -> 0x7bcb43d769f762b
  26 -> 0x63090312bb2c4ef
  27 -> 0x4f3a68dbc8f03f3
  28 -> 0x7ec3daf94180651
  29 -> 0x65697bfa9acd1da
  _  -> 0x51212ffbaf0a7e2
#endif

-- | Index into the 64-bit word lookup table float_pow5_split
get_float_pow5_split :: Int -> Word64
#if !PURE_HASKELL
get_float_pow5_split = getWord64At float_pow5_split
#else
-- > putStr $ case64 (fnorm float_pow5_bitcount) [0..float_max_split]
get_float_pow5_split i = case i of
  0  -> 0x1000000000000000
  1  -> 0x1400000000000000
  2  -> 0x1900000000000000
  3  -> 0x1f40000000000000
  4  -> 0x1388000000000000
  5  -> 0x186a000000000000
  6  -> 0x1e84800000000000
  7  -> 0x1312d00000000000
  8  -> 0x17d7840000000000
  9  -> 0x1dcd650000000000
  10 -> 0x12a05f2000000000
  11 -> 0x174876e800000000
  12 -> 0x1d1a94a200000000
  13 -> 0x12309ce540000000
  14 -> 0x16bcc41e90000000
  15 -> 0x1c6bf52634000000
  16 -> 0x11c37937e0800000
  17 -> 0x16345785d8a00000
  18 -> 0x1bc16d674ec80000
  19 -> 0x1158e460913d0000
  20 -> 0x15af1d78b58c4000
  21 -> 0x1b1ae4d6e2ef5000
  22 -> 0x10f0cf064dd59200
  23 -> 0x152d02c7e14af680
  24 -> 0x1a784379d99db420
  25 -> 0x108b2a2c28029094
  26 -> 0x14adf4b7320334b9
  27 -> 0x19d971e4fe8401e7
  28 -> 0x1027e72f1f128130
  29 -> 0x1431e0fae6d7217c
  30 -> 0x193e5939a08ce9db
  31 -> 0x1f8def8808b02452
  32 -> 0x13b8b5b5056e16b3
  33 -> 0x18a6e32246c99c60
  34 -> 0x1ed09bead87c0378
  35 -> 0x13426172c74d822b
  36 -> 0x1812f9cf7920e2b6
  37 -> 0x1e17b84357691b64
  38 -> 0x12ced32a16a1b11e
  39 -> 0x178287f49c4a1d66
  40 -> 0x1d6329f1c35ca4bf
  41 -> 0x125dfa371a19e6f7
  42 -> 0x16f578c4e0a060b5
  43 -> 0x1cb2d6f618c878e3
  44 -> 0x11efc659cf7d4b8d
  45 -> 0x166bb7f0435c9e71
  _  -> 0x1c06a5ec5433c60d
#endif

-- | Take the high bits of m * 2^k / 5^q / 2^-e2+q+k
mulPow5InvDivPow2 :: Word32 -> Int -> Int -> Word32
mulPow5InvDivPow2 m q j = mulShift32 m (get_float_pow5_inv_split q) j

-- | Take the high bits of m * 5^-e2-q / 2^k / 2^q-k
mulPow5DivPow2 :: Word32 -> Int -> Int -> Word32
mulPow5DivPow2 m i j = mulShift32 m (get_float_pow5_split i) j

-- | Handle case e2 >= 0
f2dGT :: Int32 -> Word32 -> Word32 -> Word32 -> (BoundsState Word32, Int32)
f2dGT e2' u v w =
  let e2 = int32ToInt e2'
      -- q = e10 = log_10(2^e2)
      q = log10pow2 e2
      -- k = B0 + log_2(5^q)
      k = float_pow5_inv_bitcount + pow5bits q - 1
      i = -e2 + q + k
      -- (u, v, w) * 2^k / 5^q / 2^-e2+q+k
      u' = mulPow5InvDivPow2 u q i
      v' = mulPow5InvDivPow2 v q i
      w' = mulPow5InvDivPow2 w q i
      !lastRemoved =
        if q /= 0 && fquot10 (w' - 1) <= fquot10 u'
          -- We need to know one removed digit even if we are not going to loop
          -- below. We could use q = X - 1 above, except that would require 33
          -- bits for the result, and we've found that 32-bit arithmetic is
          -- faster even on 64-bit machines.
          then let l = float_pow5_inv_bitcount + pow5bits (q - 1) - 1
                in frem10 (mulPow5InvDivPow2 v (q - 1) (-e2 + q - 1 + l))
          else 0
      !(vvTrailing, vuTrailing, vw') =
        case () of
          _ | q < 9 && frem5 v == 0
                -> (multipleOfPowerOf5 v q, False, w')
            | q < 9 && acceptBounds v
                -> (False, multipleOfPowerOf5 u q, w')
            | q < 9
                -> (False, False, w' - boolToWord32 (multipleOfPowerOf5 w q))
            | otherwise
                -> (False, False, w')
   in (BoundsState u' v' vw' lastRemoved vuTrailing vvTrailing, intToInt32 q)

-- | Handle case e2 < 0
f2dLT :: Int32 -> Word32 -> Word32 -> Word32 -> (BoundsState Word32, Int32)
f2dLT e2' u v w =
  let e2 = int32ToInt e2'
      q = log10pow5 (-e2)
      e10 = q + e2
      i = (-e2) - q
      -- k = log_2(5^-e2-q) - B1
      k = pow5bits i - float_pow5_bitcount
      j = q - k
      -- (u, v, w) * 5^-e2-q / 2^k / 2^q-k
      u' = mulPow5DivPow2 u i j
      v' = mulPow5DivPow2 v i j
      w' = mulPow5DivPow2 w i j
      !lastRemoved =
        if q /= 0 && fquot10 (w' - 1) <= fquot10 u'
          then let j' = q - 1 - (pow5bits (i + 1) - float_pow5_bitcount)
                in frem10 (mulPow5DivPow2 v (i + 1) j')
          else 0
      !(vvTrailing , vuTrailing, vw') =
        case () of
          _ | q <= 1 && acceptBounds v
                -> (True, v - u == 2, w') -- mmShift == 1
            | q <= 1
                -> (True, False, w' - 1)
            | q < 31
                -> (multipleOfPowerOf2 v (q - 1), False, w')
            | otherwise
                -> (False, False, w')
   in (BoundsState u' v' vw' lastRemoved vuTrailing vvTrailing, intToInt32 e10)

-- | Returns the decimal representation of the given mantissa and exponent of a
-- 32-bit Float using the ryu algorithm.
f2d :: Word32 -> Word32 -> FloatingDecimal
f2d m e =
  let !mf = if e == 0
              then m
              else (1 `unsafeShiftL` float_mantissa_bits) .|. m
      !ef = intToInt32 $ if e == 0
              then 1 - (float_bias + float_mantissa_bits)
              else word32ToInt e - (float_bias + float_mantissa_bits)
      !e2 = ef - 2
      -- Step 2. 3-tuple (u, v, w) * 2**e2
      !u = 4 * mf - 1 - boolToWord32 (m /= 0 || e <= 1)
      !v = 4 * mf
      !w = 4 * mf + 2
      -- Step 3. convert to decimal power base
      !(state, e10) =
        if e2 >= 0
           then f2dGT e2 u v w
           else f2dLT e2 u v w
      -- Step 4: Find the shortest decimal representation in the interval of
      -- valid representations.
      !(output, removed) =
        let rounded = closestCorrectlyRounded (acceptBounds v)
         in first rounded $ if vvIsTrailingZeros state || vuIsTrailingZeros state
           then trimTrailing state
           else trimNoTrailing state
      !e' = e10 + removed
   in FloatingDecimal output e'

-- | Split a Float into (sign, mantissa, exponent)
breakdown :: Float -> (Bool, Word32, Word32)
breakdown f =
  let bits = castFloatToWord32 f
      sign = ((bits `unsafeShiftR` (float_mantissa_bits + float_exponent_bits)) .&. 1) /= 0
      mantissa = bits .&. mask float_mantissa_bits
      expo = (bits `unsafeShiftR` float_mantissa_bits) .&. mask float_exponent_bits
   in (sign, mantissa, expo)

-- | Dispatches to `f2d` and applies the given formatters
{-# INLINE f2s' #-}
f2s' :: (Bool -> Word32 -> Int32 -> a) -> (NonNumbersAndZero -> a) -> Float -> a
f2s' formatter specialFormatter f =
  let (sign, mantissa, expo) = breakdown f
   in if (expo == mask float_exponent_bits) || (expo == 0 && mantissa == 0)
         then specialFormatter NonNumbersAndZero
                  { negative=sign
                  , exponent_all_one=expo > 0
                  , mantissa_non_zero=mantissa > 0 }
         else let FloatingDecimal m e = f2d mantissa expo
               in formatter sign m e

-- | Render a Float in scientific notation
f2s :: Float -> Builder
f2s f = primBounded (f2s' toCharsScientific toCharsNonNumbersAndZero f) ()

-- | Returns the decimal representation of a Float. NaN and Infinity will
-- return `FloatingDecimal 0 0`
f2Intermediate :: Float -> FloatingDecimal
f2Intermediate = f2s' (const FloatingDecimal) (const $ FloatingDecimal 0 0)