1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
|
-- |
-- Module : Unicode.Char.Numeric
-- Copyright : (c) 2020 Composewell Technologies and Contributors
-- License : Apache-2.0
-- Maintainer : streamly@composewell.com
-- Stability : experimental
--
-- Numeric character property related functions.
--
-- @since 0.3.0
module Unicode.Char.Numeric
( -- * Predicates
isNumeric
-- * Numeric values
, numericValue
, integerValue
-- * Single digit characters
, intToDigiT
-- * Re-export from @base@
, isDigit
, isOctDigit
, isHexDigit
, digitToInt
, intToDigit
) where
import Data.Char (digitToInt, intToDigit, isDigit, isHexDigit, isOctDigit)
import Data.Int (Int64)
import Data.Maybe (isJust)
import Data.Ratio (denominator, numerator)
import GHC.Exts (Char (..), Int (..), chr#, isTrue#, (+#), (<=#), (>=#))
import qualified Unicode.Internal.Char.DerivedNumericValues as V
-- $setup
-- >>> import Data.Int (Int32, Int64)
-- | Selects Unicode character with a numeric value.
--
-- __Note:__ a character may have a numeric value but return 'False' with
-- the predicate 'Unicode.Char.Numeric.Compat.isNumber', because
-- 'Unicode.Char.Numeric.Compat.isNumber' only tests
-- 'Unicode.Char.General.GeneralCategory': some CJK characters are
-- 'Unicode.Char.General.OtherLetter' and do have a numeric value.
--
-- prop> isNumeric c == isJust (numericValue c)
--
-- @since 0.3.1
{-# INLINE isNumeric #-}
isNumeric :: Char -> Bool
isNumeric = isJust . V.numericValue
-- | Numeric value of a character, if relevant.
--
-- __Note:__ a character may have a numeric value but return 'False' with
-- the predicate 'Unicode.Char.Numeric.Compat.isNumber', because
-- 'Unicode.Char.Numeric.Compat.isNumber' only tests
-- 'Unicode.Char.General.GeneralCategory': some CJK characters are
-- 'Unicode.Char.General.OtherLetter' and do have a numeric value.
--
-- @since 0.3.1
{-# INLINE numericValue #-}
numericValue :: Char -> Maybe Rational
numericValue = V.numericValue
-- | Integer value of a character, if relevant.
--
-- This is a special case of 'numericValue'.
--
-- __Warning:__ There is a risk of /integer overflow/ depending of the chosen
-- concrete return type. As of Unicode 15.1 the results range from 0 to 1e16.
--
-- >>> minimum [v | v@Just{} <- integerValue <$> [minBound..]] :: Maybe Integer
-- Just 0
-- >>> maximum (integerValue <$> [minBound..]) :: Maybe Integer
-- Just 10000000000000000
-- >>> integerValue '\x4EAC' :: Maybe Int64 -- OK
-- Just 10000000000000000
-- >>> integerValue '\x4EAC' :: Maybe Int32 -- Will overflow!
-- Just 1874919424
--
-- Therefore it is advised to use: @'integerValue' \@'Int64'@.
--
-- __Note:__ A character may have a numeric value but return 'False' with
-- the predicate 'Unicode.Char.Numeric.Compat.isNumber', because
-- 'Unicode.Char.Numeric.Compat.isNumber' only tests
-- 'Unicode.Char.General.GeneralCategory': some CJK characters are
-- 'Unicode.Char.General.OtherLetter' and do have a numeric value.
--
-- @since 0.3.1
{-# INLINE integerValue #-}
{-# SPECIALIZE integerValue :: Char -> Maybe Integer #-}
{-# SPECIALIZE integerValue :: Char -> Maybe Int64 #-}
{-# SPECIALIZE integerValue :: Char -> Maybe Int #-}
integerValue :: (Integral a) => Char -> Maybe a
integerValue c = do
r <- V.numericValue c
if denominator r == 1
then Just (fromInteger (numerator r))
else Nothing
-- | Same a 'intToDigit', but with upper case.
--
-- >>> intToDigiT <$> [0..15]
-- "0123456789ABCDEF"
--
-- @since 0.6.0
intToDigiT :: Int -> Char
intToDigiT (I# i)
| isTrue# (i >=# 0#) && isTrue# (i <=# 9#) = C# (chr# (0x30# +# i))
| isTrue# (i >=# 10#) && isTrue# (i <=# 15#) = C# (chr# (0x37# +# i))
| otherwise = errorWithoutStackTrace
("Unicode.Char.Numeric.intToDigiT: not a digit " ++ show (I# i))
|