1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
|
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE CPP #-}
module Unicode.CharSpec
( spec
) where
import qualified Data.Char as Char
import Data.Ix (Ix(..))
import Data.Maybe (isJust)
import qualified Unicode.Char as UChar
import qualified Unicode.Char.General.Blocks as UBlocks
-- [TODO] Remove the following qualified imports once isLetter and isSpace
-- are removed from Unicode.Char.General
import qualified Unicode.Char.General.Compat as UCharCompat
-- [TODO] Remove the following qualified imports once isUpper and isLower
-- are removed from Unicode.Char.Case
import qualified Unicode.Char.Case.Compat as UCharCompat
import qualified Unicode.Char.Numeric as UNumeric
import qualified Unicode.Char.Numeric.Compat as UNumericCompat
import Data.Foldable (traverse_)
import Test.Hspec
{- [NOTE]
These tests may fail if the compiler’s Unicode version
does not match the version of this package.
+-------------+----------------+-----------------+
| GHC version | @base@ version | Unicode version |
+=============+================+=================+
| 8.8 | 4.13 | 12.0 |
| 8.10.[1-4] | 4.14.{0,1} | 12.0 |
| 8.10.5+ | 4.14.2+ | 13.0 |
| 9.0.[1-2] | 4.15.0 | 12.1 |
| 9.2.[1-4] | 4.16.0 | 14.0 |
| 9.4.[1-2] | 4.17.0 | 14.0 |
| 9.6.1 | 4.18.0 | 15.0 |
+-------------+----------------+-----------------+
-}
spec :: Spec
spec = do
#ifdef COMPATIBLE_GHC_UNICODE
let describe' = describe
let it' = it
#else
let describe' t = before_ (pendingWith "Incompatible GHC Unicode version")
. describe t
let it' t = before_ (pendingWith "Incompatible GHC Unicode version")
. it t
#endif
describe "Unicode blocks" do
it "Characters not in any block are unassigned"
let { check c = case UBlocks.block c of
Just _ -> pure ()
Nothing -> UChar.generalCategory c `shouldBe` UChar.NotAssigned
} in traverse_ check [minBound..maxBound]
it "Characters are in the definition of their corresponding block"
let {
check c = case UBlocks.block c of
Nothing -> pure ()
Just b ->
let r = UBlocks.blockRange (UBlocks.blockDefinition b)
in if inRange r (UChar.ord c)
then pure ()
else expectationFailure $ mconcat
[ "Character “", show c
, "” is not in the block “", show b, "”." ]
} in traverse_ check [minBound..maxBound]
it "Characters in a block definition have the corresponding block"
let {
check b = let r = UBlocks.blockRange (UBlocks.blockDefinition b)
in traverse_ (checkChar b) (UChar.chr <$> range r);
checkChar b c = let b' = UBlocks.block c in if b' == Just b
then pure ()
else expectationFailure $ mconcat
[ "Block is different for “", show c, "”. Expected: “Just "
, show b, "” but got: “", show b', "”." ]
} in traverse_ check [minBound..maxBound]
describe' "Unicode general categories" do
it "generalCategory" do
-- [NOTE] We cannot compare the categories directly, so use 'show'.
(show . UChar.generalCategory) `shouldBeEqualTo` (show . Char.generalCategory)
describe' "Character classification" do
it "isAlpha" do
UChar.isAlpha `shouldBeEqualTo` Char.isAlpha
it "isAlphaNum" do
UChar.isAlphaNum `shouldBeEqualTo` Char.isAlphaNum
it "isControl" do
UChar.isControl `shouldBeEqualTo` Char.isControl
it "isLetter" do
UCharCompat.isLetter `shouldBeEqualTo` Char.isLetter
it "isMark" do
UChar.isMark `shouldBeEqualTo` Char.isMark
it "isPrint" do
UChar.isPrint `shouldBeEqualTo` Char.isPrint
it "isPunctuation" do
UChar.isPunctuation `shouldBeEqualTo` Char.isPunctuation
it "isSeparator" do
UChar.isSeparator `shouldBeEqualTo` Char.isSeparator
it "isSpace" do
UCharCompat.isSpace `shouldBeEqualTo` Char.isSpace
it "isSymbol" do
UChar.isSymbol `shouldBeEqualTo` Char.isSymbol
describe "Case" do
it' "isLower" do
UCharCompat.isLower `shouldBeEqualTo` Char.isLower
it' "isUpper" do
UCharCompat.isUpper `shouldBeEqualTo` Char.isUpper
it' "toLower" do
UChar.toLower `shouldBeEqualTo` Char.toLower
let caseCheck f (c, cs) = c `shouldSatisfy` (== cs) . f
describe "toLowerString" do
it "Examples" do
let examples = [ ('\0', "\0")
, ('a', "a")
, ('A', "a")
, ('1', "1")
, ('\x130', "i\x307") ]
traverse_ (caseCheck UChar.toLowerString) examples
it' "Common mapping should match simple one" do
let check c = case UChar.toLowerString c of
[c'] -> c `shouldSatisfy` ((== c') . UChar.toLower)
_ -> pure ()
traverse_ check [minBound..maxBound]
it "Idempotency of 'foldMap toLowerString'" do
let check c = c `shouldSatisfy` \c' ->
let cf = UChar.toLowerString c'
in cf == foldMap UChar.toLowerString cf
traverse_ check [minBound..maxBound]
it' "toUpper" do
UChar.toUpper `shouldBeEqualTo` Char.toUpper
describe "toUpperString" do
it "Examples" do
let examples = [ ('\0', "\0")
, ('a', "A")
, ('A', "A")
, ('1', "1")
, ('\xdf', "SS")
, ('\x1F52', "\x03A5\x0313\x0300") ]
traverse_ (caseCheck UChar.toUpperString) examples
it' "Common mapping should match simple one" do
let check c = case UChar.toUpperString c of
[c'] -> c `shouldSatisfy` ((== c') . UChar.toUpper)
_ -> pure ()
traverse_ check [minBound..maxBound]
it "Idempotency of 'foldMap toUpperString'" do
let check c = c `shouldSatisfy` \c' ->
let cf = UChar.toUpperString c'
in cf == foldMap UChar.toUpperString cf
traverse_ check [minBound..maxBound]
it' "toTitle" do
UChar.toTitle `shouldBeEqualTo` Char.toTitle
describe "toTitleString" do
it "Examples" do
let examples = [ ('\0', "\0")
, ('a', "A")
, ('A', "A")
, ('1', "1")
, ('\xdf', "Ss")
, ('\xfb02', "Fl")
, ('\x1F52', "\x03A5\x0313\x0300") ]
traverse_ (caseCheck UChar.toTitleString) examples
it' "Common mapping should match simple one" do
let check c = case UChar.toTitleString c of
[c'] -> c `shouldSatisfy` ((== c') . UChar.toTitle)
_ -> pure ()
traverse_ check [minBound..maxBound]
describe "toCaseFoldString" do
it "Examples" do
let examples = [ ('\0', "\0")
, ('a', "a")
, ('A', "a")
, ('1', "1")
, ('\xb5', "\x3bc")
, ('\xfb13', "\x574\x576") ]
traverse_ (caseCheck UChar.toCaseFoldString) examples
it "Idempotency of 'foldMap toCaseFoldString'" do
let check c = c `shouldSatisfy` \c' ->
let cf = UChar.toCaseFoldString c'
in cf == foldMap UChar.toCaseFoldString cf
traverse_ check [minBound..maxBound]
describe "Numeric" do
it' "isNumber" do
UNumericCompat.isNumber `shouldBeEqualTo` Char.isNumber
it "isNumber implies a numeric value" do
-- [NOTE] the following does not hold with the current predicate `isNumber`.
-- As of Unicode 15.0.0, there are 81 such characters (all CJK).
-- let check c = (UNumeric.isNumber c `xor` isNothing (UNumeric.numericValue c))
let check c = not (UNumericCompat.isNumber c) || isJust (UNumeric.numericValue c)
traverse_ (`shouldSatisfy` check) [minBound..maxBound]
where
shouldBeEqualTo
:: forall a b. (Bounded a, Enum a, Show a, Eq b, Show b)
=> (a -> b)
-> (a -> b)
-> IO ()
shouldBeEqualTo f g =
let same x = f x == g x
in traverse_ (`shouldSatisfy` same) [minBound..maxBound]
|