1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
-- |
-- Module : Unicode.Char
-- Copyright : (c) 2020 Composewell Technologies and Contributors
-- License : Apache-2.0
-- Maintainer : streamly@composewell.com
-- Stability : experimental
--
-- This module provides APIs to access the Unicode character database (UCD)
-- corresponding to [Unicode Standard version
-- 15.1.0](https://www.unicode.org/versions/Unicode15.1.0/).
--
-- This module re-exports several sub-modules under it. The sub-module
-- structure under `Unicode.Char` is largely based on the
-- ["Property Index by Scope of Use" in UnicodeĀ® Standard Annex #44](https://www.unicode.org/reports/tr44/#Property_Index_Table).
--
-- The @Unicode.Char.*@ modules in turn depend on @Unicode.Internal.Char.*@
-- modules which are programmatically generated from the Unicode standard's
-- Unicode character database files. The module structure under
-- @Unicode.Internal.Char@ is largely based on the UCD text file names from
-- which the properties are generated.
--
-- For the original UCD files used in this code please refer to the @UCD@
-- section on the Unicode standard page. See
-- https://www.unicode.org/reports/tr44/ to understand the contents and the
-- format of the unicode database files.
--
module Unicode.Char
( unicodeVersion
, module Unicode.Char.General
, module Unicode.Char.General.Compat
, module Unicode.Char.Case
, module Unicode.Char.Case.Compat
, module Unicode.Char.Numeric
, module Unicode.Char.Normalization
, module Unicode.Char.Identifiers
-- * Utils
, showCodePoint
-- * Re-export from @base@
, ord
, chr
)
where
import Data.Char (chr, ord)
import Unicode.Char.Case hiding (Unfold(..), Step(..))
import Unicode.Char.Case.Compat
import Unicode.Char.General hiding (isAlphaNum)
import Unicode.Char.General.Compat
import Unicode.Char.Identifiers
import Unicode.Char.Numeric
import Unicode.Char.Normalization
import Unicode.Internal.Char.Version (unicodeVersion)
-- [NOTE] Code inspired from 'showIntAtBase'
-- | Show the code point of a character using the Unicode Standard convention:
-- hexadecimal codepoint padded with zeros if inferior to 4 characters.
--
-- >>> showCodePoint '\xf' ""
-- "000F"
-- >>> showCodePoint '\x1ffff' ""
-- "1FFFF"
--
-- @since 0.6.0
showCodePoint :: Char -> ShowS
showCodePoint c = pad . showIt (quotRem cp 16)
where
cp = ord c
pad | cp <= 0x00f = \s -> '0' : '0' : '0' : s
| cp <= 0x0ff = \s -> '0' : '0' : s
| cp <= 0xfff = ('0' :)
| otherwise = id
showIt (n, d) r = case intToDigiT d of
!c' -> case n of
0 -> c' : r
_ -> showIt (quotRem n 16) (c' : r)
|