File: Char.hs

package info (click to toggle)
haskell-unicode-data 0.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,004 kB
  • sloc: haskell: 26,075; makefile: 3
file content (78 lines) | stat: -rw-r--r-- 2,679 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
-- |
-- Module      : Unicode.Char
-- Copyright   : (c) 2020 Composewell Technologies and Contributors
-- License     : Apache-2.0
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
--
-- This module provides APIs to access the Unicode character database (UCD)
-- corresponding to [Unicode Standard version
-- 15.1.0](https://www.unicode.org/versions/Unicode15.1.0/).
--
-- This module re-exports several sub-modules under it.  The sub-module
-- structure under `Unicode.Char` is largely based on the
-- ["Property Index by Scope of Use" in UnicodeĀ® Standard Annex #44](https://www.unicode.org/reports/tr44/#Property_Index_Table).
--
-- The @Unicode.Char.*@ modules in turn depend on @Unicode.Internal.Char.*@
-- modules which are programmatically generated from the Unicode standard's
-- Unicode character database files. The module structure under
-- @Unicode.Internal.Char@ is largely based on the UCD text file names from
-- which the properties are generated.
--
-- For the original UCD files used in this code please refer to the @UCD@
-- section on the Unicode standard page.  See
-- https://www.unicode.org/reports/tr44/ to understand the contents and the
-- format of the unicode database files.
--

module Unicode.Char
    ( unicodeVersion
    , module Unicode.Char.General
    , module Unicode.Char.General.Compat
    , module Unicode.Char.Case
    , module Unicode.Char.Case.Compat
    , module Unicode.Char.Numeric
    , module Unicode.Char.Normalization
    , module Unicode.Char.Identifiers

      -- * Utils
    , showCodePoint

      -- * Re-export from @base@
    , ord
    , chr
    )
where

import Data.Char (chr, ord)
import Unicode.Char.Case hiding (Unfold(..), Step(..))
import Unicode.Char.Case.Compat
import Unicode.Char.General hiding (isAlphaNum)
import Unicode.Char.General.Compat
import Unicode.Char.Identifiers
import Unicode.Char.Numeric
import Unicode.Char.Normalization
import Unicode.Internal.Char.Version (unicodeVersion)

-- [NOTE] Code inspired from 'showIntAtBase'
-- | Show the code point of a character using the Unicode Standard convention:
-- hexadecimal codepoint padded with zeros if inferior to 4 characters.
--
-- >>> showCodePoint '\xf' ""
-- "000F"
-- >>> showCodePoint '\x1ffff' ""
-- "1FFFF"
--
-- @since 0.6.0
showCodePoint :: Char -> ShowS
showCodePoint c = pad . showIt (quotRem cp 16)
    where
    cp = ord c
    pad | cp <= 0x00f = \s -> '0' : '0' : '0' : s
        | cp <= 0x0ff = \s -> '0' : '0' : s
        | cp <= 0xfff = ('0' :)
        | otherwise   = id
    showIt (n, d) r = case intToDigiT d of
        !c' -> case n of
            0 -> c' : r
            _ -> showIt (quotRem n 16) (c' : r)