File: PropList.hs

package info (click to toggle)
haskell-unicode-data 0.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,004 kB
  • sloc: haskell: 26,075; makefile: 3
file content (112 lines) | stat: -rw-r--r-- 5,111 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
-- autogenerated from https://www.unicode.org/Public/15.1.0/ucd/PropList.txt
-- |
-- Module      : Unicode.Internal.Char.PropList
-- Copyright   : (c) 2020 Composewell Technologies and Contributors
-- License     : Apache-2.0
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental

{-# OPTIONS_HADDOCK hide #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Use camelCase" #-}

module Unicode.Internal.Char.PropList
    ( isPattern_Syntax
    , isPattern_White_Space
    , isWhite_Space
    ) where

import Data.Bits (Bits(..))
import Data.Char (ord)
import Data.Int (Int8)
import Data.Word (Word8)
import GHC.Exts (Ptr(..))
import Unicode.Internal.Bits (lookupBit, lookupWord8AsInt)

{-# INLINE isPattern_Syntax #-}
-- | Predicate for the @Pattern_Syntax@ property, generated from PropList.txt.
--
-- Characters outside the closed range @'\\x0021'..'\\xFE46'@ are rejected
-- without touching the bitmap; everything inside that range is decided by
-- 'lookupIsPattern_SyntaxBitMap'.
isPattern_Syntax :: Char -> Bool
isPattern_Syntax c
    | c < '\x0021' = False
    | c > '\xFE46' = False
    | otherwise = lookupIsPattern_SyntaxBitMap (ord c)

{-# INLINE lookupIsPattern_SyntaxBitMap #-}
-- | Bitmap lookup backing 'isPattern_Syntax'.
--
-- The code point is split into three fields: bits 8 and above index the
-- offsets table, bits 3..7 select a byte relative to the offset found there,
-- and bits 0..2 are handed to 'lookupBit' as its final argument (presumably
-- the bit position within that byte).
--
-- This function performs no range check of its own; 'isPattern_Syntax'
-- restricts the code point before calling it.
lookupIsPattern_SyntaxBitMap :: Int -> Bool
lookupIsPattern_SyntaxBitMap cp = lookupBit bitmap# byteIx bitIx
    where
    -- Base offset read from the offsets table for this block of 256 code points.
    chunkBase = lookupWord8AsInt chunkOffsets# (cp `shiftR` 8)
    -- Five-bit field (0..31) added to the base offset.
    byteIx = chunkBase + ((cp `shiftR` 3) .&. 0x1F)
    -- Low three bits of the code point.
    bitIx = cp .&. 7
    !(Ptr bitmap#) = isPattern_SyntaxDataBitMap
    !(Ptr chunkOffsets#) = isPattern_SyntaxOffsetsBitMap

-- | Raw bitmap bytes consulted by 'lookupIsPattern_SyntaxBitMap'.
--
-- The literal is a primitive string (note the trailing @#@) wrapped in 'Ptr'.
-- The lookup addresses it by byte: a base offset read from
-- 'isPattern_SyntaxOffsetsBitMap' plus a 5-bit index taken from the code
-- point; the low three bits of the code point are passed on to 'lookupBit'.
--
-- This data is autogenerated (see the note at the top of the module), so it
-- should be regenerated rather than edited by hand.
isPattern_SyntaxDataBitMap :: Ptr Int8
isPattern_SyntaxDataBitMap = Ptr
    "\0\0\255\255\255\0\255\127\254\255\239\127\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\254\255\0\252\1\0\
    \\0\120\1\0\0\120\0\0\0\0\254\90\67\136\0\0\128\0\0\0\128\0\14\255\255\255\1\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
    \\0\0\0\0\192\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\96\0\0\255\255\255\0\255\127\254\255\239\127\0\0\0\0\0\0\0\0\
    \\0\0\0\0\0\0\0\0\0\0\0\0\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\0\0\0\0\0\0\
    \\0\0\0\0\0\0\0\0\0\0\0\0\0\0\255\255\255\255\255\255\255\255\255\255\255\255\255\255\63\0\0\0\240\255\255\255\255\255\255\255\255\255\255\255\255\255"#

-- | Offsets table for 'isPattern_SyntaxDataBitMap'.
--
-- 'lookupIsPattern_SyntaxBitMap' reads the entry at index @n `shiftR` 8@
-- through 'lookupWord8AsInt' and uses the result as the base byte offset into
-- the data bitmap. Many entries share the same value, so several blocks of
-- code points resolve to the same region of the data.
--
-- 'isPattern_Syntax' rejects characters above @'\\xFE46'@ before the lookup,
-- so indices past @0xFE@ are never requested through that entry point.
isPattern_SyntaxOffsetsBitMap :: Ptr Word8
isPattern_SyntaxOffsetsBitMap = Ptr
    "\40\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\0\144\162\162\182\162\162\214\162\162\162\162\12\12\178\12\72\12\
    \\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\
    \\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\
    \\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\
    \\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\
    \\12\12\12\97\121"#

{-# INLINE isPattern_White_Space #-}
-- | Predicate for the @Pattern_White_Space@ property, generated from
-- PropList.txt.
--
-- Characters outside the closed range @'\\x0009'..'\\x2029'@ are rejected
-- without touching the bitmap; everything inside that range is decided by
-- 'lookupIsPattern_White_SpaceBitMap'.
isPattern_White_Space :: Char -> Bool
isPattern_White_Space c
    | c < '\x0009' = False
    | c > '\x2029' = False
    | otherwise = lookupIsPattern_White_SpaceBitMap (ord c)

{-# INLINE lookupIsPattern_White_SpaceBitMap #-}
-- | Bitmap lookup backing 'isPattern_White_Space'.
--
-- The code point is split into three fields: bits 8 and above index the
-- offsets table, bits 3..7 select a byte relative to the offset found there,
-- and bits 0..2 are handed to 'lookupBit' as its final argument (presumably
-- the bit position within that byte).
--
-- This function performs no range check of its own; 'isPattern_White_Space'
-- restricts the code point before calling it.
lookupIsPattern_White_SpaceBitMap :: Int -> Bool
lookupIsPattern_White_SpaceBitMap cp = lookupBit bitmap# byteIx bitIx
    where
    -- Base offset read from the offsets table for this block of 256 code points.
    chunkBase = lookupWord8AsInt chunkOffsets# (cp `shiftR` 8)
    -- Five-bit field (0..31) added to the base offset.
    byteIx = chunkBase + ((cp `shiftR` 3) .&. 0x1F)
    -- Low three bits of the code point.
    bitIx = cp .&. 7
    !(Ptr bitmap#) = isPattern_White_SpaceDataBitMap
    !(Ptr chunkOffsets#) = isPattern_White_SpaceOffsetsBitMap

-- | Raw bitmap bytes consulted by 'lookupIsPattern_White_SpaceBitMap'.
--
-- The literal is a primitive string (note the trailing @#@) wrapped in 'Ptr'.
-- The lookup addresses it by byte: a base offset read from
-- 'isPattern_White_SpaceOffsetsBitMap' plus a 5-bit index taken from the code
-- point; the low three bits of the code point are passed on to 'lookupBit'.
--
-- This data is autogenerated (see the note at the top of the module), so it
-- should be regenerated rather than edited by hand.
isPattern_White_SpaceDataBitMap :: Ptr Int8
isPattern_White_SpaceDataBitMap = Ptr
    "\0\62\0\0\1\0\0\0\0\0\0\0\0\0\0\0\32\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
    \\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\192\0\0\0\3"#

-- | Offsets table for 'isPattern_White_SpaceDataBitMap'.
--
-- 'lookupIsPattern_White_SpaceBitMap' reads the entry at index
-- @n `shiftR` 8@ through 'lookupWord8AsInt' and uses the result as the base
-- byte offset into the data bitmap.
--
-- 'isPattern_White_Space' rejects characters above @'\\x2029'@ before the
-- lookup, so indices past @0x20@ are never requested through that entry point.
isPattern_White_SpaceOffsetsBitMap :: Ptr Word8
isPattern_White_SpaceOffsetsBitMap = Ptr
    "\0\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\80"#

{-# INLINE isWhite_Space #-}
-- | Predicate for the @White_Space@ property, generated from PropList.txt.
--
-- Characters outside the closed range @'\\x0009'..'\\x3000'@ are rejected
-- without touching the bitmap; everything inside that range is decided by
-- 'lookupIsWhite_SpaceBitMap'.
isWhite_Space :: Char -> Bool
isWhite_Space c
    | c < '\x0009' = False
    | c > '\x3000' = False
    | otherwise = lookupIsWhite_SpaceBitMap (ord c)

{-# INLINE lookupIsWhite_SpaceBitMap #-}
-- | Bitmap lookup backing 'isWhite_Space'.
--
-- The code point is split into three fields: bits 8 and above index the
-- offsets table, bits 3..7 select a byte relative to the offset found there,
-- and bits 0..2 are handed to 'lookupBit' as its final argument (presumably
-- the bit position within that byte).
--
-- This function performs no range check of its own; 'isWhite_Space'
-- restricts the code point before calling it.
lookupIsWhite_SpaceBitMap :: Int -> Bool
lookupIsWhite_SpaceBitMap cp = lookupBit bitmap# byteIx bitIx
    where
    -- Base offset read from the offsets table for this block of 256 code points.
    chunkBase = lookupWord8AsInt chunkOffsets# (cp `shiftR` 8)
    -- Five-bit field (0..31) added to the base offset.
    byteIx = chunkBase + ((cp `shiftR` 3) .&. 0x1F)
    -- Low three bits of the code point.
    bitIx = cp .&. 7
    !(Ptr bitmap#) = isWhite_SpaceDataBitMap
    !(Ptr chunkOffsets#) = isWhite_SpaceOffsetsBitMap

-- | Raw bitmap bytes consulted by 'lookupIsWhite_SpaceBitMap'.
--
-- The literal is a primitive string (note the trailing @#@) wrapped in 'Ptr'.
-- The lookup addresses it by byte: a base offset read from
-- 'isWhite_SpaceOffsetsBitMap' plus a 5-bit index taken from the code point;
-- the low three bits of the code point are passed on to 'lookupBit'.
--
-- This data is autogenerated (see the note at the top of the module), so it
-- should be regenerated rather than edited by hand.
isWhite_SpaceDataBitMap :: Ptr Int8
isWhite_SpaceDataBitMap = Ptr
    "\255\7\0\0\0\131\0\0\0\0\0\128\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0\0\0\0\0\
    \\0\0\0\0\0\0\0\0\0\0\0\62\0\0\1\0\0\0\0\0\0\0\0\0\0\0\32\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
    \\0\0\0\0\0\0\0\0\0\0\0\0\0"#

-- | Offsets table for 'isWhite_SpaceDataBitMap'.
--
-- 'lookupIsWhite_SpaceBitMap' reads the entry at index @n `shiftR` 8@ through
-- 'lookupWord8AsInt' and uses the result as the base byte offset into the
-- data bitmap.
--
-- 'isWhite_Space' rejects characters above @'\\x3000'@ before the lookup, so
-- indices past @0x30@ are never requested through that entry point.
isWhite_SpaceOffsetsBitMap :: Ptr Word8
isWhite_SpaceOffsetsBitMap = Ptr
    "\60\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\28\12\12\12\12\12\12\12\12\12\0\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\44"#