File: Bits.hs

package info (click to toggle)
haskell-unicode-data 0.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,004 kB
  • sloc: haskell: 26,075; makefile: 3
file content (164 lines) | stat: -rw-r--r-- 4,988 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
{-# LANGUAGE CPP #-}

-- |
-- Module      : Unicode.Internal.Bits
-- Copyright   : (c) 2020 Andrew Lelechenko
--               (c) 2020 Composewell Technologies
-- License     : BSD-3-Clause
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
-- Portability : GHC
--
-- Fast, static bitmap lookup utilities

module Unicode.Internal.Bits
    ( -- * Bitmap lookup
      lookupBit,
      lookupWord8AsInt,
      lookupWord8AsInt#,
      lookupWord16AsInt,
      lookupWord16AsInt#,
      lookupWord32#,
      -- * CString
      unpackCString#
    ) where

#include "MachDeps.h"

import GHC.Exts
       (Addr#, Int(..), Int#, Word(..), Word#, indexWord8OffAddr#,
        indexWord16OffAddr#, indexWord32OffAddr#,
        and#, word2Int#, uncheckedShiftL#)
#if MIN_VERSION_base(4,16,0)
import GHC.Exts (word8ToWord#, word16ToWord#, word32ToWord#)
#endif
#ifdef WORDS_BIGENDIAN
import GHC.Exts
       (narrow16Word#, narrow32Word#,
        byteSwap16#, byteSwap32#)
#endif

#if MIN_VERSION_base(4,15,0)
import GHC.Exts (unpackCString#)
#else
import GHC.CString (unpackCString#)
#endif

-- TODO: remove?
-- {- | @lookupBit addr index@ looks up the bit stored at bit index @index@ using
-- a bitmap starting at the address @addr@. Looks up the word containing the bit
-- and then the bit in that word. The caller must make sure that the word at the
-- byte address @(addr + index / wfbs)@, where @wfbs@ is the finite bit size of a
-- word, is legally accessible memory.
-- -}
-- lookupBit :: Addr# -> Int -> Bool
-- lookupBit addr# (I# index#) = W# (word## `and#` bitMask##) /= 0
--   where
--     !fbs@(I# fbs#) = finiteBitSize (0 :: Word) - 1
--     !(I# log2Fbs) = case fbs of
--       31 -> 5
--       63 -> 6
--       _  -> popCount fbs -- this is a really weird architecture

--     wordIndex# = index# `uncheckedIShiftRL#` log2Fbs
-- #ifdef WORDS_BIGENDIAN
--     word## = byteSwap# (indexWordOffAddr# addr# wordIndex#)
-- #else
--     word## = indexWordOffAddr# addr# wordIndex#
-- #endif
--     -- x % 2^n = x & (2^n - 1)
--     bitIndex# = index# `andI#` fbs#
--     bitMask## = 1## `uncheckedShiftL#` bitIndex#

{- | @lookupBit addr byteIndex bitIndex@ looks up the bit stored in the byte
at index @byteIndex@ at the bit index @bitIndex@ using a bitmap starting at the
address @addr@. The caller must make sure that the byte at address
@(addr + byteIndex)@ is legally accessible memory.
-}
lookupBit :: Addr# -> Int -> Int -> Bool
lookupBit addr# (I# byteIndex#) (I# bitIndex#) =
    W# (word## `and#` bitMask##) /= 0
  where
#if MIN_VERSION_base(4,16,0)
    word## = word8ToWord# (indexWord8OffAddr# addr# byteIndex#)
#else
    word## = indexWord8OffAddr# addr# byteIndex#
#endif
    bitMask## = 1## `uncheckedShiftL#` bitIndex#

{-| @lookupWord8AsInt addr index@ looks up for the @index@-th @8@-bits word in
the bitmap starting at @addr@, then convert it to an 'Int'.

The caller must make sure that:

* @ceiling (addr + (n * 8))@ is legally accessible 'GHC.Exts.Word8#'.

@since 0.3.0
-}
lookupWord8AsInt
  :: Addr# -- ^ Bitmap address
  -> Int   -- ^ Word index
  -> Int   -- ^ Resulting word as 'Int'
lookupWord8AsInt addr# (I# index#) = I# (lookupWord8AsInt# addr# index#)

lookupWord8AsInt#
  :: Addr# -- ^ Bitmap address
  -> Int#  -- ^ Word index
  -> Int#  -- ^ Resulting word as 'Int'
lookupWord8AsInt# addr# index# = word2Int# word##
  where
#if MIN_VERSION_base(4,16,0)
    word## = word8ToWord# (indexWord8OffAddr# addr# index#)
#else
    word## = indexWord8OffAddr# addr# index#
#endif

lookupWord16AsInt
  :: Addr# -- ^ Bitmap address
  -> Int   -- ^ Word index
  -> Int   -- ^ Resulting word as `Int`
lookupWord16AsInt addr# (I# k#) = I# (lookupWord16AsInt# addr# k#)

lookupWord16AsInt#
  :: Addr# -- ^ Bitmap address
  -> Int#  -- ^ Word index
  -> Int#  -- ^ Resulting word as `Int`
lookupWord16AsInt# addr# k# = word2Int# word##
    where
#ifdef WORDS_BIGENDIAN
#if MIN_VERSION_base(4,16,0)
    word## = narrow16Word# (byteSwap16# (word16ToWord# (indexWord16OffAddr# addr# k#)))
#else
    word## = narrow16Word# (byteSwap16# (indexWord16OffAddr# addr# k#))
#endif
#elif MIN_VERSION_base(4,16,0)
    word## = word16ToWord# (indexWord16OffAddr# addr# k#)
#else
    word## = indexWord16OffAddr# addr# k#
#endif

{-| @lookupWord32# addr index@ looks up for the @index@-th 32-bits word in
the bitmap starting at @addr@, then convert it to a 'Word#'.

The caller must make sure that:

* @ceiling (addr + (n * 32))@ is legally accessible 'GHC.Exts.Word32#'.

@since 0.4.1
-}
lookupWord32#
  :: Addr# -- ^ Bitmap address
  -> Int#  -- ^ Word index
  -> Word# -- ^ Resulting word
lookupWord32#
#ifdef WORDS_BIGENDIAN
#if MIN_VERSION_base(4,16,0)
    addr# k# = narrow32Word# (byteSwap32# (word32ToWord# (indexWord32OffAddr# addr# k#)))
#else
    addr# k# = narrow32Word# (byteSwap32# (indexWord32OffAddr# addr# k#))
#endif
#elif MIN_VERSION_base(4,16,0)
    addr# k# = word32ToWord# (indexWord32OffAddr# addr# k#)
#else
    = indexWord32OffAddr#
#endif