1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
|
{-# LANGUAGE BangPatterns, MagicHash #-}
-- |
-- Module : Data.Text.Internal.Fusion
-- Copyright : (c) Tom Harper 2008-2009,
-- (c) Bryan O'Sullivan 2009-2010,
-- (c) Duncan Coutts 2009
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : GHC
--
-- /Warning/: this is an internal module, and does not have a stable
-- API or name. Functions in this module may not check or enforce
-- preconditions expected by public modules. Use at your own risk!
--
-- Text manipulation functions represented as fusible operations over
-- streams.
module Data.Text.Internal.Fusion
(
-- * Types
Stream(..)
, Step(..)
-- * Creation and elimination
, stream
, unstream
, reverseStream
, length
-- * Transformations
, reverse
-- * Construction
-- ** Scans
, reverseScanr
-- ** Accumulating maps
, mapAccumL
-- ** Generation and unfolding
, unfoldrN
-- * Indexing
, index
, findIndex
, countChar
) where
import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int,
Num(..), Ord(..), ($), (&&),
fromIntegral, otherwise)
import Data.Bits ((.&.))
import Data.Text.Internal (Text(..))
import Data.Text.Internal.Private (runText)
import Data.Text.Internal.Unsafe.Char (ord, unsafeChr, unsafeWrite)
import Data.Text.Internal.Unsafe.Shift (shiftL, shiftR)
import qualified Data.Text.Array as A
import qualified Data.Text.Internal.Fusion.Common as S
import Data.Text.Internal.Fusion.Types
import Data.Text.Internal.Fusion.Size
import qualified Data.Text.Internal as I
import qualified Data.Text.Internal.Encoding.Utf16 as U16
default(Int)
-- | /O(n)/ Convert a 'Text' into a 'Stream Char'.
stream :: Text -> Stream Char
stream (Text arr off len) = Stream next off (betweenSize (len `shiftR` 1) len)
where
!end = off+len
next !i
| i >= end = Done
| n >= 0xD800 && n <= 0xDBFF = Yield (U16.chr2 n n2) (i + 2)
| otherwise = Yield (unsafeChr n) (i + 1)
where
n = A.unsafeIndex arr i
n2 = A.unsafeIndex arr (i + 1)
{-# INLINE [0] stream #-}
-- | /O(n)/ Convert a 'Text' into a 'Stream Char', but iterate
-- backwards.
reverseStream :: Text -> Stream Char
reverseStream (Text arr off len) = Stream next (off+len-1) (betweenSize (len `shiftR` 1) len)
where
{-# INLINE next #-}
next !i
| i < off = Done
| n >= 0xDC00 && n <= 0xDFFF = Yield (U16.chr2 n2 n) (i - 2)
| otherwise = Yield (unsafeChr n) (i - 1)
where
n = A.unsafeIndex arr i
n2 = A.unsafeIndex arr (i - 1)
{-# INLINE [0] reverseStream #-}
-- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
unstream :: Stream Char -> Text
unstream (Stream next0 s0 len) = runText $ \done -> do
let mlen = upperBound 4 len
arr0 <- A.new mlen
let outer arr top = loop
where
loop !s !i =
case next0 s of
Done -> done arr i
Skip s' -> loop s' i
Yield x s'
| j >= top -> {-# SCC "unstream/resize" #-} do
let top' = (top + 1) `shiftL` 1
arr' <- A.new top'
A.copyM arr' 0 arr 0 top
outer arr' top' s i
| otherwise -> do d <- unsafeWrite arr i x
loop s' (i+d)
where j | ord x < 0x10000 = i
| otherwise = i + 1
outer arr0 mlen s0 0
{-# INLINE [0] unstream #-}
{-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}
-- ----------------------------------------------------------------------------
-- * Basic stream functions
length :: Stream Char -> Int
length = S.lengthI
{-# INLINE[0] length #-}
-- | /O(n)/ Reverse the characters of a string.
reverse :: Stream Char -> Text
reverse (Stream next s len0)
| isEmpty len0 = I.empty
| otherwise = I.text arr off' len'
where
len0' = upperBound 4 (larger len0 4)
(arr, (off', len')) = A.run2 (A.new len0' >>= loop s (len0'-1) len0')
loop !s0 !i !len marr =
case next s0 of
Done -> return (marr, (j, len-j))
where j = i + 1
Skip s1 -> loop s1 i len marr
Yield x s1 | i < least -> {-# SCC "reverse/resize" #-} do
let newLen = len `shiftL` 1
marr' <- A.new newLen
A.copyM marr' (newLen-len) marr 0 len
write s1 (len+i) newLen marr'
| otherwise -> write s1 i len marr
where n = ord x
least | n < 0x10000 = 0
| otherwise = 1
m = n - 0x10000
lo = fromIntegral $ (m `shiftR` 10) + 0xD800
hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
write t j l mar
| n < 0x10000 = do
A.unsafeWrite mar j (fromIntegral n)
loop t (j-1) l mar
| otherwise = do
A.unsafeWrite mar (j-1) lo
A.unsafeWrite mar j hi
loop t (j-2) l mar
{-# INLINE [0] reverse #-}
-- | /O(n)/ Perform the equivalent of 'scanr' over a list, only with
-- the input and result reversed.
reverseScanr :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
reverseScanr f z0 (Stream next0 s0 len) = Stream next (Scan1 z0 s0) (len+1) -- HINT maybe too low
where
{-# INLINE next #-}
next (Scan1 z s) = Yield z (Scan2 z s)
next (Scan2 z s) = case next0 s of
Yield x s' -> let !x' = f x z
in Yield x' (Scan2 x' s')
Skip s' -> Skip (Scan2 z s')
Done -> Done
{-# INLINE reverseScanr #-}
-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a stream from a seed
-- value. However, the length of the result is limited by the
-- first argument to 'unfoldrN'. This function is more efficient than
-- 'unfoldr' when the length of the result is known.
unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Stream Char
unfoldrN n = S.unfoldrNI n
{-# INLINE [0] unfoldrN #-}
-------------------------------------------------------------------------------
-- ** Indexing streams
-- | /O(n)/ stream index (subscript) operator, starting from 0.
index :: Stream Char -> Int -> Char
index = S.indexI
{-# INLINE [0] index #-}
-- | The 'findIndex' function takes a predicate and a stream and
-- returns the index of the first element in the stream
-- satisfying the predicate.
findIndex :: (Char -> Bool) -> Stream Char -> Maybe Int
findIndex = S.findIndexI
{-# INLINE [0] findIndex #-}
-- | /O(n)/ The 'count' function returns the number of times the query
-- element appears in the given stream.
countChar :: Char -> Stream Char -> Int
countChar = S.countCharI
{-# INLINE [0] countChar #-}
-- | /O(n)/ Like a combination of 'map' and 'foldl''. Applies a
-- function to each element of a 'Text', passing an accumulating
-- parameter from left to right, and returns a final 'Text'.
mapAccumL :: (a -> Char -> (a,Char)) -> a -> Stream Char -> (a, Text)
mapAccumL f z0 (Stream next0 s0 len) = (nz, I.text na 0 nl)
where
(na,(nz,nl)) = A.run2 (A.new mlen >>= \arr -> outer arr mlen z0 s0 0)
where mlen = upperBound 4 len
outer arr top = loop
where
loop !z !s !i =
case next0 s of
Done -> return (arr, (z,i))
Skip s' -> loop z s' i
Yield x s'
| j >= top -> {-# SCC "mapAccumL/resize" #-} do
let top' = (top + 1) `shiftL` 1
arr' <- A.new top'
A.copyM arr' 0 arr 0 top
outer arr' top' z s i
| otherwise -> do d <- unsafeWrite arr i c
loop z' s' (i+d)
where (z',c) = f z x
j | ord c < 0x10000 = i
| otherwise = i + 1
{-# INLINE [0] mapAccumL #-}
|