1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
|
{-# LANGUAGE CPP, BangPatterns #-}
-- |
-- Module : BoundedWrite
-- Copyright : (c) 2010 Simon Meier
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : https://github.com/blaze-builder
-- Stability : stable
-- Portability : tested on GHC only
--
-- A more general/efficient write type.
--
module BoundedWrite (main) where
import Foreign
import Data.Monoid
import Data.Char
import Foreign.UPtr
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Lazy as L
import Blaze.ByteString.Builder.Internal
import Blaze.ByteString.Builder.Write
import Blaze.ByteString.Builder.Word
import Criterion.Main
------------------------------------------------------------------------------
-- Benchmarks
------------------------------------------------------------------------------
main :: IO ()
main = defaultMain $ concat
{-
[ benchmark "mconcat . map (fromWriteSingleton writeChar)"
bfrom3Chars
from3Chars
chars3
]
-}
[ benchmark "mconcat . map fromWord8"
(mconcat . map bfromWord8)
(mconcat . map fromWord8)
word8s
]
where
benchmark name boundedF staticF x =
[ bench (name ++ " <- bounded write") $
whnf (L.length . toLazyByteString . boundedF) x
, bench (name ++ " <- static write") $
whnf (L.length . toLazyByteString . staticF) x
]
word8s :: [Word8]
word8s = take 100000 $ cycle [0..]
{-# NOINLINE word8s #-}
chars :: [Char]
chars = take 100000 $ ['\0'..]
{-# NOINLINE chars #-}
chars2 :: [(Char,Char)]
chars2 = zip chars chars
{-# NOINLINE chars2 #-}
chars3 :: [(Char, Char, Char)]
chars3 = zip3 chars (reverse chars) (reverse chars)
{-# NOINLINE chars3 #-}
bfromChars = (mconcat . map (fromBWriteSingleton bwriteChar))
{-# NOINLINE bfromChars #-}
fromChars = (mconcat . map (fromWriteSingleton writeChar))
{-# NOINLINE fromChars #-}
bfrom2Chars = (mconcat . map (fromBWriteSingleton (\(c1, c2) -> bwriteChar c1 `mappend` bwriteChar c2)))
{-# NOINLINE bfrom2Chars #-}
from2Chars = (mconcat . map (fromWriteSingleton (\(c1, c2) -> writeChar c1 `mappend` writeChar c2)))
{-# NOINLINE from2Chars #-}
bfrom3Chars = (mconcat . map (fromBWriteSingleton (\(c1, c2, c3) -> bwriteChar c1 `mappend` bwriteChar c2 `mappend` bwriteChar c3)))
{-# NOINLINE bfrom3Chars #-}
from3Chars = (mconcat . map (fromWriteSingleton (\(c1, c2, c3) -> writeChar c1 `mappend` writeChar c2 `mappend` writeChar c3)))
{-# NOINLINE from3Chars #-}
------------------------------------------------------------------------------
-- The Bounded Write Type
------------------------------------------------------------------------------
-- * GRRR* GHC is too 'clever'... code where we branch and each branch should
-- execute a few IO actions and then return a value cannot be taught to GHC.
-- At least not such that it returns the value of the branches unpacked.
--
-- Hmm.. at least he behaves much better for the Monoid instance of BWrite
-- than the one for Write. Serializing UTF-8 chars gets a slowdown of a
-- factor 2 when 2 chars are composed. Perhaps I should try out the writeList
-- instances also, as they may be more sensitive to to much work per Char.
--
data BWrite = BWrite {-# UNPACK #-} !Int (UPtr -> UPtr)
newtype UWrite = UWrite { runUWrite :: UPtr -> UPtr }
instance Monoid UWrite where
mempty = UWrite $ \x -> x
{-# INLINE mempty #-}
(UWrite uw1) `mappend` (UWrite uw2) = UWrite (\up -> uw2 (uw1 up))
{-# INLINE mappend #-}
instance Monoid BWrite where
mempty = BWrite 0 (\x -> x)
{-# INLINE mempty #-}
(BWrite b1 io1) `mappend` (BWrite b2 io2) =
BWrite (b1 + b2) (\op -> io2 (io1 op))
{-# INLINE mappend #-}
execWrite :: IO () -> UPtr -> UPtr
execWrite io op' = S.inlinePerformIO io `seq` op'
{-# INLINE execWrite #-}
execWriteSize :: (Ptr Word8 -> IO ()) -> Int -> UPtr -> UPtr
execWriteSize io size op = execWrite (io (uptrToPtr op)) (op `plusUPtr` size)
{-# INLINE execWriteSize #-}
staticBWrite :: Int -> (Ptr Word8 -> IO ()) -> BWrite
staticBWrite size io = BWrite size (execWriteSize io size)
{-# INLINE staticBWrite #-}
bwriteWord8 :: Word8 -> BWrite
bwriteWord8 x = staticBWrite 1 (`poke` x)
{-# INLINE bwriteWord8 #-}
fromBWrite :: BWrite -> Builder
fromBWrite (BWrite size io) =
Builder step
where
step k !pf !pe
| pf `plusPtr` size <= pe = do
let !pf' = io (ptrToUPtr pf)
k (uptrToPtr pf') pe
| otherwise = return $ BufferFull size pf (step k)
{-# INLINE fromBWrite #-}
fromBWriteSingleton :: (a -> BWrite) -> a -> Builder
fromBWriteSingleton write =
mkPut
where
mkPut x = Builder step
where
step k !pf !pe
| pf `plusPtr` size <= pe = do
let !pf' = io (ptrToUPtr pf)
k (uptrToPtr pf') pe
| otherwise = return $ BufferFull size pf (step k)
where
BWrite size io = write x
{-# INLINE fromBWriteSingleton #-}
bfromWord8 :: Word8 -> Builder
bfromWord8 = fromBWriteSingleton bwriteWord8
-- Utf-8 encoding
-----------------
bwriteChar :: Char -> BWrite
bwriteChar c = BWrite 4 (encodeCharUtf8 f1 f2 f3 f4 c)
where
f1 x = \uptr -> execWrite (do let !ptr = uptrToPtr uptr
poke ptr x )
(uptr `plusUPtr` 1)
f2 x1 x2 = \uptr -> execWrite (do let !ptr = uptrToPtr uptr
poke ptr x1
poke (ptr `plusPtr` 1) x2 )
(uptr `plusUPtr` 2)
f3 x1 x2 x3 = \uptr -> execWrite (do let !ptr = uptrToPtr uptr
poke ptr x1
poke (ptr `plusPtr` 1) x2
poke (ptr `plusPtr` 2) x3 )
(uptr `plusUPtr` 3)
f4 x1 x2 x3 x4 = \uptr -> execWrite (do let !ptr = uptrToPtr uptr
poke ptr x1
poke (ptr `plusPtr` 1) x2
poke (ptr `plusPtr` 2) x3
poke (ptr `plusPtr` 3) x4 )
(uptr `plusUPtr` 4)
{-# INLINE bwriteChar #-}
writeChar :: Char -> Write
writeChar = encodeCharUtf8 f1 f2 f3 f4
where
f1 x = Write 1 $ \ptr -> poke ptr x
f2 x1 x2 = Write 2 $ \ptr -> do poke ptr x1
poke (ptr `plusPtr` 1) x2
f3 x1 x2 x3 = Write 3 $ \ptr -> do poke ptr x1
poke (ptr `plusPtr` 1) x2
poke (ptr `plusPtr` 2) x3
f4 x1 x2 x3 x4 = Write 4 $ \ptr -> do poke ptr x1
poke (ptr `plusPtr` 1) x2
poke (ptr `plusPtr` 2) x3
poke (ptr `plusPtr` 3) x4
{-# INLINE writeChar #-}
-- | Encode a Unicode character to another datatype, using UTF-8. This function
-- acts as an abstract way of encoding characters, as it is unaware of what
-- needs to happen with the resulting bytes: you have to specify functions to
-- deal with those.
--
encodeCharUtf8 :: (Word8 -> a) -- ^ 1-byte UTF-8
-> (Word8 -> Word8 -> a) -- ^ 2-byte UTF-8
-> (Word8 -> Word8 -> Word8 -> a) -- ^ 3-byte UTF-8
-> (Word8 -> Word8 -> Word8 -> Word8 -> a) -- ^ 4-byte UTF-8
-> Char -- ^ Input 'Char'
-> a -- ^ Result
encodeCharUtf8 f1 f2 f3 f4 c = case ord c of
x | x <= 0x7F -> f1 $ fromIntegral x
| x <= 0x07FF ->
let x1 = fromIntegral $ (x `shiftR` 6) + 0xC0
x2 = fromIntegral $ (x .&. 0x3F) + 0x80
in f2 x1 x2
| x <= 0xFFFF ->
let x1 = fromIntegral $ (x `shiftR` 12) + 0xE0
x2 = fromIntegral $ ((x `shiftR` 6) .&. 0x3F) + 0x80
x3 = fromIntegral $ (x .&. 0x3F) + 0x80
in f3 x1 x2 x3
| otherwise ->
let x1 = fromIntegral $ (x `shiftR` 18) + 0xF0
x2 = fromIntegral $ ((x `shiftR` 12) .&. 0x3F) + 0x80
x3 = fromIntegral $ ((x `shiftR` 6) .&. 0x3F) + 0x80
x4 = fromIntegral $ (x .&. 0x3F) + 0x80
in f4 x1 x2 x3 x4
{-# INLINE encodeCharUtf8 #-}
|