File: BoundedWrite.hs

package info (click to toggle)
haskell-blaze-builder 0.4.4.1-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 476 kB
  • sloc: haskell: 5,891; makefile: 87; ansic: 39
file content (240 lines) | stat: -rw-r--r-- 8,519 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
{-# LANGUAGE CPP, BangPatterns #-}
-- |
-- Module      : BoundedWrite
-- Copyright   : (c) 2010 Simon Meier
-- License     : BSD3-style (see LICENSE)
--
-- Maintainer  : https://github.com/blaze-builder
-- Stability   : stable
-- Portability : tested on GHC only
--
-- A more general/efficient write type.
--
module BoundedWrite (main) where

import Foreign
import Data.Monoid
import Data.Char

import Foreign.UPtr

import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Lazy as L

import Blaze.ByteString.Builder.Internal
import Blaze.ByteString.Builder.Write
import Blaze.ByteString.Builder.Word

import Criterion.Main

------------------------------------------------------------------------------
-- Benchmarks
------------------------------------------------------------------------------

main :: IO ()
main = defaultMain $ concat
    {-
    [ benchmark "mconcat . map (fromWriteSingleton writeChar)"
        bfrom3Chars
        from3Chars
        chars3
    ]
    -}
    [ benchmark "mconcat . map fromWord8"
        (mconcat . map bfromWord8)
        (mconcat . map fromWord8)
        word8s
    ]
  where
    benchmark name boundedF staticF x =
        [ bench (name ++ " <- bounded write") $
            whnf (L.length . toLazyByteString . boundedF) x
        , bench (name ++ " <- static write") $
            whnf (L.length . toLazyByteString . staticF) x
        ]

word8s :: [Word8]
word8s = take 100000 $ cycle [0..]
{-# NOINLINE word8s #-}

chars :: [Char]
chars = take 100000 $ ['\0'..]
{-# NOINLINE chars #-}

chars2 :: [(Char,Char)]
chars2 = zip chars chars
{-# NOINLINE chars2 #-}

chars3 :: [(Char, Char, Char)]
chars3 = zip3 chars (reverse chars) (reverse chars)
{-# NOINLINE chars3 #-}

bfromChars = (mconcat . map (fromBWriteSingleton bwriteChar))
{-# NOINLINE bfromChars #-}

fromChars = (mconcat . map (fromWriteSingleton writeChar))
{-# NOINLINE fromChars #-}

bfrom2Chars = (mconcat . map (fromBWriteSingleton (\(c1, c2) -> bwriteChar c1 `mappend` bwriteChar c2)))
{-# NOINLINE bfrom2Chars #-}

from2Chars = (mconcat . map (fromWriteSingleton (\(c1, c2) -> writeChar c1 `mappend` writeChar c2)))
{-# NOINLINE from2Chars #-}

bfrom3Chars = (mconcat . map (fromBWriteSingleton (\(c1, c2, c3) -> bwriteChar c1 `mappend` bwriteChar c2 `mappend` bwriteChar c3)))
{-# NOINLINE bfrom3Chars #-}

from3Chars = (mconcat . map (fromWriteSingleton (\(c1, c2, c3) -> writeChar c1 `mappend` writeChar c2 `mappend` writeChar c3)))
{-# NOINLINE from3Chars #-}

------------------------------------------------------------------------------
-- The Bounded Write Type
------------------------------------------------------------------------------

-- * GRRR* GHC is too 'clever'... code where we branch and each branch should
-- execute a few IO actions and then return a value cannot be taught to GHC.
-- At least not such that it returns the value of the branches unpacked.
--
-- Hmm.. at least he behaves much better for the Monoid instance of BWrite
-- than the one for Write. Serializing UTF-8 chars gets a slowdown of a
-- factor 2 when 2 chars are composed. Perhaps I should try out the writeList
-- instances also, as they may be more sensitive to to much work per Char.
--
data BWrite = BWrite {-# UNPACK #-} !Int (UPtr -> UPtr)

newtype UWrite = UWrite { runUWrite :: UPtr -> UPtr }

instance Monoid UWrite where
  mempty = UWrite $ \x -> x
  {-# INLINE mempty #-}
  (UWrite uw1) `mappend` (UWrite uw2) = UWrite (\up -> uw2 (uw1 up))
  {-# INLINE mappend #-}

instance Monoid BWrite where
  mempty = BWrite 0 (\x -> x)
  {-# INLINE mempty #-}
  (BWrite b1 io1) `mappend` (BWrite b2 io2) =
    BWrite (b1 + b2) (\op -> io2 (io1 op))
  {-# INLINE mappend #-}

execWrite :: IO () -> UPtr -> UPtr
execWrite io op' = S.inlinePerformIO io `seq` op'
{-# INLINE execWrite #-}

execWriteSize :: (Ptr Word8 -> IO ()) -> Int -> UPtr -> UPtr
execWriteSize io size op = execWrite (io (uptrToPtr op)) (op `plusUPtr` size)
{-# INLINE execWriteSize #-}

staticBWrite :: Int -> (Ptr Word8 -> IO ()) -> BWrite
staticBWrite size io = BWrite size (execWriteSize io size)
{-# INLINE staticBWrite #-}

bwriteWord8 :: Word8 -> BWrite
bwriteWord8 x = staticBWrite 1 (`poke` x)
{-# INLINE bwriteWord8 #-}

fromBWrite :: BWrite -> Builder
fromBWrite (BWrite size io) =
    Builder step
  where
    step k !pf !pe
      | pf `plusPtr` size <= pe = do
          let !pf' = io (ptrToUPtr pf)
          k (uptrToPtr pf') pe
      | otherwise = return $ BufferFull size pf (step k)
{-# INLINE fromBWrite #-}

fromBWriteSingleton :: (a -> BWrite) -> a -> Builder
fromBWriteSingleton write =
    mkPut
  where
    mkPut x = Builder step
      where
        step k !pf !pe
          | pf `plusPtr` size <= pe = do
              let !pf' = io (ptrToUPtr pf)
              k (uptrToPtr pf') pe
          | otherwise               = return $ BufferFull size pf (step k)
          where
            BWrite size io = write x
{-# INLINE fromBWriteSingleton #-}

bfromWord8 :: Word8 -> Builder
bfromWord8 = fromBWriteSingleton bwriteWord8

-- Utf-8 encoding
-----------------

bwriteChar :: Char -> BWrite
bwriteChar c = BWrite 4 (encodeCharUtf8 f1 f2 f3 f4 c)
  where
    f1 x =  \uptr -> execWrite (do let !ptr = uptrToPtr uptr
                                   poke ptr x )
                               (uptr `plusUPtr` 1)

    f2 x1 x2 = \uptr -> execWrite (do let !ptr = uptrToPtr uptr
                                      poke ptr x1
                                      poke (ptr `plusPtr` 1) x2 )
                                  (uptr `plusUPtr` 2)

    f3 x1 x2 x3 = \uptr -> execWrite (do let !ptr = uptrToPtr uptr
                                         poke ptr x1
                                         poke (ptr `plusPtr` 1) x2
                                         poke (ptr `plusPtr` 2) x3 )
                                     (uptr `plusUPtr` 3)

    f4 x1 x2 x3 x4 = \uptr -> execWrite (do let !ptr = uptrToPtr uptr
                                            poke ptr x1
                                            poke (ptr `plusPtr` 1) x2
                                            poke (ptr `plusPtr` 2) x3
                                            poke (ptr `plusPtr` 3) x4 )
                                        (uptr `plusUPtr` 4)
{-# INLINE bwriteChar #-}

writeChar :: Char -> Write
writeChar = encodeCharUtf8 f1 f2 f3 f4
  where
    f1 x = Write 1 $ \ptr -> poke ptr x

    f2 x1 x2 = Write 2 $ \ptr -> do poke ptr x1
                                    poke (ptr `plusPtr` 1) x2

    f3 x1 x2 x3 = Write 3 $ \ptr -> do poke ptr x1
                                       poke (ptr `plusPtr` 1) x2
                                       poke (ptr `plusPtr` 2) x3

    f4 x1 x2 x3 x4 = Write 4 $ \ptr -> do poke ptr x1
                                          poke (ptr `plusPtr` 1) x2
                                          poke (ptr `plusPtr` 2) x3
                                          poke (ptr `plusPtr` 3) x4
{-# INLINE writeChar #-}

-- | Encode a Unicode character to another datatype, using UTF-8. This function
-- acts as an abstract way of encoding characters, as it is unaware of what
-- needs to happen with the resulting bytes: you have to specify functions to
-- deal with those.
--
encodeCharUtf8 :: (Word8 -> a)                             -- ^ 1-byte UTF-8
               -> (Word8 -> Word8 -> a)                    -- ^ 2-byte UTF-8
               -> (Word8 -> Word8 -> Word8 -> a)           -- ^ 3-byte UTF-8
               -> (Word8 -> Word8 -> Word8 -> Word8 -> a)  -- ^ 4-byte UTF-8
               -> Char                                     -- ^ Input 'Char'
               -> a                                        -- ^ Result
encodeCharUtf8 f1 f2 f3 f4 c = case ord c of
    x | x <= 0x7F -> f1 $ fromIntegral x
      | x <= 0x07FF ->
           let x1 = fromIntegral $ (x `shiftR` 6) + 0xC0
               x2 = fromIntegral $ (x .&. 0x3F)   + 0x80
           in f2 x1 x2
      | x <= 0xFFFF ->
           let x1 = fromIntegral $ (x `shiftR` 12) + 0xE0
               x2 = fromIntegral $ ((x `shiftR` 6) .&. 0x3F) + 0x80
               x3 = fromIntegral $ (x .&. 0x3F) + 0x80
           in f3 x1 x2 x3
      | otherwise ->
           let x1 = fromIntegral $ (x `shiftR` 18) + 0xF0
               x2 = fromIntegral $ ((x `shiftR` 12) .&. 0x3F) + 0x80
               x3 = fromIntegral $ ((x `shiftR` 6) .&. 0x3F) + 0x80
               x4 = fromIntegral $ (x .&. 0x3F) + 0x80
           in f4 x1 x2 x3 x4
{-# INLINE encodeCharUtf8 #-}