File: Word8.hs

package info (click to toggle)
haskell-binary-parsers 0.2.3.0-8
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,272 kB
  • sloc: haskell: 2,151; makefile: 5
file content (314 lines) | stat: -rw-r--r-- 9,584 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP          #-}
-- |
-- Module      :  Data.Binary.Parser.Word8
-- Copyright   :  Bryan O'Sullivan 2007-2015, Winterland 2016
-- License     :  BSD3
--
-- Maintainer  :  drkoster@qq.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- Simple, efficient combinator parsing for 'B.ByteString' strings.
--
module Data.Binary.Parser.Word8 where

import           Control.Applicative
import           Control.Monad
import           Data.Binary.Get
import           Data.Binary.Get.Internal
import           Data.ByteString          (ByteString)
import qualified Data.ByteString          as B
import           Data.ByteString.Internal (ByteString (..))
import qualified Data.ByteString.Unsafe   as B
import           Data.Word
import           Foreign.ForeignPtr       (withForeignPtr)
import           Foreign.Ptr              (minusPtr, plusPtr)
import qualified Foreign.Storable         as Storable (Storable (peek))
import           Prelude                  hiding (takeWhile)

#if MIN_VERSION_bytestring(0,10,6)
import           Data.ByteString.Internal (accursedUnutterablePerformIO)
#else
import           Data.ByteString.Internal (inlinePerformIO)

{-# INLINE accursedUnutterablePerformIO #-}
-- | You must be truly desperate to come to me for help.
accursedUnutterablePerformIO :: IO a -> a
accursedUnutterablePerformIO = inlinePerformIO
#endif

--------------------------------------------------------------------------------

-- | Match any byte, to perform lookahead. Returns 'Nothing' if end of
-- input has been reached. Does not consume any input.
--
peekMaybe :: Get (Maybe Word8)
peekMaybe = do
    e <- isEmpty
    if e then return Nothing
         else Just <$> peek
{-# INLINE peekMaybe #-}

-- | Match any byte, to perform lookahead.  Does not consume any
-- input, but will fail if end of input has been reached.
--
peek :: Get Word8
peek = do
    ensureN 1
    bs <- get
    return (B.unsafeHead bs)
{-# INLINE peek #-}

-- | The parser @satisfy p@ succeeds for any byte for which the
-- predicate @p@ returns 'True'. Returns the byte that is actually
-- parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit w = w >= 48 && w <= 57
--
satisfy :: (Word8 -> Bool) -> Get Word8
satisfy p = do
    ensureN 1
    bs <- get
    let w = B.unsafeHead bs
    if p w then put (B.unsafeTail bs) >> return w
           else fail "satisfy"
{-# INLINE satisfy #-}

-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if
-- the predicate @p@ returns 'True' on the transformed value. The
-- parser returns the transformed byte that was parsed.
--
satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Get a
satisfyWith f p = do
    ensureN 1
    bs <- get
    let w = B.unsafeHead bs
        r = f w
    if p r then put (B.unsafeTail bs) >> return r
           else fail "satisfyWith"
{-# INLINE satisfyWith #-}

-- | Match a specific byte.
--
word8 :: Word8 -> Get ()
word8 c = do
    ensureN 1
    bs <- get
    let w = B.unsafeHead bs
    if c == w then put (B.unsafeTail bs)
              else fail "word8"
{-# INLINE word8 #-}

-- | Match any byte.
--
anyWord8 :: Get Word8
anyWord8 = getWord8
{-# INLINE anyWord8 #-}

-- | The parser @skipWord8 p@ succeeds for any byte for which the predicate @p@ returns 'True'.
--
skipWord8 :: (Word8 -> Bool) -> Get ()
skipWord8 p = do
    ensureN 1
    bs <- get
    let w = B.unsafeHead bs
    if p w then put (B.unsafeTail bs)
              else fail "skip"
{-# INLINE skipWord8 #-}

--------------------------------------------------------------------------------

-- | This is a faster version of 'skip' for small N (smaller than chunk size).
--
skipN :: Int -> Get ()
skipN n = do
    bs <- get
    let l = B.length bs
    if l >= n then put (B.unsafeDrop n bs)
              else put B.empty >> skip (l - n)
{-# INLINE skipN #-}

-- | Consume input as long as the predicate returns 'False' or reach the end of input,
-- and return the consumed input.
--
takeTill :: (Word8 -> Bool) -> Get ByteString
takeTill p = do
    bs <- get
    let (want, rest) = B.break p bs
    put rest
    if B.null rest then B.concat . reverse <$> go [want] else return want
  where
    go acc = do
        e <- isEmpty -- isEmpty will draw input here
        if e
        then return acc
        else do
            bs <- get
            let (want, rest) = B.break p bs
                acc' = want : acc
            put rest
            if B.null rest then go acc' else return acc'
{-# INLINE takeTill #-}

-- | Consume input as long as the predicate returns 'True' or reach the end of input,
-- and return the consumed input.
--
takeWhile :: (Word8 -> Bool) -> Get ByteString
takeWhile p = do
    bs <- get
    let (want, rest) = B.span p bs
    put rest
    if B.null rest then B.concat . reverse <$> go [want] else return want
  where
    go acc = do
        e <- isEmpty
        if e
        then return acc
        else do
            bs <- get
            let (want, rest) = B.span p bs
                acc' = want : acc
            put rest
            if B.null rest then go acc' else return acc'
{-# INLINE takeWhile #-}

-- | Similar to 'takeWhile', but requires the predicate to succeed on at least one byte
-- of input: it will fail if the predicate never returns 'True' or reach the end of input
--
takeWhile1 :: (Word8 -> Bool) -> Get ByteString
takeWhile1 p = do
    bs <- takeWhile p
    if B.null bs then fail "takeWhile1" else return bs
{-# INLINE takeWhile1 #-}

-- | Skip past input for as long as the predicate returns 'True'.
--
skipWhile :: (Word8 -> Bool) -> Get ()
skipWhile p = do
    bs <- get
    let rest = B.dropWhile p bs
    put rest
    when (B.null rest) go
  where
    go = do
        e <- isEmpty
        unless e $ do
            bs <- get
            let rest = B.dropWhile p bs
            put rest
            when (B.null rest) go
{-# INLINE skipWhile #-}

-- | Skip over white space using 'isSpace'.
--
skipSpaces :: Get ()
skipSpaces = skipWhile isSpace
{-# INLINE skipSpaces #-}

-- | @string s@ parses a sequence of bytes that identically match @s@.
--
string :: ByteString -> Get ()
string bs = do
    let l = B.length bs
    bs' <- get
    if l <= B.length bs'              -- current chunk is enough
    then if B.unsafeTake l bs' == bs
        then put (B.unsafeDrop l bs')
        else fail "string"
    else do
        ensureN l
        bs'' <- get
        if B.unsafeTake l bs'' == bs
        then put (B.unsafeDrop l bs'')
        else fail "string"
{-# INLINE string #-}

-- | A stateful scanner.  The predicate consumes and transforms a
-- state argument, and each transformed state is passed to successive
-- invocations of the predicate on each byte of the input until one
-- returns 'Nothing' or the input ends.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'Nothing' on the first byte of input.
--
scan :: s -> (s -> Word8 -> Maybe s) -> Get ByteString
scan s0 consume = withInputChunks s0 consume' B.concat (return . B.concat)
  where
    consume' s1 (PS fp off len) = accursedUnutterablePerformIO $
        withForeignPtr fp $ \ptr0 -> do
            let start = ptr0 `plusPtr` off
                end   = start `plusPtr` len
            go fp off start end start s1
    go fp off start end ptr !s
        | ptr < end = do
            w <- Storable.peek ptr
            case consume s w of
                Just s' -> go fp off start end (ptr `plusPtr` 1) s'
                _       -> do
                    let !len1 = ptr `minusPtr` start
                        !off2 = off + len1
                        !len2 = end `minusPtr` ptr
                    return (Right (PS fp off len1, PS fp off2 len2))
        | otherwise = return (Left s)
{-# INLINE scan #-}

-- | Similar to 'scan', but working on 'ByteString' chunks, The predicate
-- consumes a 'ByteString' chunk and transforms a state argument,
-- and each transformed state is passed to successive invocations of
-- the predicate on each chunk of the input until one chunk got splited to
-- @Right (ByteString, ByteString)@ or the input ends.
--
scanChunks :: s -> Consume s -> Get ByteString
scanChunks s consume = withInputChunks s consume B.concat (return . B.concat)
{-# INLINE scanChunks #-}

--------------------------------------------------------------------------------

-- | Fast 'Word8' predicate for matching ASCII space characters
--
-- >isSpace w = w == 32 || w - 9 <= 4
--
isSpace :: Word8 -> Bool
isSpace w = w == 32 || w - 9 <= 4
{-# INLINE isSpace #-}

-- | Decimal digit predicate.
--
isDigit :: Word8 -> Bool
isDigit w = w - 48 <= 9
{-# INLINE isDigit #-}

-- | Hex digit predicate.
--
isHexDigit :: Word8 -> Bool
isHexDigit w = (w >= 48 && w <= 57) || (w >= 97 && w <= 102) || (w >= 65 && w <= 70)
{-# INLINE isHexDigit #-}

-- | A predicate that matches either a space @\' \'@ or horizontal tab
-- @\'\\t\'@ character.
--
isHorizontalSpace :: Word8 -> Bool
isHorizontalSpace w = w == 32 || w == 9
{-# INLINE isHorizontalSpace #-}

-- | A predicate that matches either a carriage return @\'\\r\'@ or
-- newline @\'\\n\'@ character.
--
isEndOfLine :: Word8 -> Bool
isEndOfLine w = w == 13 || w == 10
{-# INLINE isEndOfLine #-}

--------------------------------------------------------------------------------

-- | Match either a single newline byte @\'\\n\'@, or a carriage
-- return followed by a newline byte @\"\\r\\n\"@.
endOfLine :: Get ()
endOfLine = do
    w <- getWord8
    case w of
        10 -> return ()
        13 -> word8 10
        _  -> fail "endOfLine"
{-# INLINE endOfLine #-}