File: Parse.hs

package info (click to toggle)
haskell-memory 0.18.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 324 kB
  • sloc: haskell: 3,362; makefile: 7
file content (258 lines) | stat: -rw-r--r-- 9,791 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
-- |
-- Module      : Data.ByteArray.Parse
-- License     : BSD-style
-- Maintainer  : Vincent Hanquez <vincent@snarc.org>
-- Stability   : experimental
-- Portability : portable
--
-- A very simple bytearray parser related to Parsec and Attoparsec
--
-- Simple example:
--
-- > > parse ((,,) <$> take 2 <*> byte 0x20 <*> (bytes "abc" *> anyByte)) "xx abctest"
-- > ParseOK "est" ("xx", 116)
--
{-# LANGUAGE CPP #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE OverloadedStrings #-}
module Data.ByteArray.Parse
    ( Parser
    , Result(..)
    -- * run the Parser
    , parse
    , parseFeed
    -- * Parser methods
    , hasMore
    , byte
    , anyByte
    , bytes
    , take
    , takeWhile
    , takeAll
    , skip
    , skipWhile
    , skipAll
    , takeStorable
    ) where

import           Control.Monad
import qualified Control.Monad.Fail as Fail
import           Foreign.Storable              (Storable, peek, sizeOf)
import           Data.Word

import           Data.Memory.Internal.Imports
import           Data.Memory.Internal.Compat
import           Data.ByteArray.Types          (ByteArrayAccess, ByteArray)
import qualified Data.ByteArray.Types     as B
import qualified Data.ByteArray.Methods   as B

import           Prelude hiding (take, takeWhile)

-- | Simple parsing result, that represent respectively:
--
-- * failure: with the error message
--
-- * continuation: that need for more input data
--
-- * success: the remaining unparsed data and the parser value
data Result byteArray a =
      ParseFail String
    | ParseMore (Maybe byteArray -> Result byteArray a)
    | ParseOK   byteArray a

instance (Show ba, Show a) => Show (Result ba a) where
    show (ParseFail err) = "ParseFailure: " ++ err
    show (ParseMore _)   = "ParseMore _"
    show (ParseOK b a)   = "ParseOK " ++ show a ++ " " ++ show b

-- | The continuation of the current buffer, and the error string
type Failure byteArray r = byteArray -> String -> Result byteArray r

-- | The continuation of the next buffer value, and the parsed value
type Success byteArray a r = byteArray -> a -> Result byteArray r

-- | Simple ByteString parser structure
newtype Parser byteArray a = Parser
    { runParser :: forall r . byteArray
                           -> Failure byteArray r
                           -> Success byteArray a r
                           -> Result byteArray r }

instance Functor (Parser byteArray) where
    fmap f p = Parser $ \buf err ok ->
        runParser p buf err (\b a -> ok b (f a))
instance Applicative (Parser byteArray) where
    pure v    = Parser $ \buf _ ok -> ok buf v
    (<*>) d e = d >>= \b -> e >>= \a -> return (b a)
instance Monad (Parser byteArray) where
#if !(MIN_VERSION_base(4,13,0))
    fail          = Fail.fail
#endif
    return        = pure
    m >>= k       = Parser $ \buf err ok ->
         runParser m buf err (\buf' a -> runParser (k a) buf' err ok)
instance Fail.MonadFail (Parser byteArray) where
    fail errorMsg = Parser $ \buf err _ -> err buf ("Parser failed: " ++ errorMsg)
instance MonadPlus (Parser byteArray) where
    mzero = fail "MonadPlus.mzero"
    mplus f g = Parser $ \buf err ok ->
        -- rewrite the err callback of @f to call @g
        runParser f buf (\_ _ -> runParser g buf err ok) ok
instance Alternative (Parser byteArray) where
    empty = fail "Alternative.empty"
    (<|>) = mplus

-- | Run a parser on an @initial byteArray.
--
-- If the Parser need more data than available, the @feeder function
-- is automatically called and fed to the More continuation.
parseFeed :: (ByteArrayAccess byteArray, Monad m)
          => m (Maybe byteArray)
          -> Parser byteArray a
          -> byteArray
          -> m (Result byteArray a)
parseFeed feeder p initial = loop $ parse p initial
  where loop (ParseMore k) = feeder >>= (loop . k)
        loop r             = return r

-- | Run a Parser on a ByteString and return a 'Result'
parse :: ByteArrayAccess byteArray
      => Parser byteArray a -> byteArray -> Result byteArray a
parse p s = runParser p s (\_ msg -> ParseFail msg) (\b a -> ParseOK b a)

------------------------------------------------------------

-- When needing more data, getMore append the next data
-- to the current buffer. if no further data, then
-- the err callback is called.
getMore :: ByteArray byteArray => Parser byteArray ()
getMore = Parser $ \buf err ok -> ParseMore $ \nextChunk ->
    case nextChunk of
        Nothing -> err buf "EOL: need more data"
        Just nc
            | B.null nc -> runParser getMore buf err ok
            | otherwise -> ok (B.append buf nc) ()

-- Only used by takeAll, which accumulate all the remaining data
-- until ParseMore is fed a Nothing value.
--
-- getAll cannot fail.
getAll :: ByteArray byteArray => Parser byteArray ()
getAll = Parser $ \buf err ok -> ParseMore $ \nextChunk ->
    case nextChunk of
        Nothing -> ok buf ()
        Just nc -> runParser getAll (B.append buf nc) err ok

-- Only used by skipAll, which flush all the remaining data
-- until ParseMore is fed a Nothing value.
--
-- flushAll cannot fail.
flushAll :: ByteArray byteArray => Parser byteArray ()
flushAll = Parser $ \buf err ok -> ParseMore $ \nextChunk ->
    case nextChunk of
        Nothing -> ok buf ()
        Just _  -> runParser flushAll B.empty err ok

------------------------------------------------------------
hasMore :: ByteArray byteArray => Parser byteArray Bool
hasMore = Parser $ \buf err ok ->
    if B.null buf
        then ParseMore $ \nextChunk ->
                case nextChunk of
                    Nothing -> ok buf False
                    Just nc -> runParser hasMore nc err ok
        else ok buf True

-- | Get the next byte from the parser
anyByte :: ByteArray byteArray => Parser byteArray Word8
anyByte = Parser $ \buf err ok ->
    case B.uncons buf of
        Nothing      -> runParser (getMore >> anyByte) buf err ok
        Just (c1,b2) -> ok b2 c1

-- | Parse a specific byte at current position
--
-- if the byte is different than the expected on,
-- this parser will raise a failure.
byte :: ByteArray byteArray => Word8 -> Parser byteArray ()
byte w = Parser $ \buf err ok ->
    case B.uncons buf of
        Nothing      -> runParser (getMore >> byte w) buf err ok
        Just (c1,b2) | c1 == w   -> ok b2 ()
                     | otherwise -> err buf ("byte " ++ show w ++ " : failed : got " ++ show c1)

-- | Parse a sequence of bytes from current position
--
-- if the following bytes don't match the expected
-- bytestring completely, the parser will raise a failure
bytes :: (Show ba, Eq ba, ByteArray ba) => ba -> Parser ba ()
bytes allExpected = consumeEq allExpected
  where errMsg = "bytes " ++ show allExpected ++ " : failed"

        -- partially consume as much as possible or raise an error.
        consumeEq expected = Parser $ \actual err ok ->
            let eLen = B.length expected in
            if B.length actual >= eLen
                then    -- enough data for doing a full match
                        let (aMatch,aRem) = B.splitAt eLen actual
                         in if aMatch == expected
                                then ok aRem ()
                                else err actual errMsg
                else    -- not enough data, match as much as we have, and then recurse.
                        let (eMatch, eRem) = B.splitAt (B.length actual) expected
                         in if actual == eMatch
                                then runParser (getMore >> consumeEq eRem) B.empty err ok
                                else err actual errMsg

------------------------------------------------------------

-- | Take a storable from the current position in the stream
takeStorable :: (ByteArray byteArray, Storable d)
             => Parser byteArray d
takeStorable = anyStorable undefined
  where
    anyStorable :: ByteArray byteArray => Storable d => d -> Parser byteArray d
    anyStorable a = do
        buf <- take (sizeOf a)
        return $ unsafeDoIO $ B.withByteArray buf $ \ptr -> peek ptr

-- | Take @n bytes from the current position in the stream
take :: ByteArray byteArray => Int -> Parser byteArray byteArray
take n = Parser $ \buf err ok ->
    if B.length buf >= n
        then let (b1,b2) = B.splitAt n buf in ok b2 b1
        else runParser (getMore >> take n) buf err ok

-- | Take bytes while the @predicate hold from the current position in the stream
takeWhile :: ByteArray byteArray => (Word8 -> Bool) -> Parser byteArray byteArray
takeWhile predicate = Parser $ \buf err ok ->
    let (b1, b2) = B.span predicate buf
     in if B.null b2
            then runParser (getMore >> takeWhile predicate) buf err ok
            else ok b2 b1

-- | Take the remaining bytes from the current position in the stream
takeAll :: ByteArray byteArray => Parser byteArray byteArray
takeAll = Parser $ \buf err ok ->
    runParser (getAll >> returnBuffer) buf err ok
  where
    returnBuffer = Parser $ \buf _ ok -> ok B.empty buf

-- | Skip @n bytes from the current position in the stream
skip :: ByteArray byteArray => Int -> Parser byteArray ()
skip n = Parser $ \buf err ok ->
    if B.length buf >= n
        then ok (B.drop n buf) ()
        else runParser (getMore >> skip (n - B.length buf)) B.empty err ok

-- | Skip bytes while the @predicate hold from the current position in the stream
skipWhile :: ByteArray byteArray => (Word8 -> Bool) -> Parser byteArray ()
skipWhile p = Parser $ \buf err ok ->
    let (_, b2) = B.span p buf 
     in if B.null b2
            then runParser (getMore >> skipWhile p) B.empty err ok
            else ok b2 ()

-- | Skip all the remaining bytes from the current position in the stream
skipAll :: ByteArray byteArray => Parser byteArray ()
skipAll = Parser $ \buf err ok -> runParser flushAll buf err ok