File: ResponseParser.hs

package info (click to toggle)
haskell-http-streams 0.8.9.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 244 kB
  • sloc: haskell: 1,972; makefile: 4
file content (273 lines) | stat: -rw-r--r-- 7,900 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
--
-- HTTP client for use with io-streams
--
-- Copyright © 2012-2021 Athae Eredh Siniath and Others
--
-- The code in this file, and the program it is a part of, is
-- made available to you by its authors as open source software:
-- you can redistribute it and/or modify it under the terms of
-- the BSD licence.
--
-- Significant portions of this file were written while studying
-- the HTTP request parser implementation in the Snap Framework;
-- snap-core's src/Snap/Internal/Parsing.hs and snap-server's
-- src/Snap/Internal/Http/Parser.hs, and various utility functions
-- have been cloned from there.
--
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_HADDOCK hide, not-home #-}

module Network.Http.ResponseParser (
    readResponseHeader,
    readResponseBody,
    UnexpectedCompression (..),
    -- for testing
    readDecimal,
) where

import Prelude hiding (take, takeWhile)

import Control.Exception (Exception, throwIO)
import Control.Monad (void)
import Control.Monad.IO.Class (liftIO)
import Data.Attoparsec.ByteString.Char8
import Data.Bits (Bits (..))
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as S
import Data.CaseInsensitive (mk)
import Data.Char (ord)
import Data.Int (Int64)
import Data.Typeable (Typeable)
import System.IO.Streams (Generator, InputStream)
import qualified System.IO.Streams as Streams
import qualified System.IO.Streams.Attoparsec as Streams

#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
#endif

import Network.Http.Internal
import Network.Http.Utilities

{-
    The chunk size coming down from the server is somewhat arbitrary;
    it's really just an indication of how many bytes need to be read
    before the next size marker or end marker - neither of which has
    anything to do with streaming on our side. Instead, we'll feed
    bytes into our InputStream at an appropriate intermediate size.
-}
__BITE_SIZE__ :: Int
__BITE_SIZE__ = 32 * 1024

{-
    Process the reply from the server up to the end of the headers as
    deliniated by a blank line.
-}
readResponseHeader :: InputStream ByteString -> IO Response
readResponseHeader i = do
    (sc, sm) <- Streams.parseFromStream parseStatusLine i

    hs <- readHeaderFields i

    let h = buildHeaders hs
    let te = case lookupHeader h "Transfer-Encoding" of
            Just x' ->
                if mk x' == "chunked"
                    then Chunked
                    else None
            Nothing -> None

    let ce = case lookupHeader h "Content-Encoding" of
            Just x' ->
                if mk x' == "gzip"
                    then Gzip
                    else Identity
            Nothing -> Identity

    let nm = case lookupHeader h "Content-Length" of
            Just x' -> Just (readDecimal x' :: Int64)
            Nothing -> case sc of
                204 -> Just 0
                304 -> Just 0
                100 -> Just 0
                _ -> Nothing

    return
        Response
            { pStatusCode = sc
            , pStatusMsg = sm
            , pTransferEncoding = te
            , pContentEncoding = ce
            , pContentLength = nm
            , pHeaders = h
            }

parseStatusLine :: Parser (Int, ByteString)
parseStatusLine = do
    sc <- string "HTTP/1." *> satisfy version *> char ' ' *> decimal <* char ' '
    sm <- takeTill (== '\r') <* crlf
    return (sc, sm)
  where
    version c = c == '1' || c == '0'

crlf :: Parser ByteString
crlf = string "\r\n"

---------------------------------------------------------------------

{-
    Switch on the encoding and compression headers, wrapping the raw
    InputStream to present the entity body's actual bytes.
-}
readResponseBody :: Response -> InputStream ByteString -> IO (InputStream ByteString)
readResponseBody p i1 = do
    i2 <- case t of
        None -> case l of
            Just n -> readFixedLengthBody i1 n
            Nothing -> readUnlimitedBody i1
        Chunked -> readChunkedBody i1

    i3 <- case c of
        Identity -> return i2
        Gzip -> readCompressedBody i2
        Deflate -> throwIO (UnexpectedCompression $ show c)

    return i3
  where
    t = pTransferEncoding p
    c = pContentEncoding p
    l = pContentLength p

readDecimal :: (Enum α, Num α, Bits α) => ByteString -> α
readDecimal str' =
    S.foldl' f 0 x'
  where
    f !cnt !i = cnt * 10 + digitToInt i

    x' = head $ S.words str'

    {-# INLINE digitToInt #-}
    digitToInt :: (Enum α, Num α, Bits α) => Char -> α
    digitToInt c
        | c >= '0' && c <= '9' = toEnum $! ord c - ord '0'
        | otherwise = error $ "'" ++ [c] ++ "' is not an ascii digit"
{-# INLINE readDecimal #-}

data UnexpectedCompression = UnexpectedCompression String
    deriving (Typeable, Show)

instance Exception UnexpectedCompression

---------------------------------------------------------------------

{-
    Process a response body in chunked transfer encoding, taking the
    resultant bytes and reproducing them as an InputStream
-}
readChunkedBody :: InputStream ByteString -> IO (InputStream ByteString)
readChunkedBody i1 = do
    i2 <- Streams.fromGenerator (consumeChunks i1)
    return i2

{-
    For a response body in chunked transfer encoding, iterate over
    the individual chunks, reading the size parameter, then
    looping over that chunk in bites of at most __BYTE_SIZE__,
    yielding them to the receiveResponse InputStream accordingly.
-}
consumeChunks :: InputStream ByteString -> Generator ByteString ()
consumeChunks i1 = do
    !n <- parseSize

    if n > 0
        then do
            -- read one or more bites, then loop to next chunk
            go n
            skipCRLF
            consumeChunks i1
        else do
            -- skip "trailers" and consume final CRLF
            skipEnd
  where
    go 0 = return ()
    go !n = do
        (!x', !r) <- liftIO $ readN n i1
        Streams.yield x'
        go r

    parseSize = do
        n <- liftIO $ Streams.parseFromStream transferChunkSize i1
        return n

    skipEnd = do
        liftIO $ do
            _ <- readHeaderFields i1
            return ()

    skipCRLF = do
        liftIO $ do
            _ <- Streams.parseFromStream crlf i1
            return ()

{-
    Read the specified number of bytes up to a maximum of __BITE_SIZE__,
    returning a resultant ByteString and the number of bytes remaining.
-}

readN :: Int -> InputStream ByteString -> IO (ByteString, Int)
readN n i1 = do
    !x' <- Streams.readExactly p i1
    return (x', r)
  where
    !d = n - size

    !p =
        if d > 0
            then size
            else n

    !r =
        if d > 0
            then d
            else 0

    size = __BITE_SIZE__

transferChunkSize :: Parser (Int)
transferChunkSize = do
    !n <- hexadecimal
    void (takeTill (== '\r'))
    void crlf
    return n

---------------------------------------------------------------------

{-
    This has the rather crucial side effect of terminating the stream
    after the requested number of bytes. Otherwise, code handling
    responses waits on more input until an HTTP timeout occurs.
-}
readFixedLengthBody :: InputStream ByteString -> Int64 -> IO (InputStream ByteString)
readFixedLengthBody i1 n = do
    i2 <- Streams.takeBytes n i1
    return i2

{-
    On the other hand, there is the (predominently HTTP/1.0) case
    where there is no content length sent and no chunking, with the
    result that only the connection closing marks the end of the
    response body.
-}
readUnlimitedBody :: InputStream ByteString -> IO (InputStream ByteString)
readUnlimitedBody i1 = do
    return i1

---------------------------------------------------------------------

readCompressedBody :: InputStream ByteString -> IO (InputStream ByteString)
readCompressedBody i1 = do
    i2 <- Streams.gunzip i1
    return i2