--
-- HTTP client for use with io-streams
--
-- Copyright © 2012-2021 Athae Eredh Siniath and Others
--
-- The code in this file, and the program it is a part of, is
-- made available to you by its authors as open source software:
-- you can redistribute it and/or modify it under the terms of
-- the BSD licence.
--
-- Significant portions of this file were written while studying
-- the HTTP request parser implementation in the Snap Framework;
-- snap-core's src/Snap/Internal/Parsing.hs and snap-server's
-- src/Snap/Internal/Http/Parser.hs, and various utility functions
-- have been cloned from there.
--
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_HADDOCK hide, not-home #-}
module Network.Http.ResponseParser (
readResponseHeader,
readResponseBody,
UnexpectedCompression (..),
-- for testing
readDecimal,
) where
import Prelude hiding (take, takeWhile)
import Control.Exception (Exception, throwIO)
import Control.Monad (void)
import Control.Monad.IO.Class (liftIO)
import Data.Attoparsec.ByteString.Char8
import Data.Bits (Bits (..))
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as S
import Data.CaseInsensitive (mk)
import Data.Char (ord)
import Data.Int (Int64)
import Data.Typeable (Typeable)
import System.IO.Streams (Generator, InputStream)
import qualified System.IO.Streams as Streams
import qualified System.IO.Streams.Attoparsec as Streams
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
#endif
import Network.Http.Internal
import Network.Http.Utilities
{-
The chunk size coming down from the server is somewhat arbitrary;
it's really just an indication of how many bytes need to be read
before the next size marker or end marker - neither of which has
anything to do with streaming on our side. Instead, we'll feed
bytes into our InputStream at an appropriate intermediate size.
-}
__BITE_SIZE__ :: Int
__BITE_SIZE__ = 32 * 1024
{-
Process the reply from the server up to the end of the headers as
delineated by a blank line.
-}
readResponseHeader :: InputStream ByteString -> IO Response
readResponseHeader i = do
(sc, sm) <- Streams.parseFromStream parseStatusLine i
hs <- readHeaderFields i
let h = buildHeaders hs
let te = case lookupHeader h "Transfer-Encoding" of
Just x' ->
if mk x' == "chunked"
then Chunked
else None
Nothing -> None
let ce = case lookupHeader h "Content-Encoding" of
Just x' ->
if mk x' == "gzip"
then Gzip
else Identity
Nothing -> Identity
let nm = case lookupHeader h "Content-Length" of
Just x' -> Just (readDecimal x' :: Int64)
Nothing -> case sc of
204 -> Just 0
304 -> Just 0
100 -> Just 0
_ -> Nothing
return
Response
{ pStatusCode = sc
, pStatusMsg = sm
, pTransferEncoding = te
, pContentEncoding = ce
, pContentLength = nm
, pHeaders = h
}
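{-
    A usage sketch only (the canned bytes and the _example* name are
    illustrative, not part of this module's API): parse a minimal
    response head and inspect a couple of the derived fields.
-}
_exampleReadResponseHeader :: IO (Int, Maybe Int64)
_exampleReadResponseHeader = do
    -- hypothetical canned response head; any InputStream ByteString works
    i <- Streams.fromList ["HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\n"]
    p <- readResponseHeader i
    return (pStatusCode p, pContentLength p)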
parseStatusLine :: Parser (Int, ByteString)
parseStatusLine = do
sc <- string "HTTP/1." *> satisfy version *> char ' ' *> decimal <* char ' '
sm <- takeTill (== '\r') <* crlf
return (sc, sm)
where
version c = c == '1' || c == '0'
crlf :: Parser ByteString
crlf = string "\r\n"
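{-
    For illustration (the expected output is an assumption derived by
    reading the parser above, not taken from a test suite):

    >>> parseOnly parseStatusLine "HTTP/1.1 200 OK\r\n"
    Right (200,"OK")
-}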
---------------------------------------------------------------------
{-
Switch on the encoding and compression headers, wrapping the raw
InputStream to present the entity body's actual bytes.
-}
readResponseBody :: Response -> InputStream ByteString -> IO (InputStream ByteString)
readResponseBody p i1 = do
i2 <- case t of
None -> case l of
Just n -> readFixedLengthBody i1 n
Nothing -> readUnlimitedBody i1
Chunked -> readChunkedBody i1
i3 <- case c of
Identity -> return i2
Gzip -> readCompressedBody i2
Deflate -> throwIO (UnexpectedCompression $ show c)
return i3
where
t = pTransferEncoding p
c = pContentEncoding p
l = pContentLength p
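{-
    A sketch of how the two halves are meant to compose (the _example*
    name is illustrative and not part of this module's API): read the
    header, then wrap the remainder of the stream as the decoded body.
-}
_exampleReceive :: InputStream ByteString -> IO ByteString
_exampleReceive i1 = do
    p <- readResponseHeader i1
    i2 <- readResponseBody p i1
    S.concat <$> Streams.toList i2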
readDecimal :: (Enum α, Num α, Bits α) => ByteString -> α
readDecimal str' =
S.foldl' f 0 x'
where
f !cnt !i = cnt * 10 + digitToInt i
x' = head $ S.words str'
{-# INLINE digitToInt #-}
digitToInt :: (Enum α, Num α, Bits α) => Char -> α
digitToInt c
| c >= '0' && c <= '9' = toEnum $! ord c - ord '0'
| otherwise = error $ "'" ++ [c] ++ "' is not an ascii digit"
{-# INLINE readDecimal #-}
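{-
    For illustration (results follow from the definition above): only
    the first whitespace-delimited token is considered, so a value with
    trailing junk still yields its leading number.

    >>> readDecimal "42" :: Int64
    42
    >>> readDecimal "42 bytes" :: Int64
    42
-}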
data UnexpectedCompression = UnexpectedCompression String
deriving (Typeable, Show)
instance Exception UnexpectedCompression
---------------------------------------------------------------------
{-
Process a response body in chunked transfer encoding, taking the
resultant bytes and reproducing them as an InputStream
-}
readChunkedBody :: InputStream ByteString -> IO (InputStream ByteString)
readChunkedBody i1 = do
i2 <- Streams.fromGenerator (consumeChunks i1)
return i2
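{-
    Usage sketch (the canned bytes and the _example* name are
    illustrative): a chunked body is re-presented as the decoded
    payload, with size markers, chunk delimiters, and the terminating
    zero chunk stripped out; this example evaluates to "hello".
-}
_exampleChunked :: IO ByteString
_exampleChunked = do
    i1 <- Streams.fromList ["5\r\nhello\r\n0\r\n\r\n"]
    i2 <- readChunkedBody i1
    S.concat <$> Streams.toList i2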
{-
For a response body in chunked transfer encoding, iterate over
the individual chunks, reading the size parameter, then
looping over that chunk in bites of at most __BITE_SIZE__,
yielding them to the receiveResponse InputStream accordingly.
-}
consumeChunks :: InputStream ByteString -> Generator ByteString ()
consumeChunks i1 = do
!n <- parseSize
if n > 0
then do
-- read one or more bites, then loop to next chunk
go n
skipCRLF
consumeChunks i1
else do
-- skip "trailers" and consume final CRLF
skipEnd
where
go 0 = return ()
go !n = do
(!x', !r) <- liftIO $ readN n i1
Streams.yield x'
go r
parseSize = do
n <- liftIO $ Streams.parseFromStream transferChunkSize i1
return n
skipEnd = do
liftIO $ do
_ <- readHeaderFields i1
return ()
skipCRLF = do
liftIO $ do
_ <- Streams.parseFromStream crlf i1
return ()
{-
Read the specified number of bytes up to a maximum of __BITE_SIZE__,
returning a resultant ByteString and the number of bytes remaining.
-}
readN :: Int -> InputStream ByteString -> IO (ByteString, Int)
readN n i1 = do
!x' <- Streams.readExactly p i1
return (x', r)
where
!d = n - size
!p =
if d > 0
then size
else n
!r =
if d > 0
then d
else 0
size = __BITE_SIZE__
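{-
    Behaviour sketch (the numbers follow from the arithmetic above,
    with __BITE_SIZE__ at 32768): a small remainder is read in full,
    while a large one is clipped to a single bite.

    readN 10 i      -- reads 10 bytes,    returns (x', 0)
    readN 100000 i  -- reads 32768 bytes, returns (x', 67232)
-}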
transferChunkSize :: Parser (Int)
transferChunkSize = do
!n <- hexadecimal
void (takeTill (== '\r'))
void crlf
return n
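{-
    For illustration (expected values derived from the parser above):
    the size is hexadecimal, and any chunk extension up to the CRLF is
    discarded.

    >>> parseOnly transferChunkSize "1a9\r\n"
    Right 425
    >>> parseOnly transferChunkSize "1a9;name=value\r\n"
    Right 425
-}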
---------------------------------------------------------------------
{-
This has the rather crucial side effect of terminating the stream
after the requested number of bytes. Otherwise, code handling
responses waits on more input until an HTTP timeout occurs.
-}
readFixedLengthBody :: InputStream ByteString -> Int64 -> IO (InputStream ByteString)
readFixedLengthBody i1 n = do
i2 <- Streams.takeBytes n i1
return i2
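{-
    Behaviour sketch (the canned bytes and the _example* name are
    illustrative): only the first n bytes are presented; anything after
    them is left unread on the underlying stream. This evaluates to
    "hello".
-}
_exampleFixed :: IO ByteString
_exampleFixed = do
    i1 <- Streams.fromList ["hello and then some trailing noise"]
    i2 <- readFixedLengthBody i1 5
    S.concat <$> Streams.toList i2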
{-
On the other hand, there is the (predominantly HTTP/1.0) case
where there is no content length sent and no chunking, with the
result that only the connection closing marks the end of the
response body.
-}
readUnlimitedBody :: InputStream ByteString -> IO (InputStream ByteString)
readUnlimitedBody i1 = do
return i1
---------------------------------------------------------------------
readCompressedBody :: InputStream ByteString -> IO (InputStream ByteString)
readCompressedBody i1 = do
i2 <- Streams.gunzip i1
return i2