1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
-- #hide
-----------------------------------------------------------------------------
-- |
-- Module : Network.CGI.Multipart
-- Copyright : (c) Peter Thiemann 2001,2002
-- (c) Bjorn Bringert 2005-2006
-- License : BSD-style
--
-- Maintainer : bjorn@bringert.net
-- Stability : experimental
-- Portability : non-portable
--
-- Parsing of the multipart format from RFC2046.
-- Partly based on code from WASHMail.
--
-----------------------------------------------------------------------------
module Network.CGI.Multipart
(
-- * Multi-part messages
MultiPart(..), BodyPart(..), Header
, parseMultipartBody, hGetMultipartBody
, showMultipartBody
-- * Headers
, ContentType(..), ContentTransferEncoding(..)
, ContentDisposition(..)
, parseContentType
, parseContentTransferEncoding
, parseContentDisposition
, getContentType
, getContentTransferEncoding
, getContentDisposition
) where
import Control.Monad
import Data.Int (Int64)
import Data.List (intersperse)
import Data.Maybe
import System.IO (Handle)
import Network.CGI.RFC822Headers
import qualified Data.ByteString.Lazy.Char8 as BS
import Data.ByteString.Lazy.Char8 (ByteString)
--
-- * Multi-part stuff.
--
data MultiPart = MultiPart [BodyPart]
deriving (Show, Read, Eq, Ord)
data BodyPart = BodyPart [Header] ByteString
deriving (Show, Read, Eq, Ord)
-- | Read a multi-part message from a 'ByteString'.
parseMultipartBody :: String -- ^ Boundary
-> ByteString -> MultiPart
parseMultipartBody b =
MultiPart . mapMaybe parseBodyPart . splitParts (BS.pack b)
-- | Read a multi-part message from a 'Handle'.
-- Fails on parse errors.
hGetMultipartBody :: String -- ^ Boundary
-> Handle
-> IO MultiPart
hGetMultipartBody b = liftM (parseMultipartBody b) . BS.hGetContents
parseBodyPart :: ByteString -> Maybe BodyPart
parseBodyPart s =
do
let (hdr,bdy) = splitAtEmptyLine s
hs <- parseM pHeaders "<input>" (BS.unpack hdr)
return $ BodyPart hs bdy
showMultipartBody :: String -> MultiPart -> ByteString
showMultipartBody b (MultiPart bs) =
unlinesCRLF $ foldr (\x xs -> d:showBodyPart x:xs) [c,BS.empty] bs
where d = BS.pack ("--" ++ b)
c = BS.pack ("--" ++ b ++ "--")
showBodyPart :: BodyPart -> ByteString
showBodyPart (BodyPart hs c) =
unlinesCRLF $ [BS.pack (n++": "++v) | (n,v) <- hs] ++ [BS.empty,c]
--
-- * Splitting into multipart parts.
--
-- | Split a multipart message into the multipart parts.
splitParts :: ByteString -- ^ The boundary, without the initial dashes
-> ByteString
-> [ByteString]
splitParts b = spl . dropPreamble b
where
spl x = case splitAtBoundary b x of
Nothing -> []
Just (s1,d,s2) | isClose b d -> [s1]
| otherwise -> s1:spl s2
-- | Drop everything up to and including the first line starting
-- with the boundary.
dropPreamble :: ByteString -- ^ The boundary, without the initial dashes
-> ByteString
-> ByteString
dropPreamble b s | BS.null s = BS.empty
| isBoundary b s = dropLine s
| otherwise = dropPreamble b (dropLine s)
-- | Split a string at the first boundary line.
splitAtBoundary :: ByteString -- ^ The boundary, without the initial dashes
-> ByteString -- ^ String to split.
-> Maybe (ByteString,ByteString,ByteString)
-- ^ The part before the boundary, the boundary line,
-- and the part after the boundary line. The CRLF
-- before and the CRLF (if any) after the boundary line
-- are not included in any of the strings returned.
-- Returns 'Nothing' if there is no boundary.
splitAtBoundary b s = spl 0
where
spl i = case findCRLF (BS.drop i s) of
Nothing -> Nothing
Just (j,l) | isBoundary b s2 -> Just (s1,d,s3)
| otherwise -> spl (i+j+l)
where
s1 = BS.take (i+j) s
s2 = BS.drop (i+j+l) s
(d,s3) = splitAtCRLF s2
-- | Check whether a string starts with two dashes followed by
-- the given boundary string.
isBoundary :: ByteString -- ^ The boundary, without the initial dashes
-> ByteString
-> Bool
isBoundary b s = startsWithDashes s && b `BS.isPrefixOf` BS.drop 2 s
-- | Check whether a string for which 'isBoundary' returns true
-- has two dashes after the boudary string.
isClose :: ByteString -- ^ The boundary, without the initial dashes
-> ByteString
-> Bool
isClose b s = startsWithDashes (BS.drop (2+BS.length b) s)
-- | Checks whether a string starts with two dashes.
startsWithDashes :: ByteString -> Bool
startsWithDashes s = BS.pack "--" `BS.isPrefixOf` s
--
-- * RFC 2046 CRLF
--
crlf :: ByteString
crlf = BS.pack "\r\n"
unlinesCRLF :: [ByteString] -> ByteString
unlinesCRLF = BS.concat . intersperse crlf
-- | Drop everything up to and including the first CRLF.
dropLine :: ByteString -> ByteString
dropLine s = snd (splitAtCRLF s)
-- | Split a string at the first empty line. The CRLF (if any) before the
-- empty line is included in the first result. The CRLF after the
-- empty line is not included in the result.
-- If there is no empty line, the entire input is returned
-- as the first result.
splitAtEmptyLine :: ByteString -> (ByteString, ByteString)
splitAtEmptyLine s | startsWithCRLF s = (BS.empty, dropCRLF s)
| otherwise = spl 0
where
spl i = case findCRLF (BS.drop i s) of
Nothing -> (s, BS.empty)
Just (j,l) | startsWithCRLF s2 -> (s1, dropCRLF s2)
| otherwise -> spl (i+j+l)
where (s1,s2) = BS.splitAt (i+j+l) s
-- | Split a string at the first CRLF. The CRLF is not included
-- in any of the returned strings.
-- If there is no CRLF, the entire input is returned
-- as the first string.
splitAtCRLF :: ByteString -- ^ String to split.
-> (ByteString,ByteString)
splitAtCRLF s = case findCRLF s of
Nothing -> (s,BS.empty)
Just (i,l) -> (s1, BS.drop l s2)
where (s1,s2) = BS.splitAt i s
-- | Get the index and length of the first CRLF, if any.
findCRLF :: ByteString -- ^ String to split.
-> Maybe (Int64,Int64)
findCRLF s =
case findCRorLF s of
Nothing -> Nothing
Just j | BS.null (BS.drop (j+1) s) -> Just (j,1)
Just j -> case (BS.index s j, BS.index s (j+1)) of
('\n','\r') -> Just (j,2)
('\r','\n') -> Just (j,2)
_ -> Just (j,1)
findCRorLF :: ByteString -> Maybe Int64
findCRorLF s = BS.findIndex (\c -> c == '\n' || c == '\r') s
startsWithCRLF :: ByteString -> Bool
startsWithCRLF s = not (BS.null s) && (c == '\n' || c == '\r')
where c = BS.index s 0
-- | Drop an initial CRLF, if any. If the string is empty,
-- nothing is done. If the string does not start with CRLF,
-- the first character is dropped.
dropCRLF :: ByteString -> ByteString
dropCRLF s | BS.null s = BS.empty
| BS.null (BS.drop 1 s) = BS.empty
| c0 == '\n' && c1 == '\r' = BS.drop 2 s
| c0 == '\r' && c1 == '\n' = BS.drop 2 s
| otherwise = BS.drop 1 s
where c0 = BS.index s 0
c1 = BS.index s 1
|