File: Multipart.hs

package info (click to toggle)
haskell-cgi 3001.1.5.1-5
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 136 kB
  • ctags: 9
  • sloc: haskell: 849; makefile: 79; sh: 43
file content (221 lines) | stat: -rw-r--r-- 7,598 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
-- #hide

-----------------------------------------------------------------------------
-- |
-- Module      :  Network.CGI.Multipart
-- Copyright   :  (c) Peter Thiemann 2001,2002
--                (c) Bjorn Bringert 2005-2006
-- License     :  BSD-style
--
-- Maintainer  :  bjorn@bringert.net
-- Stability   :  experimental
-- Portability :  non-portable
--
-- Parsing of the multipart format from RFC2046.
-- Partly based on code from WASHMail.
--
-----------------------------------------------------------------------------
module Network.CGI.Multipart 
    (
     -- * Multi-part messages
     MultiPart(..), BodyPart(..), Header
    , parseMultipartBody, hGetMultipartBody
    , showMultipartBody
     -- * Headers
    , ContentType(..), ContentTransferEncoding(..)
    , ContentDisposition(..)
    , parseContentType
    , parseContentTransferEncoding
    , parseContentDisposition
    , getContentType
    , getContentTransferEncoding
    , getContentDisposition
    ) where

import Control.Monad
import Data.Int (Int64)
import Data.List (intersperse)
import Data.Maybe
import System.IO (Handle)

import Network.CGI.RFC822Headers

import qualified Data.ByteString.Lazy.Char8 as BS
import Data.ByteString.Lazy.Char8 (ByteString)

--
-- * Multi-part stuff.
--

data MultiPart = MultiPart [BodyPart]
               deriving (Show, Read, Eq, Ord)

data BodyPart = BodyPart [Header] ByteString
                deriving (Show, Read, Eq, Ord)

-- | Read a multi-part message from a 'ByteString'.
parseMultipartBody :: String -- ^ Boundary
                   -> ByteString -> MultiPart
parseMultipartBody b = 
    MultiPart . mapMaybe parseBodyPart . splitParts (BS.pack b)

-- | Read a multi-part message from a 'Handle'.
--   Fails on parse errors.
hGetMultipartBody :: String -- ^ Boundary
                  -> Handle
                  -> IO MultiPart
hGetMultipartBody b = liftM (parseMultipartBody b) . BS.hGetContents



parseBodyPart :: ByteString -> Maybe BodyPart
parseBodyPart s =
    do
    let (hdr,bdy) = splitAtEmptyLine s
    hs <- parseM pHeaders "<input>" (BS.unpack hdr)
    return $ BodyPart hs bdy

showMultipartBody :: String -> MultiPart -> ByteString
showMultipartBody b (MultiPart bs) = 
    unlinesCRLF $ foldr (\x xs -> d:showBodyPart x:xs) [c,BS.empty] bs
 where d = BS.pack ("--" ++ b)
       c = BS.pack ("--" ++ b ++ "--")       

showBodyPart :: BodyPart -> ByteString
showBodyPart (BodyPart hs c) = 
    unlinesCRLF $ [BS.pack (n++": "++v) | (n,v) <- hs] ++ [BS.empty,c]


--
-- * Splitting into multipart parts.
--

-- | Split a multipart message into the multipart parts.
splitParts :: ByteString -- ^ The boundary, without the initial dashes
           -> ByteString 
           -> [ByteString]
splitParts b = spl . dropPreamble b
  where
  spl x = case splitAtBoundary b x of
            Nothing -> []
            Just (s1,d,s2) | isClose b d -> [s1]
                           | otherwise -> s1:spl s2

-- | Drop everything up to and including the first line starting 
--   with the boundary.
dropPreamble :: ByteString -- ^ The boundary, without the initial dashes
             -> ByteString 
             -> ByteString
dropPreamble b s | BS.null s = BS.empty
                 | isBoundary b s = dropLine s
                 | otherwise = dropPreamble b (dropLine s)

-- | Split a string at the first boundary line.
splitAtBoundary :: ByteString -- ^ The boundary, without the initial dashes
                -> ByteString -- ^ String to split.
                -> Maybe (ByteString,ByteString,ByteString)
                   -- ^ The part before the boundary, the boundary line,
                   --   and the part after the boundary line. The CRLF
                   --   before and the CRLF (if any) after the boundary line
                   --   are not included in any of the strings returned.
                   --   Returns 'Nothing' if there is no boundary.
splitAtBoundary b s = spl 0
  where
  spl i = case findCRLF (BS.drop i s) of
              Nothing -> Nothing
              Just (j,l) | isBoundary b s2 -> Just (s1,d,s3)
                         | otherwise -> spl (i+j+l)
                  where 
                  s1 = BS.take (i+j) s
                  s2 = BS.drop (i+j+l) s
                  (d,s3) = splitAtCRLF s2

-- | Check whether a string starts with two dashes followed by
--   the given boundary string.
isBoundary :: ByteString -- ^ The boundary, without the initial dashes
           -> ByteString
           -> Bool
isBoundary b s = startsWithDashes s && b `BS.isPrefixOf` BS.drop 2 s

-- | Check whether a string for which 'isBoundary' returns true
--   has two dashes after the boudary string.
isClose :: ByteString -- ^ The boundary, without the initial dashes
        -> ByteString 
        -> Bool
isClose b s = startsWithDashes (BS.drop (2+BS.length b) s)

-- | Checks whether a string starts with two dashes.
startsWithDashes :: ByteString -> Bool
startsWithDashes s = BS.pack "--" `BS.isPrefixOf` s


--
-- * RFC 2046 CRLF
--

crlf :: ByteString
crlf = BS.pack "\r\n"

unlinesCRLF :: [ByteString] -> ByteString
unlinesCRLF = BS.concat . intersperse crlf

-- | Drop everything up to and including the first CRLF.
dropLine :: ByteString -> ByteString
dropLine s = snd (splitAtCRLF s)

-- | Split a string at the first empty line. The CRLF (if any) before the
--   empty line is included in the first result. The CRLF after the
--   empty line is not included in the result.
--   If there is no empty line, the entire input is returned
--   as the first result.
splitAtEmptyLine :: ByteString -> (ByteString, ByteString)
splitAtEmptyLine s | startsWithCRLF s = (BS.empty, dropCRLF s)
                   | otherwise = spl 0
  where
  spl i = case findCRLF (BS.drop i s) of
              Nothing -> (s, BS.empty)
              Just (j,l) | startsWithCRLF s2 -> (s1, dropCRLF s2)
                         | otherwise -> spl (i+j+l)
                where (s1,s2) = BS.splitAt (i+j+l) s

-- | Split a string at the first CRLF. The CRLF is not included
--   in any of the returned strings.
--   If there is no CRLF, the entire input is returned
--   as the first string.
splitAtCRLF :: ByteString -- ^ String to split.
            -> (ByteString,ByteString)
splitAtCRLF s = case findCRLF s of
                  Nothing -> (s,BS.empty)
                  Just (i,l) -> (s1, BS.drop l s2)
                      where (s1,s2) = BS.splitAt i s

-- | Get the index and length of the first CRLF, if any.
findCRLF :: ByteString -- ^ String to split.
         -> Maybe (Int64,Int64)
findCRLF s = 
    case findCRorLF s of
              Nothing -> Nothing
              Just j | BS.null (BS.drop (j+1) s) -> Just (j,1)
              Just j -> case (BS.index s j, BS.index s (j+1)) of
                           ('\n','\r') -> Just (j,2)
                           ('\r','\n') -> Just (j,2)
                           _           -> Just (j,1)

findCRorLF :: ByteString -> Maybe Int64
findCRorLF s = BS.findIndex (\c -> c == '\n' || c == '\r') s

startsWithCRLF :: ByteString -> Bool
startsWithCRLF s = not (BS.null s) && (c == '\n' || c == '\r')
  where c = BS.index s 0

-- | Drop an initial CRLF, if any. If the string is empty, 
--   nothing is done. If the string does not start with CRLF,
--   the first character is dropped.
dropCRLF :: ByteString -> ByteString
dropCRLF s | BS.null s = BS.empty
           | BS.null (BS.drop 1 s) = BS.empty
           | c0 == '\n' && c1 == '\r' = BS.drop 2 s
           | c0 == '\r' && c1 == '\n' = BS.drop 2 s
           | otherwise = BS.drop 1 s
  where c0 = BS.index s 0
        c1 = BS.index s 1