1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
|
-----------------------------------------------------------------------------
-- |
-- Module : Network.HTTP.Utils
-- Copyright : See LICENSE file
-- License : BSD
--
-- Maintainer : Ganesh Sittampalam <ganesh@earth.li>
-- Stability : experimental
-- Portability : non-portable (not tested)
--
-- Set of utility functions and definitions used by package modules.
--
module Network.HTTP.Utils
( trim -- :: String -> String
, trimL -- :: String -> String
, trimR -- :: String -> String
, crlf -- :: String
, lf -- :: String
, sp -- :: String
, split -- :: Eq a => a -> [a] -> Maybe ([a],[a])
, splitBy -- :: Eq a => a -> [a] -> [[a]]
, readsOne -- :: Read a => (a -> b) -> b -> String -> b
, dropWhileTail -- :: (a -> Bool) -> [a] -> [a]
, chopAtDelim -- :: Eq a => a -> [a] -> ([a],[a])
, toUTF8BS
, fromUTF8BS
) where
import Data.Bits
import Data.Char
import Data.List ( elemIndex )
import Data.Maybe ( fromMaybe )
import Data.Word ( Word8 )
import qualified Data.ByteString as BS
-- | @crlf@ is our beloved two-char line terminator.
crlf :: String
crlf = "\r\n"
-- | @lf@ is a tolerated line terminator, per RFC 2616 section 19.3.
lf :: String
lf = "\n"
-- | @sp@ lets you save typing one character.
sp :: String
sp = " "
-- | @split delim ls@ splits a list into two parts, the @delim@ occurring
-- at the head of the second list. If @delim@ isn't in @ls@, @Nothing@ is
-- returned.
split :: Eq a => a -> [a] -> Maybe ([a],[a])
split delim list = case delim `elemIndex` list of
Nothing -> Nothing
Just x -> Just $ splitAt x list
-- | @trim str@ removes leading and trailing whitespace from @str@.
trim :: String -> String
trim xs = trimR (trimL xs)
-- | @trimL str@ removes leading whitespace (as defined by 'Data.Char.isSpace')
-- from @str@.
trimL :: String -> String
trimL xs = dropWhile isSpace xs
-- | @trimL str@ removes trailing whitespace (as defined by 'Data.Char.isSpace')
-- from @str@.
trimR :: String -> String
trimR str = fromMaybe "" $ foldr trimIt Nothing str
where
trimIt x (Just xs) = Just (x:xs)
trimIt x Nothing
| isSpace x = Nothing
| otherwise = Just [x]
-- | @splitMany delim ls@ removes the delimiter @delim@ from @ls@.
splitBy :: Eq a => a -> [a] -> [[a]]
splitBy _ [] = []
splitBy c xs =
case break (==c) xs of
(_,[]) -> [xs]
(as,_:bs) -> as : splitBy c bs
-- | @readsOne f def str@ tries to 'read' @str@, taking
-- the first result and passing it to @f@. If the 'read'
-- doesn't succeed, return @def@.
readsOne :: Read a => (a -> b) -> b -> String -> b
readsOne f n str =
case reads str of
((v,_):_) -> f v
_ -> n
-- | @dropWhileTail p ls@ chops off trailing elements from @ls@
-- until @p@ returns @False@.
dropWhileTail :: (a -> Bool) -> [a] -> [a]
dropWhileTail f ls =
case foldr chop Nothing ls of { Just xs -> xs; Nothing -> [] }
where
chop x (Just xs) = Just (x:xs)
chop x _
| f x = Nothing
| otherwise = Just [x]
-- | @chopAtDelim elt ls@ breaks up @ls@ into two at first occurrence
-- of @elt@; @elt@ is elided too. If @elt@ does not occur, the second
-- list is empty and the first is equal to @ls@.
chopAtDelim :: Eq a => a -> [a] -> ([a],[a])
chopAtDelim elt xs =
case break (==elt) xs of
(_,[]) -> (xs,[])
(as,_:bs) -> (as,bs)
toUTF8BS :: String -> BS.ByteString
toUTF8BS = BS.pack . encodeStringUtf8
fromUTF8BS :: BS.ByteString -> String
fromUTF8BS = decodeStringUtf8 . BS.unpack
-- | Encode 'String' to a list of UTF8-encoded octets
--
-- Code-points in the @U+D800@-@U+DFFF@ range will be encoded
-- as the replacement character (i.e. @U+FFFD@).
--
-- The code is extracted from Cabal library, written originally
-- Herbert Valerio Riedel under BSD-3-Clause license
encodeStringUtf8 :: String -> [Word8]
encodeStringUtf8 [] = []
encodeStringUtf8 (c:cs)
| c <= '\x07F' = w8
: encodeStringUtf8 cs
| c <= '\x7FF' = (0xC0 .|. w8ShiftR 6 )
: (0x80 .|. (w8 .&. 0x3F))
: encodeStringUtf8 cs
| c <= '\xD7FF'= (0xE0 .|. w8ShiftR 12 )
: (0x80 .|. (w8ShiftR 6 .&. 0x3F))
: (0x80 .|. (w8 .&. 0x3F))
: encodeStringUtf8 cs
| c <= '\xDFFF'= 0xEF : 0xBF : 0xBD -- U+FFFD
: encodeStringUtf8 cs
| c <= '\xFFFF'= (0xE0 .|. w8ShiftR 12 )
: (0x80 .|. (w8ShiftR 6 .&. 0x3F))
: (0x80 .|. (w8 .&. 0x3F))
: encodeStringUtf8 cs
| otherwise = (0xf0 .|. w8ShiftR 18 )
: (0x80 .|. (w8ShiftR 12 .&. 0x3F))
: (0x80 .|. (w8ShiftR 6 .&. 0x3F))
: (0x80 .|. (w8 .&. 0x3F))
: encodeStringUtf8 cs
where
w8 = fromIntegral (ord c) :: Word8
w8ShiftR :: Int -> Word8
w8ShiftR = fromIntegral . shiftR (ord c)
-- | Decode 'String' from UTF8-encoded octets.
--
-- Invalid data in the UTF8 stream (this includes code-points @U+D800@
-- through @U+DFFF@) will be decoded as the replacement character (@U+FFFD@).
--
-- See also 'encodeStringUtf8'
decodeStringUtf8 :: [Word8] -> String
decodeStringUtf8 = go
where
go :: [Word8] -> String
go [] = []
go (c : cs)
| c <= 0x7F = chr (fromIntegral c) : go cs
| c <= 0xBF = replacementChar : go cs
| c <= 0xDF = twoBytes c cs
| c <= 0xEF = moreBytes 3 0x800 cs (fromIntegral $ c .&. 0xF)
| c <= 0xF7 = moreBytes 4 0x10000 cs (fromIntegral $ c .&. 0x7)
| c <= 0xFB = moreBytes 5 0x200000 cs (fromIntegral $ c .&. 0x3)
| c <= 0xFD = moreBytes 6 0x4000000 cs (fromIntegral $ c .&. 0x1)
| otherwise = replacementChar : go cs
twoBytes :: Word8 -> [Word8] -> String
twoBytes c0 (c1:cs')
| c1 .&. 0xC0 == 0x80
= let d = (fromIntegral (c0 .&. 0x1F) `shiftL` 6)
.|. fromIntegral (c1 .&. 0x3F)
in if d >= 0x80
then chr d : go cs'
else replacementChar : go cs'
twoBytes _ cs' = replacementChar : go cs'
moreBytes :: Int -> Int -> [Word8] -> Int -> [Char]
moreBytes 1 overlong cs' acc
| overlong <= acc && acc <= 0x10FFFF && (acc < 0xD800 || 0xDFFF < acc)
= chr acc : go cs'
| otherwise
= replacementChar : go cs'
moreBytes byteCount overlong (cn:cs') acc
| cn .&. 0xC0 == 0x80
= moreBytes (byteCount-1) overlong cs'
((acc `shiftL` 6) .|. fromIntegral cn .&. 0x3F)
moreBytes _ _ cs' _
= replacementChar : go cs'
replacementChar = '\xfffd'
|