1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446
|
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DoAndIfThenElse #-}
-- |
-- Module : Data.ByteString.Base64.Internal
-- Copyright : (c) 2010 Bryan O'Sullivan
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64-encoded strings.
module Data.ByteString.Base64.Internal
( encodeWith
, decodeWithTable
, decodeLenientWithTable
, mkEncodeTable
, done
, peek8, poke8, peek8_32
, reChunkIn
, Padding(..)
, withBS
, mkBS
) where
import Data.Bits ((.|.), (.&.), shiftL, shiftR)
import qualified Data.ByteString as B
import Data.ByteString.Internal (ByteString(..), mallocByteString)
import Data.Word (Word8, Word16, Word32)
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr, castForeignPtr)
import Foreign.Ptr (Ptr, castPtr, minusPtr, plusPtr)
import Foreign.Storable (peek, peekElemOff, poke)
import System.IO.Unsafe (unsafePerformIO)
peek8 :: Ptr Word8 -> IO Word8
peek8 = peek
poke8 :: Ptr Word8 -> Word8 -> IO ()
poke8 = poke
peek8_32 :: Ptr Word8 -> IO Word32
peek8_32 = fmap fromIntegral . peek8
data Padding = Padded | Don'tCare | Unpadded deriving Eq
-- | Encode a string into base64 form. The result will always be a multiple
-- of 4 bytes in length.
encodeWith :: Padding -> EncodeTable -> ByteString -> ByteString
encodeWith !padding (ET alfaFP encodeTable) !bs = withBS bs go
where
go !sptr !slen
| slen > maxBound `div` 4 =
error "Data.ByteString.Base64.encode: input too long"
| otherwise = do
let dlen = (slen + 2) `div` 3 * 4
dfp <- mallocByteString dlen
withForeignPtr alfaFP $ \aptr ->
withForeignPtr encodeTable $ \ep -> do
let aidx n = peek8 (aptr `plusPtr` n)
sEnd = sptr `plusPtr` slen
finish !n = return $ mkBS dfp n
fill !dp !sp !n
| sp `plusPtr` 2 >= sEnd = complete (castPtr dp) sp n
| otherwise = {-# SCC "encode/fill" #-} do
i <- peek8_32 sp
j <- peek8_32 (sp `plusPtr` 1)
k <- peek8_32 (sp `plusPtr` 2)
let w = i `shiftL` 16 .|. j `shiftL` 8 .|. k
enc = peekElemOff ep . fromIntegral
poke dp =<< enc (w `shiftR` 12)
poke (dp `plusPtr` 2) =<< enc (w .&. 0xfff)
fill (dp `plusPtr` 4) (sp `plusPtr` 3) (n + 4)
complete dp sp n
| sp == sEnd = finish n
| otherwise = {-# SCC "encode/complete" #-} do
let peekSP m f = (f . fromIntegral) `fmap` peek8 (sp `plusPtr` m)
twoMore = sp `plusPtr` 2 == sEnd
equals = 0x3d :: Word8
doPad = padding == Padded
{-# INLINE equals #-}
!a <- peekSP 0 ((`shiftR` 2) . (.&. 0xfc))
!b <- peekSP 0 ((`shiftL` 4) . (.&. 0x03))
poke8 dp =<< aidx a
if twoMore
then do
!b' <- peekSP 1 ((.|. b) . (`shiftR` 4) . (.&. 0xf0))
!c <- aidx =<< peekSP 1 ((`shiftL` 2) . (.&. 0x0f))
poke8 (dp `plusPtr` 1) =<< aidx b'
poke8 (dp `plusPtr` 2) c
if doPad
then poke8 (dp `plusPtr` 3) equals >> finish (n + 4)
else finish (n + 3)
else do
poke8 (dp `plusPtr` 1) =<< aidx b
if doPad
then do
poke8 (dp `plusPtr` 2) equals
poke8 (dp `plusPtr` 3) equals
finish (n + 4)
else finish (n + 2)
withForeignPtr dfp (\dptr -> fill (castPtr dptr) sptr 0)
data EncodeTable = ET !(ForeignPtr Word8) !(ForeignPtr Word16)
-- The encoding table is constructed such that the expansion of a 12-bit
-- block to a 16-bit block can be done by a single Word16 copy from the
-- correspoding table entry to the target address. The 16-bit blocks are
-- stored in big-endian order, as the indices into the table are built in
-- big-endian order.
mkEncodeTable :: ByteString -> EncodeTable
#if MIN_VERSION_bytestring(0,11,0)
mkEncodeTable alphabet@(BS afp _) =
case table of BS fp _ -> ET afp (castForeignPtr fp)
#else
mkEncodeTable alphabet@(PS afp _ _) =
case table of PS fp _ _ -> ET afp (castForeignPtr fp)
#endif
where
ix = fromIntegral . B.index alphabet
table = B.pack $ concat $ [ [ix j, ix k] | j <- [0..63], k <- [0..63] ]
-- | Decode a base64-encoded string. This function strictly follows
-- the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>.
--
-- This function takes the decoding table (for @base64@ or @base64url@) as
-- the first parameter.
--
-- For validation of padding properties, see note: $Validation
--
decodeWithTable :: Padding -> ForeignPtr Word8 -> ByteString -> Either String ByteString
decodeWithTable padding !decodeFP bs
| B.length bs == 0 = Right B.empty
| otherwise = case padding of
Padded
| r == 0 -> withBS bs go
| r == 1 -> Left "Base64-encoded bytestring has invalid size"
| otherwise -> Left "Base64-encoded bytestring is unpadded or has invalid padding"
Don'tCare
| r == 0 -> withBS bs go
| r == 2 -> withBS (B.append bs (B.replicate 2 0x3d)) go
| r == 3 -> validateLastPad bs invalidPad $ withBS (B.append bs (B.replicate 1 0x3d)) go
| otherwise -> Left "Base64-encoded bytestring has invalid size"
Unpadded
| r == 0 -> validateLastPad bs noPad $ withBS bs go
| r == 2 -> validateLastPad bs noPad $ withBS (B.append bs (B.replicate 2 0x3d)) go
| r == 3 -> validateLastPad bs noPad $ withBS (B.append bs (B.replicate 1 0x3d)) go
| otherwise -> Left "Base64-encoded bytestring has invalid size"
where
!r = B.length bs `rem` 4
noPad = "Base64-encoded bytestring required to be unpadded"
invalidPad = "Base64-encoded bytestring has invalid padding"
go !sptr !slen = do
dfp <- mallocByteString (slen `quot` 4 * 3)
withForeignPtr decodeFP (\ !decptr ->
withForeignPtr dfp (\dptr ->
decodeLoop decptr sptr dptr (sptr `plusPtr` slen) dfp))
decodeLoop
:: Ptr Word8
-- ^ decoding table pointer
-> Ptr Word8
-- ^ source pointer
-> Ptr Word8
-- ^ destination pointer
-> Ptr Word8
-- ^ source end pointer
-> ForeignPtr Word8
-- ^ destination foreign pointer (used for finalizing string)
-> IO (Either String ByteString)
decodeLoop !dtable !sptr !dptr !end !dfp = go dptr sptr
where
err p = return . Left
$ "invalid character at offset: "
++ show (p `minusPtr` sptr)
padErr p = return . Left
$ "invalid padding at offset: "
++ show (p `minusPtr` sptr)
canonErr p = return . Left
$ "non-canonical encoding detected at offset: "
++ show (p `minusPtr` sptr)
look :: Ptr Word8 -> IO Word32
look !p = do
!i <- peek p
!v <- peekElemOff dtable (fromIntegral i)
return (fromIntegral v)
go !dst !src
| plusPtr src 4 >= end = do
!a <- look src
!b <- look (src `plusPtr` 1)
!c <- look (src `plusPtr` 2)
!d <- look (src `plusPtr` 3)
finalChunk dst src a b c d
| otherwise = do
!a <- look src
!b <- look (src `plusPtr` 1)
!c <- look (src `plusPtr` 2)
!d <- look (src `plusPtr` 3)
decodeChunk dst src a b c d
-- | Decodes chunks of 4 bytes at a time, recombining into
-- 3 bytes. Note that in the inner loop stage, no padding
-- characters are admissible.
--
decodeChunk !dst !src !a !b !c !d
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| c == 0x63 = padErr (plusPtr src 2)
| d == 0x63 = padErr (plusPtr src 3)
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| otherwise = do
let !w = (shiftL a 18
.|. shiftL b 12
.|. shiftL c 6
.|. d) :: Word32
poke8 dst (fromIntegral (shiftR w 16))
poke8 (plusPtr dst 1) (fromIntegral (shiftR w 8))
poke8 (plusPtr dst 2) (fromIntegral w)
go (plusPtr dst 3) (plusPtr src 4)
-- | Decode the final 4 bytes in the string, recombining into
-- 3 bytes. Note that in this stage, we can have padding chars
-- but only in the final 2 positions.
--
finalChunk !dst !src a b c d
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| c == 0x63 && d /= 0x63 = err (plusPtr src 3) -- make sure padding is coherent.
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| otherwise = do
let !w = (shiftL a 18
.|. shiftL b 12
.|. shiftL c 6
.|. d) :: Word32
poke8 dst (fromIntegral (shiftR w 16))
if c == 0x63 && d == 0x63
then
if sanityCheckPos b mask_4bits
then return $ Right $ mkBS dfp (1 + (dst `minusPtr` dptr))
else canonErr (plusPtr src 1)
else if d == 0x63
then
if sanityCheckPos c mask_2bits
then do
poke8 (plusPtr dst 1) (fromIntegral (shiftR w 8))
return $ Right $ mkBS dfp (2 + (dst `minusPtr` dptr))
else canonErr (plusPtr src 2)
else do
poke8 (plusPtr dst 1) (fromIntegral (shiftR w 8))
poke8 (plusPtr dst 2) (fromIntegral w)
return $ Right $ mkBS dfp (3 + (dst `minusPtr` dptr))
-- | Decode a base64-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not
-- generate parse errors no matter how poor its input. This function
-- takes the decoding table (for @base64@ or @base64url@) as the first
-- paramert.
decodeLenientWithTable :: ForeignPtr Word8 -> ByteString -> ByteString
decodeLenientWithTable !decodeFP !bs = withBS bs go
where
go !sptr !slen
| dlen <= 0 = return B.empty
| otherwise = do
dfp <- mallocByteString dlen
withForeignPtr decodeFP $ \ !decptr -> do
let finish dbytes
| dbytes > 0 = return $ mkBS dfp dbytes
| otherwise = return B.empty
sEnd = sptr `plusPtr` slen
fill !dp !sp !n
| sp >= sEnd = finish n
| otherwise = {-# SCC "decodeLenientWithTable/fill" #-}
let look :: Bool -> Ptr Word8
-> (Ptr Word8 -> Word32 -> IO ByteString)
-> IO ByteString
{-# INLINE look #-}
look skipPad p0 f = go' p0
where
go' p | p >= sEnd = f (sEnd `plusPtr` (-1)) done
| otherwise = {-# SCC "decodeLenient/look" #-} do
ix <- fromIntegral `fmap` peek8 p
v <- peek8 (decptr `plusPtr` ix)
if v == x || v == done && skipPad
then go' (p `plusPtr` 1)
else f (p `plusPtr` 1) (fromIntegral v)
in look True sp $ \ !aNext !aValue ->
look True aNext $ \ !bNext !bValue ->
if aValue == done || bValue == done
then finish n
else
look False bNext $ \ !cNext !cValue ->
look False cNext $ \ !dNext !dValue -> do
let w = aValue `shiftL` 18 .|. bValue `shiftL` 12 .|.
cValue `shiftL` 6 .|. dValue
poke8 dp $ fromIntegral (w `shiftR` 16)
if cValue == done
then finish (n + 1)
else do
poke8 (dp `plusPtr` 1) $ fromIntegral (w `shiftR` 8)
if dValue == done
then finish (n + 2)
else do
poke8 (dp `plusPtr` 2) $ fromIntegral w
fill (dp `plusPtr` 3) dNext (n+3)
withForeignPtr dfp $ \dptr -> fill dptr sptr 0
where
!dlen = (slen + 3) `div` 4 * 3
x :: Integral a => a
x = 255
{-# INLINE x #-}
done :: Integral a => a
done = 99
{-# INLINE done #-}
-- This takes a list of ByteStrings, and returns a list in which each
-- (apart from possibly the last) has length that is a multiple of n
reChunkIn :: Int -> [ByteString] -> [ByteString]
reChunkIn !n = go
where
go [] = []
go (y : ys) = case B.length y `divMod` n of
(_, 0) -> y : go ys
(d, _) -> case B.splitAt (d * n) y of
(prefix, suffix) -> prefix : fixup suffix ys
fixup acc [] = [acc]
fixup acc (z : zs) = case B.splitAt (n - B.length acc) z of
(prefix, suffix) ->
let acc' = acc `B.append` prefix
in if B.length acc' == n
then let zs' = if B.null suffix
then zs
else suffix : zs
in acc' : go zs'
else -- suffix must be null
fixup acc' zs
-- $Validation
--
-- This function checks that the last char of a bytestring is '='
-- and, if true, fails with a message or completes some io action.
--
-- This is necessary to check when decoding permissively (i.e. filling in padding chars).
-- Consider the following 4 cases of a string of length l:
--
-- l = 0 mod 4: No pad chars are added, since the input is assumed to be good.
-- l = 1 mod 4: Never an admissible length in base64
-- l = 2 mod 4: 2 padding chars are added. If padding chars are present in the last 4 chars of the string,
-- they will fail to decode as final quanta.
-- l = 3 mod 4: 1 padding char is added. In this case a string is of the form <body> + <padchar>. If adding the
-- pad char "completes" the string so that it is `l = 0 mod 4`, then this may possibly form corrupted data.
-- This case is degenerate and should be disallowed.
--
-- Hence, permissive decodes should only fill in padding chars when it makes sense to add them. That is,
-- if an input is degenerate, it should never succeed when we add padding chars. We need the following invariant to hold:
--
-- @
-- B64U.decodeUnpadded <|> B64U.decodePadded ~ B64U.decodePadded
-- @
--
-- This means the only char we need to check is the last one, and only to disallow `l = 3 mod 4`.
--
validateLastPad
:: ByteString
-- ^ input to validate
-> String
-- ^ error msg
-> Either String ByteString
-> Either String ByteString
validateLastPad !bs err !io
| B.last bs == 0x3d = Left err
| otherwise = io
{-# INLINE validateLastPad #-}
-- | Sanity check an index against a bitmask to make sure
-- it's coherent. If pos & mask == 0, we're good. If not, we should fail.
--
sanityCheckPos :: Word32 -> Word8 -> Bool
sanityCheckPos pos mask = fromIntegral pos .&. mask == 0
{-# INLINE sanityCheckPos #-}
-- | Mask 2 bits
--
mask_2bits :: Word8
mask_2bits = 3 -- (1 << 2) - 1
{-# NOINLINE mask_2bits #-}
-- | Mask 4 bits
--
mask_4bits :: Word8
mask_4bits = 15 -- (1 << 4) - 1
{-# NOINLINE mask_4bits #-}
-- | Back-compat shim for bytestring >=0.11. Constructs a
-- bytestring from a foreign ptr and a length. Offset is 0.
--
mkBS :: ForeignPtr Word8 -> Int -> ByteString
#if MIN_VERSION_bytestring(0,11,0)
mkBS dfp n = BS dfp n
#else
mkBS dfp n = PS dfp 0 n
#endif
{-# INLINE mkBS #-}
-- | Back-compat shim for bytestring >=0.11. Unwraps the foreign ptr of
-- a bytestring, executing an IO action as a function of the underlying
-- pointer and some starting length.
--
-- Note: in `unsafePerformIO`.
--
withBS :: ByteString -> (Ptr Word8 -> Int -> IO a) -> a
#if MIN_VERSION_bytestring(0,11,0)
withBS (BS !sfp !slen) f = unsafePerformIO $
withForeignPtr sfp $ \p -> f p slen
#else
withBS (PS !sfp !soff !slen) f = unsafePerformIO $
withForeignPtr sfp $ \p -> f (plusPtr p soff) slen
#endif
{-# INLINE withBS #-}
|