1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
|
-- |Stream the creation of a zip file, e.g., as it's being uploaded.
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE BangPatterns #-}
module Codec.Archive.Zip.Conduit.Zip
( zipStream
, ZipOptions(..)
, ZipInfo(..)
, defaultZipOptions
, ZipEntry(..)
, ZipData(..)
, zipFileData
) where
import qualified Codec.Compression.Zlib.Raw as Z
import Control.Arrow ((&&&), (+++), left)
import Control.DeepSeq (force)
import Control.Monad (when)
import Control.Monad.Catch (MonadThrow)
import Control.Monad.Primitive (PrimMonad)
import Control.Monad.State.Strict (StateT, get)
import Control.Monad.Trans.Resource (MonadResource)
import qualified Data.Binary.Put as P
import Data.Bits (bit, shiftL, shiftR, (.|.))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as CC
import Data.Conduit.Lift (stateC, execStateC)
import Data.Conduit.Serialization.Binary (sourcePut)
import qualified Data.Conduit.Zlib as CZ
import Data.Digest.CRC32 (crc32)
import Data.Either (isLeft)
import Data.Maybe (fromMaybe, fromJust)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian)
import Data.Word (Word16, Word32, Word64)
import Codec.Archive.Zip.Conduit.Types
import Codec.Archive.Zip.Conduit.Internal
-- |Options controlling zip file parameters and features
data ZipOptions = ZipOptions
{ zipOpt64 :: !Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases); this is automatically enabled for any files of known size (e.g., 'zipEntrySize')
, zipOptCompressLevel :: !Int -- ^Compress zipped files (0 = store only, 1 = minimal, 9 = best; non-zero improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package)
, zipOptInfo :: !ZipInfo -- ^Other parameters to store in the zip file
}
defaultZipOptions :: ZipOptions
defaultZipOptions = ZipOptions
{ zipOpt64 = False
, zipOptCompressLevel = -1
, zipOptInfo = ZipInfo
{ zipComment = BS.empty
}
}
infixr 7 ?*
(?*) :: Num a => Bool -> a -> a
True ?* x = x
False ?* _ = 0
-- |Use a file on disk as 'ZipData' (@'ZipDataSource' . 'CC.sourceFile'@).
zipFileData :: MonadResource m => FilePath -> ZipData m
zipFileData = ZipDataSource . CC.sourceFile
zipData :: Monad m => ZipData m -> Either (C.ConduitM () BS.ByteString m ()) BSL.ByteString
zipData (ZipDataByteString b) = Right b
zipData (ZipDataSource s) = Left s
dataSize :: Either a BSL.ByteString -> Maybe Word64
dataSize (Left _) = Nothing
dataSize (Right b) = Just $ fromIntegral $ BSL.length b
toDOSTime :: LocalTime -> (Word16, Word16)
toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins secs)) =
( fromIntegral hour `shiftL` 11 .|. fromIntegral mins `shiftL` 5 .|. truncate secs `shiftR` 1
, fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day
)
countOutput :: Monad m => C.ConduitM i BS.ByteString m () -> C.ConduitM i BS.ByteString (StateT Word64 m) ()
countOutput c = stateC $ \s -> (,) () . (s +) <$> outputSize c
output :: MonadThrow m => P.Put -> C.ConduitM i BS.ByteString (StateT Word64 m) ()
output = countOutput . sourcePut
maxBound16 :: Integral n => n
maxBound16 = fromIntegral (maxBound :: Word16)
data CommonFileHeaderInfo = CommonFileHeaderInfo
{ cfhiIsStreamingEntry :: !Bool
, cfhiHasUtf8Filename :: !Bool
, cfhiIsCompressed :: !Bool
, cfhiTime :: !Word16
, cfhiDate :: !Word16
} deriving (Eq, Ord, Show)
putCommonFileHeaderPart :: CommonFileHeaderInfo -> P.PutM ()
putCommonFileHeaderPart CommonFileHeaderInfo{..} = do
P.putWord16le $ cfhiIsStreamingEntry ?* bit 3 .|. cfhiHasUtf8Filename ?* bit 11
P.putWord16le $ cfhiIsCompressed ?* 8
P.putWord16le $ cfhiTime
P.putWord16le $ cfhiDate
-- | This is retained in memory until the end of the archive is written.
--
-- To avoid space leaks, this should contain only strict data.
data CentralDirectoryInfo = CentralDirectoryInfo
{ cdiOff :: !Word64
, cdiZ64 :: !Bool
, cdiCommonFileHeaderInfo :: !CommonFileHeaderInfo
, cdiCrc :: !Word32
, cdiUsz :: !Word64
, cdiName :: !BSC.ByteString
, cdiCsz :: !Word64
, cdiZipEntryExternalAttributes :: !(Maybe Word32) -- lazy Maybe must be e.g. via `force` at creation
} deriving (Eq, Ord, Show)
putCentralDirectory :: CentralDirectoryInfo -> P.PutM ()
putCentralDirectory CentralDirectoryInfo{..} = do
-- central directory
let o64 = cdiOff >= maxBound32
l64 = cdiZ64 ?* 16 + o64 ?* 8
a64 = cdiZ64 || o64
P.putWord32le 0x02014b50
P.putWord8 zipVersion
P.putWord8 osVersion
P.putWord8 $ if a64 then 45 else 20
P.putWord8 osVersion
putCommonFileHeaderPart cdiCommonFileHeaderInfo
P.putWord32le cdiCrc
P.putWord32le $ if cdiZ64 then maxBound32 else fromIntegral cdiCsz
P.putWord32le $ if cdiZ64 then maxBound32 else fromIntegral cdiUsz
P.putWord16le $ fromIntegral (BS.length cdiName)
P.putWord16le $ a64 ?* (4 + l64)
P.putWord16le 0 -- comment length
P.putWord16le 0 -- disk number
P.putWord16le 0 -- internal file attributes
P.putWord32le $ fromMaybe 0 cdiZipEntryExternalAttributes
P.putWord32le $ if o64 then maxBound32 else fromIntegral cdiOff
P.putByteString cdiName
when a64 $ do
P.putWord16le 0x0001
P.putWord16le l64
when cdiZ64 $ do
P.putWord64le cdiUsz
P.putWord64le cdiCsz
when o64 $
P.putWord64le cdiOff
-- |Stream produce a zip file, reading a sequence of entries with data.
-- Although file data is never kept in memory (beyond a single 'ZipDataByteString'), the format of zip files requires producing a final directory of entries at the end of the file, consuming an additional ~100 bytes of state per entry during streaming.
-- The final result is the total size of the zip file.
--
-- Depending on options, the resulting zip file should be compatible with most unzipping applications.
-- Any errors are thrown in the underlying monad (as 'ZipError's).
zipStream ::
( MonadThrow m
, PrimMonad m
) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64
zipStream ZipOptions{..} = execStateC 0 $ do
(cnt, cdir) <- next 0 (return ())
cdoff <- get
output cdir
eoff <- get
endDirectory cdoff (eoff - cdoff) cnt
where
next cnt dir = C.await >>= maybe
(return (cnt, dir))
(\e -> do
d <- entry e
next (succ cnt) $ dir >> d)
entry (ZipEntry{..}, zipData -> dat) = do
let usiz = dataSize dat
sdat = left (\x -> C.toProducer x C..| sizeCRC) dat
cfhiIsCompressed = zipOptCompressLevel /= 0
&& all (0 /=) usiz
&& all (0 /=) zipEntrySize
cfhiIsStreamingEntry = isLeft dat
compressPlainBs =
Z.compressWith
Z.defaultCompressParams
{ Z.compressLevel =
if zipOptCompressLevel == -1
then Z.defaultCompression
else Z.compressionLevel zipOptCompressLevel
}
(cdat, csiz)
| cfhiIsCompressed =
( ((`C.fuseBoth` (outputSize $ CZ.compress zipOptCompressLevel deflateWindowBits))
+++ compressPlainBs) sdat
, dataSize cdat)
| otherwise = (left (fmap (id &&& fst)) sdat, usiz)
cdiZ64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize)
(maxBound32 <) (max <$> usiz <*> csiz)
cfhiHasUtf8Filename = isLeft zipEntryName
cdiName = either TE.encodeUtf8 id zipEntryName
namelen = BS.length cdiName
(cfhiTime, cfhiDate) = toDOSTime zipEntryTime
mcrc = either (const Nothing) (Just . crc32) dat
!cdiCommonFileHeaderInfo = CommonFileHeaderInfo{..}
when (namelen > maxBound16) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": entry name too long"
cdiOff <- get
output $ do
P.putWord32le 0x04034b50
P.putWord8 $ if cdiZ64 then 45 else 20
P.putWord8 osVersion
putCommonFileHeaderPart cdiCommonFileHeaderInfo
P.putWord32le $ fromMaybe 0 mcrc
P.putWord32le $ if cdiZ64 then maxBound32 else maybe 0 fromIntegral csiz
P.putWord32le $ if cdiZ64 then maxBound32 else maybe 0 fromIntegral usiz
P.putWord16le $ fromIntegral namelen
P.putWord16le $ cdiZ64 ?* 20
P.putByteString cdiName
when cdiZ64 $ do
P.putWord16le 0x0001
P.putWord16le 16
P.putWord64le $ fromMaybe 0 usiz
P.putWord64le $ fromMaybe 0 csiz
let outsz c = stateC $ \(!o) -> (id &&& (o +) . snd) <$> c
((cdiUsz, cdiCrc), cdiCsz) <- either
(\cd -> do
r@((usz, crc), csz) <- outsz cd -- write compressed data
when (not cdiZ64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
output $ do
P.putWord32le 0x08074b50
P.putWord32le crc
let putsz
| cdiZ64 = P.putWord64le
| otherwise = P.putWord32le . fromIntegral
putsz csz
putsz usz
return r)
(\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b)
cdat
when (any (cdiUsz /=) zipEntrySize) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": incorrect zipEntrySize"
let !centralDirectoryInfo = CentralDirectoryInfo
{ cdiZipEntryExternalAttributes = force zipEntryExternalAttributes
, .. }
return $ putCentralDirectory centralDirectoryInfo
endDirectory cdoff cdlen cnt = do
let z64 = zipOpt64 || cdoff > maxBound32 || cnt > maxBound16
when z64 $ output $ do
P.putWord32le 0x06064b50 -- zip64 end
P.putWord64le 44 -- length of this record
P.putWord8 zipVersion
P.putWord8 osVersion
P.putWord8 45
P.putWord8 osVersion
P.putWord32le 0 -- disk
P.putWord32le 0 -- central disk
P.putWord64le cnt
P.putWord64le cnt
P.putWord64le cdlen
P.putWord64le cdoff
P.putWord32le 0x07064b50 -- locator:
P.putWord32le 0 -- central disk
P.putWord64le $ cdoff + cdlen
P.putWord32le 1 -- total disks
let comment = zipComment zipOptInfo
commlen = BS.length comment
when (commlen > maxBound16) $ zipError "comment too long"
output $ do
P.putWord32le 0x06054b50 -- end
P.putWord16le 0 -- disk
P.putWord16le 0 -- central disk
P.putWord16le $ fromIntegral $ min maxBound16 cnt
P.putWord16le $ fromIntegral $ min maxBound16 cnt
P.putWord32le $ fromIntegral $ min maxBound32 cdlen
P.putWord32le $ fromIntegral $ min maxBound32 cdoff
P.putWord16le $ fromIntegral commlen
P.putByteString comment
|