1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952
|
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
-----------------------------------------------------------------------------
-- |
-- Module : Distribution.Client.Tar
-- Copyright : (c) 2007 Bjorn Bringert,
-- 2008 Andrea Vezzosi,
-- 2008-2009 Duncan Coutts
-- License : BSD3
--
-- Maintainer : duncan@community.haskell.org
-- Portability : portable
--
-- Reading, writing and manipulating \"@.tar@\" archive files.
--
-----------------------------------------------------------------------------
module Distribution.Client.Tar (
-- * High level \"all in one\" operations
createTarGzFile,
extractTarGzFile,
-- * Converting between internal and external representation
read,
write,
writeEntries,
-- * Packing and unpacking files to\/from internal representation
pack,
unpack,
-- * Tar entry and associated types
Entry(..),
entryPath,
EntryContent(..),
Ownership(..),
FileSize,
Permissions,
EpochTime,
DevMajor,
DevMinor,
TypeCode,
Format(..),
buildTreeRefTypeCode,
buildTreeSnapshotTypeCode,
isBuildTreeRefTypeCode,
entrySizeInBlocks,
entrySizeInBytes,
-- * Constructing simple entry values
simpleEntry,
fileEntry,
directoryEntry,
-- * TarPath type
TarPath,
toTarPath,
fromTarPath,
-- ** Sequences of tar entries
Entries(..),
foldrEntries,
foldlEntries,
unfoldrEntries,
mapEntries,
filterEntries,
entriesIndex,
) where
import Data.Char (ord)
import Data.Int (Int64)
import Data.Bits (Bits, shiftL, testBit)
import Data.List (foldl')
import Numeric (readOct, showOct)
import Control.Applicative (Applicative(..))
import Control.Monad (MonadPlus(mplus), when, ap, liftM)
import qualified Data.Map as Map
import qualified Data.ByteString.Lazy as BS
import qualified Data.ByteString.Lazy.Char8 as BS.Char8
import Data.ByteString.Lazy (ByteString)
import qualified Codec.Compression.GZip as GZip
import qualified Distribution.Client.GZipUtils as GZipUtils
import System.FilePath
( (</>) )
import qualified System.FilePath as FilePath.Native
import qualified System.FilePath.Windows as FilePath.Windows
import qualified System.FilePath.Posix as FilePath.Posix
import System.Directory
( getDirectoryContents, doesDirectoryExist
, getPermissions, createDirectoryIfMissing, copyFile )
import qualified System.Directory as Permissions
( Permissions(executable) )
import Distribution.Client.Compat.FilePerms
( setFileExecutable )
import System.Posix.Types
( FileMode )
import Distribution.Client.Compat.Time
( EpochTime, getModTime )
import System.IO
( IOMode(ReadMode), openBinaryFile, hFileSize )
import System.IO.Unsafe (unsafeInterleaveIO)
import Prelude hiding (read)
--
-- * High level operations
--
createTarGzFile :: FilePath -- ^ Full Tarball path
-> FilePath -- ^ Base directory
-> FilePath -- ^ Directory to archive, relative to base dir
-> IO ()
createTarGzFile tar base dir =
BS.writeFile tar . GZip.compress . write =<< pack base [dir]
extractTarGzFile :: FilePath -- ^ Destination directory
-> FilePath -- ^ Expected subdir (to check for tarbombs)
-> FilePath -- ^ Tarball
-> IO ()
extractTarGzFile dir expected tar =
unpack dir . checkTarbomb expected . read
. GZipUtils.maybeDecompress =<< BS.readFile tar
--
-- * Entry type
--
type FileSize = Int64
type DevMajor = Int
type DevMinor = Int
type TypeCode = Char
type Permissions = FileMode
-- | Tar archive entry.
--
data Entry = Entry {
-- | The path of the file or directory within the archive. This is in a
-- tar-specific form. Use 'entryPath' to get a native 'FilePath'.
entryTarPath :: !TarPath,
-- | The real content of the entry. For 'NormalFile' this includes the
-- file data. An entry usually contains a 'NormalFile' or a 'Directory'.
entryContent :: !EntryContent,
-- | File permissions (Unix style file mode).
entryPermissions :: !Permissions,
-- | The user and group to which this file belongs.
entryOwnership :: !Ownership,
-- | The time the file was last modified.
entryTime :: !EpochTime,
-- | The tar format the archive is using.
entryFormat :: !Format
}
-- | Type code for the local build tree reference entry type. We don't use the
-- symbolic link entry type because it allows only 100 ASCII characters for the
-- path.
buildTreeRefTypeCode :: TypeCode
buildTreeRefTypeCode = 'C'
-- | Type code for the local build tree snapshot entry type.
buildTreeSnapshotTypeCode :: TypeCode
buildTreeSnapshotTypeCode = 'S'
-- | Is this a type code for a build tree reference?
isBuildTreeRefTypeCode :: TypeCode -> Bool
isBuildTreeRefTypeCode typeCode
| (typeCode == buildTreeRefTypeCode
|| typeCode == buildTreeSnapshotTypeCode) = True
| otherwise = False
-- | Native 'FilePath' of the file or directory within the archive.
--
entryPath :: Entry -> FilePath
entryPath = fromTarPath . entryTarPath
-- | Return the size of an entry in bytes.
entrySizeInBytes :: Entry -> FileSize
entrySizeInBytes = (*512) . fromIntegral . entrySizeInBlocks
-- | Return the number of blocks in an entry.
entrySizeInBlocks :: Entry -> Int
entrySizeInBlocks entry = 1 + case entryContent entry of
NormalFile _ size -> bytesToBlocks size
OtherEntryType _ _ size -> bytesToBlocks size
_ -> 0
where
bytesToBlocks s = 1 + ((fromIntegral s - 1) `div` 512)
-- | The content of a tar archive entry, which depends on the type of entry.
--
-- Portable archives should contain only 'NormalFile' and 'Directory'.
--
data EntryContent = NormalFile ByteString !FileSize
| Directory
| SymbolicLink !LinkTarget
| HardLink !LinkTarget
| CharacterDevice !DevMajor !DevMinor
| BlockDevice !DevMajor !DevMinor
| NamedPipe
| OtherEntryType !TypeCode ByteString !FileSize
data Ownership = Ownership {
-- | The owner user name. Should be set to @\"\"@ if unknown.
ownerName :: String,
-- | The owner group name. Should be set to @\"\"@ if unknown.
groupName :: String,
-- | Numeric owner user id. Should be set to @0@ if unknown.
ownerId :: !Int,
-- | Numeric owner group id. Should be set to @0@ if unknown.
groupId :: !Int
}
-- | There have been a number of extensions to the tar file format over the
-- years. They all share the basic entry fields and put more meta-data in
-- different extended headers.
--
data Format =
-- | This is the classic Unix V7 tar format. It does not support owner and
-- group names, just numeric Ids. It also does not support device numbers.
V7Format
-- | The \"USTAR\" format is an extension of the classic V7 format. It was
-- later standardised by POSIX. It has some restrictions but is the most
-- portable format.
--
| UstarFormat
-- | The GNU tar implementation also extends the classic V7 format, though
-- in a slightly different way from the USTAR format. In general for new
-- archives the standard USTAR/POSIX should be used.
--
| GnuFormat
deriving Eq
-- | @rw-r--r--@ for normal files
ordinaryFilePermissions :: Permissions
ordinaryFilePermissions = 0o0644
-- | @rwxr-xr-x@ for executable files
executableFilePermissions :: Permissions
executableFilePermissions = 0o0755
-- | @rwxr-xr-x@ for directories
directoryPermissions :: Permissions
directoryPermissions = 0o0755
isExecutable :: Permissions -> Bool
isExecutable p = testBit p 0 || testBit p 6 -- user or other executable
-- | An 'Entry' with all default values except for the file name and type. It
-- uses the portable USTAR/POSIX format (see 'UstarHeader').
--
-- You can use this as a basis and override specific fields, eg:
--
-- > (emptyEntry name HardLink) { linkTarget = target }
--
simpleEntry :: TarPath -> EntryContent -> Entry
simpleEntry tarpath content = Entry {
entryTarPath = tarpath,
entryContent = content,
entryPermissions = case content of
Directory -> directoryPermissions
_ -> ordinaryFilePermissions,
entryOwnership = Ownership "" "" 0 0,
entryTime = 0,
entryFormat = UstarFormat
}
-- | A tar 'Entry' for a file.
--
-- Entry fields such as file permissions and ownership have default values.
--
-- You can use this as a basis and override specific fields. For example if you
-- need an executable file you could use:
--
-- > (fileEntry name content) { fileMode = executableFileMode }
--
fileEntry :: TarPath -> ByteString -> Entry
fileEntry name fileContent =
simpleEntry name (NormalFile fileContent (BS.length fileContent))
-- | A tar 'Entry' for a directory.
--
-- Entry fields such as file permissions and ownership have default values.
--
directoryEntry :: TarPath -> Entry
directoryEntry name = simpleEntry name Directory
--
-- * Tar paths
--
-- | The classic tar format allowed just 100 characters for the file name. The
-- USTAR format extended this with an extra 155 characters, however it uses a
-- complex method of splitting the name between the two sections.
--
-- Instead of just putting any overflow into the extended area, it uses the
-- extended area as a prefix. The aggravating insane bit however is that the
-- prefix (if any) must only contain a directory prefix. That is the split
-- between the two areas must be on a directory separator boundary. So there is
-- no simple calculation to work out if a file name is too long. Instead we
-- have to try to find a valid split that makes the name fit in the two areas.
--
-- The rationale presumably was to make it a bit more compatible with old tar
-- programs that only understand the classic format. A classic tar would be
-- able to extract the file name and possibly some dir prefix, but not the
-- full dir prefix. So the files would end up in the wrong place, but that's
-- probably better than ending up with the wrong names too.
--
-- So it's understandable but rather annoying.
--
-- * Tar paths use POSIX format (ie @\'/\'@ directory separators), irrespective
-- of the local path conventions.
--
-- * The directory separator between the prefix and name is /not/ stored.
--
data TarPath = TarPath FilePath -- path name, 100 characters max.
FilePath -- path prefix, 155 characters max.
deriving (Eq, Ord)
-- | Convert a 'TarPath' to a native 'FilePath'.
--
-- The native 'FilePath' will use the native directory separator but it is not
-- otherwise checked for validity or sanity. In particular:
--
-- * The tar path may be invalid as a native path, eg the filename @\"nul\"@ is
-- not valid on Windows.
--
-- * The tar path may be an absolute path or may contain @\"..\"@ components.
-- For security reasons this should not usually be allowed, but it is your
-- responsibility to check for these conditions (eg using 'checkSecurity').
--
fromTarPath :: TarPath -> FilePath
fromTarPath (TarPath name prefix) = adjustDirectory $
FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix
++ FilePath.Posix.splitDirectories name
where
adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
= FilePath.Native.addTrailingPathSeparator
| otherwise = id
-- | Convert a native 'FilePath' to a 'TarPath'.
--
-- The conversion may fail if the 'FilePath' is too long. See 'TarPath' for a
-- description of the problem with splitting long 'FilePath's.
--
toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for
-- directories a 'TarPath' must always use a trailing @\/@.
-> FilePath -> Either String TarPath
toTarPath isDir = splitLongPath
. addTrailingSep
. FilePath.Posix.joinPath
. FilePath.Native.splitDirectories
where
addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator
| otherwise = id
-- | Take a sanitized path, split on directory separators and try to pack it
-- into the 155 + 100 tar file name format.
--
-- The strategy is this: take the name-directory components in reverse order
-- and try to fit as many components into the 100 long name area as possible.
-- If all the remaining components fit in the 155 name area then we win.
--
splitLongPath :: FilePath -> Either String TarPath
splitLongPath path =
case packName nameMax (reverse (FilePath.Posix.splitPath path)) of
Left err -> Left err
Right (name, []) -> Right (TarPath name "")
Right (name, first:rest) -> case packName prefixMax remainder of
Left err -> Left err
Right (_ , _ : _) -> Left "File name too long (cannot split)"
Right (prefix, []) -> Right (TarPath name prefix)
where
-- drop the '/' between the name and prefix:
remainder = init first : rest
where
nameMax, prefixMax :: Int
nameMax = 100
prefixMax = 155
packName _ [] = Left "File name empty"
packName maxLen (c:cs)
| n > maxLen = Left "File name too long"
| otherwise = Right (packName' maxLen n [c] cs)
where n = length c
packName' maxLen n ok (c:cs)
| n' <= maxLen = packName' maxLen n' (c:ok) cs
where n' = n + length c
packName' _ _ ok cs = (FilePath.Posix.joinPath ok, cs)
-- | The tar format allows just 100 ASCII characters for the 'SymbolicLink' and
-- 'HardLink' entry types.
--
newtype LinkTarget = LinkTarget FilePath
deriving (Eq, Ord)
-- | Convert a tar 'LinkTarget' to a native 'FilePath'.
--
fromLinkTarget :: LinkTarget -> FilePath
fromLinkTarget (LinkTarget path) = adjustDirectory $
FilePath.Native.joinPath $ FilePath.Posix.splitDirectories path
where
adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
= FilePath.Native.addTrailingPathSeparator
| otherwise = id
--
-- * Entries type
--
-- | A tar archive is a sequence of entries.
data Entries = Next Entry Entries
| Done
| Fail String
unfoldrEntries :: (a -> Either String (Maybe (Entry, a))) -> a -> Entries
unfoldrEntries f = unfold
where
unfold x = case f x of
Left err -> Fail err
Right Nothing -> Done
Right (Just (e, x')) -> Next e (unfold x')
foldrEntries :: (Entry -> a -> a) -> a -> (String -> a) -> Entries -> a
foldrEntries next done fail' = fold
where
fold (Next e es) = next e (fold es)
fold Done = done
fold (Fail err) = fail' err
foldlEntries :: (a -> Entry -> a) -> a -> Entries -> Either String a
foldlEntries f = fold
where
fold a (Next e es) = (fold $! f a e) es
fold a Done = Right a
fold _ (Fail err) = Left err
mapEntries :: (Entry -> Entry) -> Entries -> Entries
mapEntries f = foldrEntries (Next . f) Done Fail
filterEntries :: (Entry -> Bool) -> Entries -> Entries
filterEntries p =
foldrEntries
(\entry rest -> if p entry
then Next entry rest
else rest)
Done Fail
checkEntries :: (Entry -> Maybe String) -> Entries -> Entries
checkEntries checkEntry =
foldrEntries
(\entry rest -> case checkEntry entry of
Nothing -> Next entry rest
Just err -> Fail err)
Done Fail
entriesIndex :: Entries -> Either String (Map.Map TarPath Entry)
entriesIndex = foldlEntries (\m e -> Map.insert (entryTarPath e) e m) Map.empty
--
-- * Checking
--
-- | This function checks a sequence of tar entries for file name security
-- problems. It checks that:
--
-- * file paths are not absolute
--
-- * file paths do not contain any path components that are \"@..@\"
--
-- * file names are valid
--
-- These checks are from the perspective of the current OS. That means we check
-- for \"@C:\blah@\" files on Windows and \"\/blah\" files on Unix. For archive
-- entry types 'HardLink' and 'SymbolicLink' the same checks are done for the
-- link target. A failure in any entry terminates the sequence of entries with
-- an error.
--
checkSecurity :: Entries -> Entries
checkSecurity = checkEntries checkEntrySecurity
checkTarbomb :: FilePath -> Entries -> Entries
checkTarbomb expectedTopDir = checkEntries (checkEntryTarbomb expectedTopDir)
checkEntrySecurity :: Entry -> Maybe String
checkEntrySecurity entry = case entryContent entry of
HardLink link -> check (entryPath entry)
`mplus` check (fromLinkTarget link)
SymbolicLink link -> check (entryPath entry)
`mplus` check (fromLinkTarget link)
_ -> check (entryPath entry)
where
check name
| not (FilePath.Native.isRelative name)
= Just $ "Absolute file name in tar archive: " ++ show name
| not (FilePath.Native.isValid name)
= Just $ "Invalid file name in tar archive: " ++ show name
| ".." `elem` FilePath.Native.splitDirectories name
= Just $ "Invalid file name in tar archive: " ++ show name
| otherwise = Nothing
checkEntryTarbomb :: FilePath -> Entry -> Maybe String
checkEntryTarbomb _ entry | nonFilesystemEntry = Nothing
where
-- Ignore some special entries we will not unpack anyway
nonFilesystemEntry =
case entryContent entry of
OtherEntryType 'g' _ _ -> True --PAX global header
OtherEntryType 'x' _ _ -> True --PAX individual header
_ -> False
checkEntryTarbomb expectedTopDir entry =
case FilePath.Native.splitDirectories (entryPath entry) of
(topDir:_) | topDir == expectedTopDir -> Nothing
s -> Just $ "File in tar archive is not in the expected directory. "
++ "Expected: " ++ show expectedTopDir
++ " but got the following hierarchy: "
++ show s
--
-- * Reading
--
read :: ByteString -> Entries
read = unfoldrEntries getEntry
getEntry :: ByteString -> Either String (Maybe (Entry, ByteString))
getEntry bs
| BS.length header < 512 = Left "truncated tar archive"
-- Tar files end with at least two blocks of all '0'. Checking this serves
-- two purposes. It checks the format but also forces the tail of the data
-- which is necessary to close the file if it came from a lazily read file.
| BS.head bs == 0 = case BS.splitAt 1024 bs of
(end, trailing)
| BS.length end /= 1024 -> Left "short tar trailer"
| not (BS.all (== 0) end) -> Left "bad tar trailer"
| not (BS.all (== 0) trailing) -> Left "tar file has trailing junk"
| otherwise -> Right Nothing
| otherwise = partial $ do
case (chksum_, format_) of
(Ok chksum, _ ) | correctChecksum header chksum -> return ()
(Ok _, Ok _) -> fail "tar checksum error"
_ -> fail "data is not in tar format"
-- These fields are partial, have to check them
format <- format_; mode <- mode_;
uid <- uid_; gid <- gid_;
size <- size_; mtime <- mtime_;
devmajor <- devmajor_; devminor <- devminor_;
let content = BS.take size (BS.drop 512 bs)
padding = (512 - size) `mod` 512
bs' = BS.drop (512 + size + padding) bs
entry = Entry {
entryTarPath = TarPath name prefix,
entryContent = case typecode of
'\0' -> NormalFile content size
'0' -> NormalFile content size
'1' -> HardLink (LinkTarget linkname)
'2' -> SymbolicLink (LinkTarget linkname)
'3' -> CharacterDevice devmajor devminor
'4' -> BlockDevice devmajor devminor
'5' -> Directory
'6' -> NamedPipe
'7' -> NormalFile content size
_ -> OtherEntryType typecode content size,
entryPermissions = mode,
entryOwnership = Ownership uname gname uid gid,
entryTime = mtime,
entryFormat = format
}
return (Just (entry, bs'))
where
header = BS.take 512 bs
name = getString 0 100 header
mode_ = getOct 100 8 header
uid_ = getOct 108 8 header
gid_ = getOct 116 8 header
size_ = getOct 124 12 header
mtime_ = getOct 136 12 header
chksum_ = getOct 148 8 header
typecode = getByte 156 header
linkname = getString 157 100 header
magic = getChars 257 8 header
uname = getString 265 32 header
gname = getString 297 32 header
devmajor_ = getOct 329 8 header
devminor_ = getOct 337 8 header
prefix = getString 345 155 header
-- trailing = getBytes 500 12 header
format_ = case magic of
"\0\0\0\0\0\0\0\0" -> return V7Format
"ustar\NUL00" -> return UstarFormat
"ustar \NUL" -> return GnuFormat
_ -> fail "tar entry not in a recognised format"
correctChecksum :: ByteString -> Int -> Bool
correctChecksum header checksum = checksum == checksum'
where
-- sum of all 512 bytes in the header block,
-- treating each byte as an 8-bit unsigned value
checksum' = BS.Char8.foldl' (\x y -> x + ord y) 0 header'
-- treating the 8 bytes of chksum as blank characters.
header' = BS.concat [BS.take 148 header,
BS.Char8.replicate 8 ' ',
BS.drop 156 header]
-- * TAR format primitive input
getOct :: (Integral a, Bits a) => Int64 -> Int64 -> ByteString -> Partial a
getOct off len header
| BS.head bytes == 128 = parseBinInt (BS.unpack (BS.tail bytes))
| null octstr = return 0
| otherwise = case readOct octstr of
[(x,[])] -> return x
_ -> fail "tar header is malformed (bad numeric encoding)"
where
bytes = getBytes off len header
octstr = BS.Char8.unpack
. BS.Char8.takeWhile (\c -> c /= '\NUL' && c /= ' ')
. BS.Char8.dropWhile (== ' ')
$ bytes
-- Some tar programs switch into a binary format when they try to represent
-- field values that will not fit in the required width when using the text
-- octal format. In particular, the UID/GID fields can only hold up to 2^21
-- while in the binary format can hold up to 2^32. The binary format uses
-- '\128' as the header which leaves 7 bytes. Only the last 4 are used.
parseBinInt [0, 0, 0, byte3, byte2, byte1, byte0] =
return $! shiftL (fromIntegral byte3) 24
+ shiftL (fromIntegral byte2) 16
+ shiftL (fromIntegral byte1) 8
+ shiftL (fromIntegral byte0) 0
parseBinInt _ = fail "tar header uses non-standard number encoding"
getBytes :: Int64 -> Int64 -> ByteString -> ByteString
getBytes off len = BS.take len . BS.drop off
getByte :: Int64 -> ByteString -> Char
getByte off bs = BS.Char8.index bs off
getChars :: Int64 -> Int64 -> ByteString -> String
getChars off len = BS.Char8.unpack . getBytes off len
getString :: Int64 -> Int64 -> ByteString -> String
getString off len = BS.Char8.unpack . BS.Char8.takeWhile (/='\0')
. getBytes off len
data Partial a = Error String | Ok a
partial :: Partial a -> Either String a
partial (Error msg) = Left msg
partial (Ok x) = Right x
instance Functor Partial where
fmap = liftM
instance Applicative Partial where
pure = return
(<*>) = ap
instance Monad Partial where
return = Ok
Error m >>= _ = Error m
Ok x >>= k = k x
fail = Error
--
-- * Writing
--
-- | Create the external representation of a tar archive by serialising a list
-- of tar entries.
--
-- * The conversion is done lazily.
--
write :: [Entry] -> ByteString
write es = BS.concat $ map putEntry es ++ [BS.replicate (512*2) 0]
-- | Same as 'write', but for 'Entries'.
writeEntries :: Entries -> ByteString
writeEntries entries = BS.concat $ foldrEntries (\e res -> putEntry e : res)
[BS.replicate (512*2) 0] error entries
putEntry :: Entry -> ByteString
putEntry entry = case entryContent entry of
NormalFile content size -> BS.concat [ header, content, padding size ]
OtherEntryType _ content size -> BS.concat [ header, content, padding size ]
_ -> header
where
header = putHeader entry
padding size = BS.replicate paddingSize 0
where paddingSize = fromIntegral (negate size `mod` 512)
putHeader :: Entry -> ByteString
putHeader entry =
BS.concat [ BS.take 148 block
, BS.Char8.pack $ putOct 7 checksum
, BS.Char8.singleton ' '
, BS.drop 156 block ]
where
-- putHeaderNoChkSum returns a String, so we convert it to the final
-- representation before calculating the checksum.
block = BS.Char8.pack . putHeaderNoChkSum $ entry
checksum = BS.Char8.foldl' (\x y -> x + ord y) 0 block
putHeaderNoChkSum :: Entry -> String
putHeaderNoChkSum Entry {
entryTarPath = TarPath name prefix,
entryContent = content,
entryPermissions = permissions,
entryOwnership = ownership,
entryTime = modTime,
entryFormat = format
} =
concat
[ putString 100 $ name
, putOct 8 $ permissions
, putOct 8 $ ownerId ownership
, putOct 8 $ groupId ownership
, putOct 12 $ contentSize
, putOct 12 $ modTime
, fill 8 $ ' ' -- dummy checksum
, putChar8 $ typeCode
, putString 100 $ linkTarget
] ++
case format of
V7Format ->
fill 255 '\NUL'
UstarFormat -> concat
[ putString 8 $ "ustar\NUL00"
, putString 32 $ ownerName ownership
, putString 32 $ groupName ownership
, putOct 8 $ deviceMajor
, putOct 8 $ deviceMinor
, putString 155 $ prefix
, fill 12 $ '\NUL'
]
GnuFormat -> concat
[ putString 8 $ "ustar \NUL"
, putString 32 $ ownerName ownership
, putString 32 $ groupName ownership
, putGnuDev 8 $ deviceMajor
, putGnuDev 8 $ deviceMinor
, putString 155 $ prefix
, fill 12 $ '\NUL'
]
where
(typeCode, contentSize, linkTarget,
deviceMajor, deviceMinor) = case content of
NormalFile _ size -> ('0' , size, [], 0, 0)
Directory -> ('5' , 0, [], 0, 0)
SymbolicLink (LinkTarget link) -> ('2' , 0, link, 0, 0)
HardLink (LinkTarget link) -> ('1' , 0, link, 0, 0)
CharacterDevice major minor -> ('3' , 0, [], major, minor)
BlockDevice major minor -> ('4' , 0, [], major, minor)
NamedPipe -> ('6' , 0, [], 0, 0)
OtherEntryType code _ size -> (code, size, [], 0, 0)
putGnuDev w n = case content of
CharacterDevice _ _ -> putOct w n
BlockDevice _ _ -> putOct w n
_ -> replicate w '\NUL'
-- * TAR format primitive output
type FieldWidth = Int
putString :: FieldWidth -> String -> String
putString n s = take n s ++ fill (n - length s) '\NUL'
--TODO: check integer widths, eg for large file sizes
putOct :: (Show a, Integral a) => FieldWidth -> a -> String
putOct n x =
let octStr = take (n-1) $ showOct x ""
in fill (n - length octStr - 1) '0'
++ octStr
++ putChar8 '\NUL'
putChar8 :: Char -> String
putChar8 c = [c]
fill :: FieldWidth -> Char -> String
fill n c = replicate n c
--
-- * Unpacking
--
unpack :: FilePath -> Entries -> IO ()
unpack baseDir entries = unpackEntries [] (checkSecurity entries)
>>= emulateLinks
where
-- We're relying here on 'checkSecurity' to make sure we're not scribbling
-- files all over the place.
unpackEntries _ (Fail err) = fail err
unpackEntries links Done = return links
unpackEntries links (Next entry es) = case entryContent entry of
NormalFile file _ -> extractFile entry path file
>> unpackEntries links es
Directory -> extractDir path
>> unpackEntries links es
HardLink link -> (unpackEntries $! saveLink path link links) es
SymbolicLink link -> (unpackEntries $! saveLink path link links) es
_ -> unpackEntries links es --ignore other file types
where
path = entryPath entry
extractFile entry path content = do
-- Note that tar archives do not make sure each directory is created
-- before files they contain, indeed we may have to create several
-- levels of directory.
createDirectoryIfMissing True absDir
BS.writeFile absPath content
when (isExecutable (entryPermissions entry))
(setFileExecutable absPath)
where
absDir = baseDir </> FilePath.Native.takeDirectory path
absPath = baseDir </> path
extractDir path = createDirectoryIfMissing True (baseDir </> path)
saveLink path link links = seq (length path)
$ seq (length link')
$ (path, link'):links
where link' = fromLinkTarget link
emulateLinks = mapM_ $ \(relPath, relLinkTarget) ->
let absPath = baseDir </> relPath
absTarget = FilePath.Native.takeDirectory absPath </> relLinkTarget
in copyFile absTarget absPath
--
-- * Packing
--
pack :: FilePath -- ^ Base directory
-> [FilePath] -- ^ Files and directories to pack, relative to the base dir
-> IO [Entry]
pack baseDir paths0 = preparePaths baseDir paths0 >>= packPaths baseDir
preparePaths :: FilePath -> [FilePath] -> IO [FilePath]
preparePaths baseDir paths =
fmap concat $ interleave
[ do isDir <- doesDirectoryExist (baseDir </> path)
if isDir
then do entries <- getDirectoryContentsRecursive (baseDir </> path)
return (FilePath.Native.addTrailingPathSeparator path
: map (path </>) entries)
else return [path]
| path <- paths ]
packPaths :: FilePath -> [FilePath] -> IO [Entry]
packPaths baseDir paths =
interleave
[ do tarpath <- either fail return (toTarPath isDir relpath)
if isDir then packDirectoryEntry filepath tarpath
else packFileEntry filepath tarpath
| relpath <- paths
, let isDir = FilePath.Native.hasTrailingPathSeparator filepath
filepath = baseDir </> relpath ]
interleave :: [IO a] -> IO [a]
interleave = unsafeInterleaveIO . go
where
go [] = return []
go (x:xs) = do
x' <- x
xs' <- interleave xs
return (x':xs')
packFileEntry :: FilePath -- ^ Full path to find the file on the local disk
-> TarPath -- ^ Path to use for the tar Entry in the archive
-> IO Entry
packFileEntry filepath tarpath = do
mtime <- getModTime filepath
perms <- getPermissions filepath
file <- openBinaryFile filepath ReadMode
size <- hFileSize file
content <- BS.hGetContents file
return (simpleEntry tarpath (NormalFile content (fromIntegral size))) {
entryPermissions = if Permissions.executable perms
then executableFilePermissions
else ordinaryFilePermissions,
entryTime = mtime
}
packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk
-> TarPath -- ^ Path to use for the tar Entry in the archive
-> IO Entry
packDirectoryEntry filepath tarpath = do
mtime <- getModTime filepath
return (directoryEntry tarpath) {
entryTime = mtime
}
getDirectoryContentsRecursive :: FilePath -> IO [FilePath]
getDirectoryContentsRecursive dir0 =
fmap tail (recurseDirectories dir0 [""])
recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath]
recurseDirectories _ [] = return []
recurseDirectories base (dir:dirs) = unsafeInterleaveIO $ do
(files, dirs') <- collect [] [] =<< getDirectoryContents (base </> dir)
files' <- recurseDirectories base (dirs' ++ dirs)
return (dir : files ++ files')
where
collect files dirs' [] = return (reverse files, reverse dirs')
collect files dirs' (entry:entries) | ignore entry
= collect files dirs' entries
collect files dirs' (entry:entries) = do
let dirEntry = dir </> entry
dirEntry' = FilePath.Native.addTrailingPathSeparator dirEntry
isDirectory <- doesDirectoryExist (base </> dirEntry)
if isDirectory
then collect files (dirEntry':dirs') entries
else collect (dirEntry:files) dirs' entries
ignore ['.'] = True
ignore ['.', '.'] = True
ignore _ = False
|