1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
|
# $Id: extract.py 11306 2010-02-27 20:33:59Z mthuurne $
# Extract files from archives.
from os import O_CREAT, O_TRUNC, O_WRONLY, fdopen, mkdir, open as osopen, utime
try:
    from os import O_BINARY
except ImportError:
    # Platforms that do not define O_BINARY do not need it either.
    O_BINARY = 0
from os.path import abspath, isdir, join as joinpath, sep, split as splitpath
from stat import S_IRWXU, S_IRWXG, S_IRWXO, S_IXUSR, S_IXGRP, S_IXOTH
import sys
import tarfile

from detectsys import detectOS
# Result of detectOS(); compared against 'mingw32' when deciding whether
# utime() may be applied to an extracted directory.
hostOS = detectOS()

# Number of bytes requested per read while copying file data out of an
# archive.
# Note: Larger buffers might make extraction slower.
bufSize = 16 * 1024
def extract(archivePath, destDir, rename = None):
    '''Extract the given archive to the given directory.

    If a rename function is given, it is called with the output path relative
    to the destination directory; the value returned by the rename function is
    used as the actual relative destination file path.

    This function sets file ownership and permissions like is done in newly
    created files and ignores the ownership and permissions from the archive,
    since we are not restoring a backup. The only permission information
    taken from the archive is whether the owner-executable bit is set.

    Only regular files and directories are supported. An existing file at
    the destination path is overwritten completely.

    Raises ValueError if the destination directory does not exist, if an
    entry would end up outside the destination directory, or if an entry is
    neither a regular file nor a directory.
    Raises IOError if the archive runs out of data in the middle of a file.
    Errors detected by the tarfile module are raised as well, since its
    error level is set to 2.
    '''
    absDestDir = abspath(destDir) + sep
    if not isdir(absDestDir):
        raise ValueError(
            'Destination directory "%s" does not exist' % absDestDir
            )

    tar = tarfile.open(archivePath)
    # Note: According to the Python 2.6 docs, errorlevel can be passed as a
    #       keyword argument to the open() call, but on Python 2.5 this does
    #       not work.
    tar.errorlevel = 2

    try:
        for member in tar.getmembers():
            # Normalise the path first, so that ".." components cannot be
            # used to escape from the destination directory.
            absMemberPath = abspath(joinpath(absDestDir, member.name))
            if member.isdir():
                absMemberPath += sep
            if not absMemberPath.startswith(absDestDir):
                raise ValueError(
                    'Refusing to extract tar entry "%s" '
                    'outside destination directory'
                    % member.name
                    )
            if rename:
                absMemberPath = absDestDir + rename(
                    absMemberPath[len(absDestDir) : ]
                    )

            if member.isfile():
                # Request all permissions and let the umask trim them, like
                # for any newly created file; only keep the execute bits if
                # the archived file is executable by its owner.
                mode = S_IRWXU | S_IRWXG | S_IRWXO
                if not (member.mode & S_IXUSR):
                    mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH)
                # O_TRUNC makes sure no stale data is left behind when a
                # longer file already exists at the destination path.
                flags = O_CREAT | O_TRUNC | O_WRONLY | O_BINARY
                out = fdopen(osopen(absMemberPath, flags, mode), 'wb')
                try:
                    inp = tar.extractfile(member)
                    try:
                        bytesLeft = member.size
                        while bytesLeft > 0:
                            buf = inp.read(bufSize)
                            if not buf:
                                # Without this check, a truncated archive
                                # would keep this loop spinning forever.
                                raise IOError(
                                    'Unexpected end of data while extracting '
                                    'tar entry "%s"' % member.name
                                    )
                            out.write(buf)
                            bytesLeft -= len(buf)
                        buf = None
                    finally:
                        inp.close()
                finally:
                    out.close()
            elif member.isdir():
                if not isdir(absMemberPath):
                    mkdir(absMemberPath)
            else:
                raise ValueError(
                    'Cannot extract tar entry "%s": '
                    'not a regular file or a directory'
                    % member.name
                    )

            # Set file/directory modification time to match the archive.
            # For example autotools track dependencies between archived files
            # and will attempt to regenerate them if the time stamps indicate
            # one is older than the other.
            # Note: Apparently Python 2.5's utime() cannot set timestamps on
            #       directories in Windows.
            if member.isfile() or hostOS != 'mingw32':
                utime(absMemberPath, (member.mtime, member.mtime))
    finally:
        tar.close()
class TopLevelDirRenamer(object):
    '''Rename function that replaces the first component of the directory
    part of a relative path by a fixed new name.
    Instances are callable, so they can be passed as the "rename" argument
    of extract().
    '''

    def __init__(self, newName):
        # Name that will take the place of the first directory component.
        self.newName = newName

    def __call__(self, oldPath):
        '''Return the given path with the first component of its directory
        part replaced by the new name.
        '''
        dirPart, baseName = splitpath(oldPath)
        components = dirPart.split(sep)
        # Note: split() with an explicit separator returns at least one
        #       element, even for an empty string, so this check does not
        #       trigger; a path without a directory part ends up directly
        #       under the new name instead.
        if len(components) == 0:
            raise ValueError(
                'Directory part is empty for entry "%s"' % oldPath
                )
        return sep.join([ self.newName ] + components[1 : ] + [ baseName ])
# Command line interface:
#   python extract.py archive destination [new-top-level-dir]
# If the optional third argument is given, the top-level directory of the
# archive is renamed to it during extraction.
if __name__ == '__main__':
    if 3 <= len(sys.argv) <= 4:
        if len(sys.argv) == 4:
            renameTopLevelDir = TopLevelDirRenamer(sys.argv[3])
        else:
            renameTopLevelDir = None
        extract(sys.argv[1], sys.argv[2], renameTopLevelDir)
    else:
        # Write to stderr directly rather than using the "print >>" form,
        # which is only valid as a statement in Python 2.
        sys.stderr.write(
            'Usage: python extract.py archive destination [new-top-level-dir]'
            '\n'
            )
        sys.exit(2)
|