File: extract.py

package info (click to toggle)
openmsx 21.0%2Bdfsg-2
  • links: PTS
  • area: main
  • in suites: forky
  • size: 28,132 kB
  • sloc: cpp: 244,928; xml: 54,344; tcl: 15,603; python: 5,335; perl: 281; sh: 78; makefile: 57
file content (112 lines) | stat: -rw-r--r-- 3,620 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Extract files from archives.

from pathlib import Path
from os import O_CREAT, O_WRONLY, fdopen, open as osopen, utime
try:
	from os import O_BINARY
except ImportError:
	# Platforms that do not define O_BINARY do not need it either.
	O_BINARY = 0
try:
	from os import symlink
except ImportError:
	# The os module of this platform has no symlink(); provide a stand-in with
	# the same signature that always fails with OSError, so that code calling
	# symlink() only has to handle OSError to cope with missing support.
	def symlink(source, link_name):
		raise OSError('OS does not support symlink creation')
from os.path import abspath, isdir, join as joinpath, sep, split as splitpath
from stat import S_IRWXU, S_IRWXG, S_IRWXO, S_IXUSR, S_IXGRP, S_IXOTH
from tarfile import TarFile
import sys

from detectsys import detectOS

# Result of detectOS(), evaluated once when this module is imported.
# NOTE(review): not referenced anywhere else in this file; presumably kept for
# other modules that import it -- confirm before removing.
hostOS = detectOS()

# Maximum number of bytes requested per read() call when copying the contents
# of an archived file to disk.
# Note: Larger buffers might make extraction slower.
bufSize = 16384

def extract(archivePath, destDir, rename = None):
	'''Extract the given archive to the given directory.

	If a rename function is given, it is called with the output path relative
	to the destination directory; the value returned by the rename function is
	used as the actual relative destination file path.
	This function sets file ownership and permissions like is done in newly
	created files and ignores the ownership and permissions from the archive,
	since we are not restoring a backup. The one exception is the executable
	bit: a newly created file gets execute permission only if the archive
	marks it as executable for its owner.
	Only regular files, directories and symbolic links are supported.
	Existing files are overwritten. The modification time of regular files is
	taken from the archive.

	Raises ValueError if the destination directory does not exist, if an
	entry would be written outside of the destination directory (directly,
	via ".." path components or via a symbolic link), or if an entry is of an
	unsupported type.
	Raises OSError if the data of an entry ends before its recorded size.
	'''
	# O_TRUNC is not among the names imported from "os" at module level.
	from os import O_TRUNC

	absDestDir = Path(destDir).resolve()
	if not absDestDir.is_dir():
		raise ValueError(
			f'Destination directory "{absDestDir}" does not exist'
		)

	createdDirs = set()

	def makeParentDir(path):
		# Archives do not necessarily list a directory before the entries
		# inside it; remember what was created to avoid repeated mkdir calls.
		parent = path.parent
		if parent not in createdDirs:
			parent.mkdir(parents=True, exist_ok=True)
			createdDirs.add(parent)

	with TarFile.open(archivePath, errorlevel=2) as tar:
		for member in tar.getmembers():
			absMemberPath = absDestDir / member.name
			# Cheap lexical check; this rejects absolute member names before
			# they are handed to the rename function.
			if not absMemberPath.is_relative_to(absDestDir):
				raise ValueError(
					f'Refusing to extract tar entry "{member.name}" '
					'outside destination directory'
				)
			if rename:
				absMemberPath = absDestDir / rename(
					absMemberPath.relative_to(absDestDir)
				)

			# is_relative_to() only compares path components, so it accepts
			# paths containing ".." and knows nothing about symlinks on disk.
			# Resolve the final location to find out where it really ends up.
			if member.issym():
				# Only the location of the link itself matters: nothing is
				# written to the link target here.
				resolvedPath = (
					absMemberPath.parent.resolve() / absMemberPath.name
				)
			else:
				resolvedPath = absMemberPath.resolve()
			if not resolvedPath.is_relative_to(absDestDir):
				raise ValueError(
					f'Refusing to extract tar entry "{member.name}" '
					'outside destination directory'
				)

			if member.isfile():
				makeParentDir(absMemberPath)
				mode = S_IRWXU | S_IRWXG | S_IRWXO
				if not (member.mode & S_IXUSR):
					mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH)
				# O_TRUNC makes sure no stale data is left behind when
				# overwriting an existing file that is longer than the
				# archived one.
				fd = osopen(
					absMemberPath,
					O_CREAT | O_WRONLY | O_TRUNC | O_BINARY,
					mode
				)
				with fdopen(fd, 'wb') as out, tar.extractfile(member) as inp:
					bytesLeft = member.size
					while bytesLeft > 0:
						buf = inp.read(bufSize)
						if not buf:
							# Without this check, a short read would make
							# this loop spin forever.
							raise OSError(
								'Unexpected end of data in tar entry '
								f'"{member.name}"'
							)
						out.write(buf)
						bytesLeft -= len(buf)
				# Set file modification time to match the archive.
				# For example autotools track dependencies between archived
				# files and will attempt to regenerate them if the time stamps
				# indicate one is older than the other.
				utime(absMemberPath, (member.mtime, member.mtime))
			elif member.isdir():
				absMemberPath.mkdir(parents=True, exist_ok=True)
				createdDirs.add(absMemberPath)
			elif member.issym():
				# Symlink creation is best effort: not every OS or file system
				# supports it.
				try:
					makeParentDir(absMemberPath)
					symlink(member.linkname, absMemberPath)
				except OSError as ex:
					print(
						'WARNING: Skipping symlink creation: '
						f'{absMemberPath} -> {member.linkname}: {ex}'
					)
			else:
				raise ValueError(
					f'Cannot extract tar entry "{member.name}": '
					'not a regular file, symlink or directory'
				)

class TopLevelDirRenamer:
	'''Callable that replaces the first component of a relative path.
	Instances can be passed as the rename function of extract(), to store the
	contents of an archive under a different top-level directory name.
	'''

	def __init__(self, newName):
		# Name that is substituted for the first path component.
		self.newName = newName

	def __call__(self, oldPath):
		'''Return the given path with its first component replaced by the
		new name. A path without any components maps to just the new name.
		'''
		remainder = oldPath.parts[1:]
		return Path(self.newName).joinpath(*remainder)

if __name__ == '__main__':
	# Command line: archive destination [new-top-level-dir]
	argCount = len(sys.argv)
	if argCount not in (3, 4):
		print('Usage: python3 extract.py archive destination [new-top-level-dir]', file=sys.stderr)
		sys.exit(2)
	# The optional third argument replaces the name of the top-level
	# directory of the extracted tree.
	renameTopLevelDir = (
		TopLevelDirRenamer(sys.argv[3]) if argCount == 4 else None
	)
	extract(sys.argv[1], sys.argv[2], renameTopLevelDir)