1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
# **NOTE** This module is used during bootstrap.
# Import *ONLY* builtin modules or modules that are collected into the base_library.zip archive.
# List of built-in modules: sys.builtin_module_names
# List of modules collected into base_library.zip: PyInstaller.compat.PY3_BASE_MODULES
import os
import struct
import marshal
import zlib
# In Python3, the MAGIC_NUMBER value is available in the importlib module. However, in the bootstrap phase we cannot use
# importlib directly, but rather its frozen variant.
import _frozen_importlib
# Bytecode magic number of the running interpreter. ZlibArchiveReader compares it against the magic stored in the PYZ
# header when it is constructed with check_pymagic=True.
PYTHON_MAGIC_NUMBER = _frozen_importlib._bootstrap_external.MAGIC_NUMBER
# Type codes for PYZ entries. The type code is the first element of each TOC entry.
PYZ_ITEM_MODULE = 0  # entry data is unmarshaled into a code object on extraction
PYZ_ITEM_PKG = 1  # entry data is unmarshaled into a code object on extraction
PYZ_ITEM_DATA = 2  # deprecated; PYZ does not contain any data entries anymore
PYZ_ITEM_NSPKG = 3  # PEP-420 namespace package
class ArchiveReadError(RuntimeError):
    """
    Error raised when an archive cannot be read; ZlibArchiveReader raises it when the PYZ magic pattern or the python
    magic pattern found in the archive header does not match the expected value.
    """
class ZlibArchiveReader:
    """
    Reader for PyInstaller's PYZ (ZlibArchive) archive. The archive is used to store collected byte-compiled Python
    modules, as individually-compressed entries.

    The table of contents is loaded once, when the reader is constructed, and is exposed as the `toc` dictionary that
    maps entry name to a `(typecode, offset, length)` tuple. Entry offsets are relative to the start of the archive.
    The archive file itself is not kept open; it is re-opened for every read.
    """
    _PYZ_MAGIC_PATTERN = b'PYZ\0'

    def __init__(self, filename, start_offset=None, check_pymagic=False):
        """
        Open the archive, validate its header, and load its table of contents.

        :param filename: path to the file that contains the archive. If `start_offset` is not given, the path may
            carry the offset as a `?offset` suffix (`/path/to/file?offset`).
        :param start_offset: offset of the archive within the file, or None to infer it from `filename`.
        :param check_pymagic: if True, require the python magic number stored in the archive to match the magic
            number of the running interpreter.
        :raises ArchiveReadError: if the PYZ magic pattern does not match, or if `check_pymagic` is set and the python
            magic number does not match.
        """
        self._filename = filename
        self._start_offset = start_offset

        self.toc = {}

        # If no offset is given, try inferring it from filename
        if start_offset is None:
            self._filename, self._start_offset = self._parse_offset_from_filename(filename)

        # Parse header and load TOC. Standard header contains 12 bytes: PYZ magic pattern, python bytecode magic
        # pattern, and offset to TOC (32-bit integer). It might be followed by additional fields, depending on
        # implementation version.
        with open(self._filename, "rb") as fp:
            # Read PYZ magic pattern, located at the start of the archive
            fp.seek(self._start_offset, os.SEEK_SET)
            magic = fp.read(len(self._PYZ_MAGIC_PATTERN))
            if magic != self._PYZ_MAGIC_PATTERN:
                raise ArchiveReadError("PYZ magic pattern mismatch!")

            # Read python magic/version number
            pymagic = fp.read(len(PYTHON_MAGIC_NUMBER))
            if check_pymagic and pymagic != PYTHON_MAGIC_NUMBER:
                raise ArchiveReadError("Python magic pattern mismatch!")

            # Read TOC offset (network byte order, signed 32-bit integer)
            toc_offset, *_ = struct.unpack('!i', fp.read(4))

            # Load TOC; its offset is relative to the start of the archive
            fp.seek(self._start_offset + toc_offset, os.SEEK_SET)
            self.toc = dict(marshal.load(fp))

    @staticmethod
    def _parse_offset_from_filename(filename):
        """
        Parse the numeric offset from filename, stored as: `/path/to/file?offset`.

        Returns a `(filename, offset)` tuple. If the filename has no `?` separator, or the text after the last `?` is
        not an integer, the filename is returned unchanged together with offset 0.
        """
        offset = 0

        idx = filename.rfind('?')
        if idx == -1:
            return filename, offset

        try:
            offset = int(filename[idx + 1:])
            filename = filename[:idx]  # Remove the offset from filename
        except ValueError:
            # Ignore spurious "?" in the path (for example, like in Windows UNC \\?\<path>).
            pass

        return filename, offset

    def extract(self, name, raw=False):
        """
        Extract data from entry with the given name.

        If the entry belongs to a module or a package, the data is loaded (unmarshaled) into code object. To retrieve
        raw data, set `raw` flag to True.

        :param name: name of the entry to look up in the TOC.
        :param raw: if True, return the decompressed bytes without unmarshaling them.
        :returns: None for a PEP-420 namespace package entry; otherwise a code object (module or package entry with
            `raw` unset) or the decompressed bytes.
        :raises KeyError: if the archive has no entry with the given name.
        :raises ImportError: if the entry data cannot be decompressed or unmarshaled.
        :raises SystemExit: if the archive file no longer exists at its original path.
        """
        # Look up entry
        entry = self.toc.get(name)
        if entry is None:
            raise KeyError(f"No entry named {name!r} found in the archive!")

        typecode, entry_offset, entry_length = entry

        # PEP-420 namespace package does not have a data blob.
        if typecode == PYZ_ITEM_NSPKG:
            return None

        # Read data blob
        try:
            with open(self._filename, "rb") as fp:
                fp.seek(self._start_offset + entry_offset)
                obj = fp.read(entry_length)
        except FileNotFoundError:
            # We open the archive file each time we need to read from it, to avoid locking the file by keeping it open.
            # This allows executable to be deleted or moved (renamed) while it is running, which is useful in certain
            # scenarios (e.g., automatic update that replaces the executable). The caveat is that once the executable is
            # renamed, we cannot read from its embedded PYZ archive anymore. In such case, exit with informative
            # message.
            raise SystemExit(
                f"ERROR: {self._filename} appears to have been moved or deleted since this application was launched. "
                "Continuation from this state is impossible. Exiting now."
            )

        # A corrupted blob makes zlib raise zlib.error; report it the same way as any other unreadable entry instead of
        # leaking the low-level exception.
        try:
            obj = zlib.decompress(obj)
        except zlib.error as e:
            raise ImportError(f"Failed to decompress PYZ entry {name!r}!") from e

        # Only module and package entries hold marshaled code objects; everything else is returned as bytes.
        if raw or typecode not in (PYZ_ITEM_MODULE, PYZ_ITEM_PKG):
            return obj

        # marshal.loads() signals invalid data with EOFError, ValueError, or TypeError, depending on how it is damaged.
        try:
            return marshal.loads(obj)
        except (EOFError, ValueError, TypeError) as e:
            raise ImportError(f"Failed to unmarshal PYZ entry {name!r}!") from e
|