1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610
|
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function, unicode_literals
import concurrent.futures as futures
import errno
import os
import stat
import sys
from collections import Counter, OrderedDict, defaultdict
import six
import mozpack.path as mozpath
from mozpack.errors import errors
from mozpack.files import BaseFile, DeflatedFile, Dest, ManifestFile
class FileRegistry(object):
    '''
    Ordered container of BaseFile instances, keyed by path.

    Files are remembered in the order they were added. Directories are never
    stored themselves (so empty directories cannot be represented); only a
    reference count of the directories implied by the stored paths is kept.

    All paths are relative to an unspecified (virtual) root directory.

        registry = FileRegistry()
        registry.add('foo/bar', file_instance)
    '''

    def __init__(self):
        # path -> BaseFile instance, in insertion order.
        self._files = OrderedDict()
        # directory -> number of stored files located somewhere below it.
        self._required_directories = Counter()
        # directory -> [directory, its parent, its grand-parent, ...].
        self._partial_paths_cache = {}

    def _partial_paths(self, path):
        '''
        Return every ancestor directory of path, deepest first, e.g.
        "foo/bar/baz/zot" gives ["foo/bar/baz", "foo/bar", "foo"].

        Results are memoized per parent directory, so the returned list may
        be shared between calls and must not be modified by callers.
        '''
        parent = path.rpartition('/')[0]
        if not parent:
            return []

        cached = self._partial_paths_cache.get(parent)
        if cached:
            return cached

        ancestors = [parent]
        ancestors.extend(self._partial_paths(parent))
        self._partial_paths_cache[parent] = ancestors
        return ancestors

    def add(self, path, content):
        '''
        Register the BaseFile instance content under path.

        An error is reported through errors.error when path is already
        registered, when path is a directory required by another registered
        file, or when one of the parents of path is registered as a file.
        '''
        assert isinstance(content, BaseFile)

        if path in self._files:
            return errors.error("%s already added" % path)

        if self._required_directories[path] > 0:
            return errors.error(
                "Can't add %s: it is a required directory" % path)

        # A parent of the new path must not already be stored as a file.
        ancestors = self._partial_paths(path)
        clash = next((d for d in ancestors if d in self._files), None)
        if clash is not None:
            return errors.error(
                "Can't add %s: %s is a file" % (path, clash))

        self._files[path] = content
        self._required_directories.update(ancestors)

    def match(self, pattern):
        '''
        Return the list of registered paths matching pattern. See the
        mozpack.path.match documentation for a description of the handled
        patterns.

        An empty pattern matches everything; a pattern without wildcard
        matches either that exact file or every file below that directory.
        '''
        if pattern == '':
            return self.paths()

        if '*' in pattern:
            return [candidate for candidate in self.paths()
                    if mozpath.match(candidate, pattern)]

        if pattern in self._files:
            return [pattern]

        return [candidate for candidate in self.paths()
                if mozpath.basedir(candidate, [pattern]) == pattern]

    def remove(self, pattern):
        '''
        Unregister every path matching pattern. See the mozpack.path.match
        documentation for a description of the handled patterns.

        An error is reported through errors.error when nothing matches.
        '''
        matched = self.match(pattern)
        if not matched:
            return errors.error("Can't remove %s: %s" % (
                pattern, "not matching anything previously added"))

        for path in matched:
            del self._files[path]
            # Release the references this file held on its parents.
            self._required_directories.subtract(self._partial_paths(path))

    def paths(self):
        '''
        Return a new list of all registered paths, in insertion order.
        '''
        return [path for path in self._files]

    def __len__(self):
        '''
        Return the number of registered paths.
        '''
        return len(self._files)

    def __contains__(self, pattern):
        # Deliberately unsupported: callers must use contains(), which
        # matches patterns rather than exact keys.
        raise RuntimeError("'in' operator forbidden for %s. Use contains()." %
                           self.__class__.__name__)

    def contains(self, pattern):
        '''
        Return whether at least one registered path matches pattern. See the
        mozpack.path.match documentation for a description of the handled
        patterns.
        '''
        return bool(self.match(pattern))

    def __getitem__(self, path):
        '''
        Return the BaseFile instance registered under exactly path.
        '''
        return self._files[path]

    def __iter__(self):
        '''
        Iterate over the registered (path, BaseFile instance) pairs.

            for path, file in registry:
                (...)
        '''
        return six.iteritems(self._files)

    def required_directories(self):
        '''
        Return the set of directories needed to hold the registered paths,
        in no particular order. The directories are relative to the same
        unspecified (virtual) root directory as the paths, and that root
        itself is not part of the result.
        '''
        counts = self._required_directories
        return {directory for directory, count in counts.items()
                if count > 0}

    def output_to_inputs_tree(self):
        '''
        Return a dictionary mapping each registered (output) path to the set
        of input paths reported by its file. All paths are normalized.
        '''
        tree = {}
        for out_path, content in self:
            inputs = set()
            for in_path in content.inputs():
                inputs.add(mozpath.normpath(in_path))
            tree[mozpath.normpath(out_path)] = inputs
        return tree

    def input_to_outputs_tree(self):
        '''
        Return a dictionary mapping each input path to the set of registered
        (output) paths depending on it. All paths are normalized.
        '''
        tree = {}
        for out_path, content in self:
            normalized_out = mozpath.normpath(out_path)
            for in_path in content.inputs():
                normalized_in = mozpath.normpath(in_path)
                tree.setdefault(normalized_in, set()).add(normalized_out)
        return tree
class FileRegistrySubtree(object):
    '''
    Proxy exposing the part of an existing registry located under a base
    directory.

    Paths given to, and returned by, the proxy are relative to that base.
    Note this doesn't implement the whole FileRegistry interface.
    '''

    def __new__(cls, base, registry):
        # With an empty base the subtree is the whole registry, so the
        # registry itself is handed back instead of a wrapper.
        return object.__new__(cls) if base else registry

    def __init__(self, base, registry):
        self._base = base
        self._registry = registry

    def _get_path(self, path):
        '''
        Translate a subtree-relative path into a path in the wrapped
        registry.
        '''
        if not path:
            # mozpath.join would leave a trailing slash for an empty path,
            # which is not wanted.
            return self._base
        return mozpath.join(self._base, path)

    def add(self, path, content):
        full_path = self._get_path(path)
        return self._registry.add(full_path, content)

    def match(self, pattern):
        full_pattern = self._get_path(pattern)
        return [mozpath.relpath(found, self._base)
                for found in self._registry.match(full_pattern)]

    def remove(self, pattern):
        full_pattern = self._get_path(pattern)
        return self._registry.remove(full_pattern)

    def paths(self):
        return [path for path, _ in self]

    def __len__(self):
        return len(self.paths())

    def contains(self, pattern):
        full_pattern = self._get_path(pattern)
        return self._registry.contains(full_pattern)

    def __getitem__(self, path):
        full_path = self._get_path(path)
        return self._registry[full_path]

    def __iter__(self):
        # Only yield the entries of the wrapped registry living under the
        # base, with their path rewritten relative to it.
        for full_path, content in self._registry:
            if not mozpath.basedir(full_path, [self._base]):
                continue
            yield mozpath.relpath(full_path, self._base), content
class FileCopyResult(object):
    """Represents results of a FileCopier.copy operation.

    Each attribute is a set of paths filled in by the copy operation; the
    ``*_count`` properties give the size of the matching set.
    """

    def __init__(self):
        # Files that were written during the copy.
        self.updated_files = set()
        # Files that were left untouched by the copy.
        self.existing_files = set()
        # Files and directories removed from the destination.
        self.removed_files, self.removed_directories = set(), set()

    @property
    def updated_files_count(self):
        """Number of entries in updated_files."""
        updated = self.updated_files
        return len(updated)

    @property
    def existing_files_count(self):
        """Number of entries in existing_files."""
        existing = self.existing_files
        return len(existing)

    @property
    def removed_files_count(self):
        """Number of entries in removed_files."""
        removed = self.removed_files
        return len(removed)

    @property
    def removed_directories_count(self):
        """Number of entries in removed_directories."""
        removed = self.removed_directories
        return len(removed)
class FileCopier(FileRegistry):
    '''
    FileRegistry with the ability to copy the registered files to a separate
    directory.
    '''

    def copy(self, destination, skip_if_older=True,
             remove_unaccounted=True,
             remove_all_directory_symlinks=True,
             remove_empty_directories=True):
        '''
        Copy all registered files to the given destination path. The given
        destination can be an existing directory, or not exist at all. It
        can't be e.g. a file.

        The copy process acts a bit like rsync: files are not copied when they
        don't need to (see mozpack.files for details on file.copy).
        skip_if_older is passed through to each file's copy method.

        By default, files in the destination directory that aren't
        registered are removed and empty directories are deleted. In
        addition, all directory symlinks in the destination directory
        are deleted: this is a conservative approach to ensure that we
        never accidentally write files into a directory that is not the
        destination directory. In the worst case, we might have a
        directory symlink in the object directory to the source
        directory.

        To disable removing of unregistered files, pass
        remove_unaccounted=False. remove_unaccounted may also be a
        FileRegistry instance, in which case the files and directories it
        describes are used as the existing state of the destination instead
        of walking the destination directory. To disable removing empty
        directories, pass remove_empty_directories=False. In rare
        cases, you might want to maintain directory symlinks in the
        destination directory (at least those that are not required to
        be regular directories): pass
        remove_all_directory_symlinks=False. Exercise caution with
        this flag: you almost certainly do not want to preserve
        directory symlinks.

        Returns a FileCopyResult that details what changed. OSError raised
        by the underlying filesystem calls, and any exception raised while
        copying a file, are propagated to the caller.
        '''
        assert isinstance(destination, six.string_types)
        assert not os.path.exists(destination) or os.path.isdir(destination)

        result = FileCopyResult()
        have_symlinks = hasattr(os, 'symlink')
        destination = os.path.normpath(destination)

        # We create the destination directory specially. We can't do this as
        # part of the loop doing mkdir() below because that loop munges
        # symlinks and permissions and parent directories of the destination
        # directory may have their own weird schema. The contract is we only
        # manage children of destination, not its parents.
        try:
            os.makedirs(destination)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # Because we could be handling thousands of files, code in this
        # function is optimized to minimize system calls. We prefer CPU time
        # in Python over possibly I/O bound filesystem calls to stat() and
        # friends.

        required_dirs = {destination}
        required_dirs |= set(os.path.normpath(os.path.join(destination, d))
                             for d in self.required_directories())

        # Ensure destination directories are in place and proper.
        #
        # The "proper" bit is important. We need to ensure that directories
        # have appropriate permissions or we will be unable to discover
        # and write files. Furthermore, we need to verify directories aren't
        # symlinks.
        #
        # Symlinked directories (a symlink whose target is a directory) are
        # incompatible with us because our manifest talks in terms of files,
        # not directories. If we leave symlinked directories unchecked, we
        # would blindly follow symlinks and this might confuse file
        # installation. For example, if an existing directory is a symlink
        # to directory X and we attempt to install a symlink in this directory
        # to a file in directory X, we may create a recursive symlink!
        #
        # Sorting by length guarantees parents are handled before children.
        for d in sorted(required_dirs, key=len):
            try:
                os.mkdir(d)
            except OSError as error:
                if error.errno != errno.EEXIST:
                    raise

            # We allow the destination to be a symlink because the caller
            # is responsible for managing the destination and we assume
            # they know what they are doing.
            if have_symlinks and d != destination:
                st = os.lstat(d)
                if stat.S_ISLNK(st.st_mode):
                    # While we have remove_unaccounted, it doesn't apply
                    # to directory symlinks because if it did, our behavior
                    # could be very wrong.
                    os.remove(d)
                    os.mkdir(d)

            if not os.access(d, os.W_OK):
                # os.umask() can only be read by setting it, so set a
                # temporary value and restore the original right away.
                umask = os.umask(0o077)
                os.umask(umask)
                os.chmod(d, 0o777 & ~umask)

        if isinstance(remove_unaccounted, FileRegistry):
            # The caller told us what is supposed to be in the destination:
            # trust it rather than walking the filesystem.
            existing_files = set(os.path.normpath(os.path.join(destination, p))
                                 for p in remove_unaccounted.paths())
            existing_dirs = set(os.path.normpath(os.path.join(destination, p))
                                for p in remove_unaccounted
                                .required_directories())
            existing_dirs |= {os.path.normpath(destination)}
        else:
            # While we have remove_unaccounted, it doesn't apply to empty
            # directories because it wouldn't make sense: an empty directory
            # is empty, so removing it should have no effect.
            existing_dirs = set()
            existing_files = set()
            for root, dirs, files in os.walk(destination):
                # We need to perform the same symlink detection as above.
                # os.walk() doesn't follow symlinks into directories by
                # default, so we need to check dirs (we can't wait for root).
                if have_symlinks:
                    filtered = []
                    for d in dirs:
                        full = os.path.join(root, d)
                        st = os.lstat(full)
                        if stat.S_ISLNK(st.st_mode):
                            # This directory symlink is not a required
                            # directory: any such symlink would have been
                            # removed and a directory created above.
                            if remove_all_directory_symlinks:
                                os.remove(full)
                                result.removed_files.add(
                                    os.path.normpath(full))
                            else:
                                existing_files.add(os.path.normpath(full))
                        else:
                            filtered.append(d)

                    # In-place assignment so os.walk() sees the pruned list.
                    dirs[:] = filtered

                existing_dirs.add(os.path.normpath(root))

                for d in dirs:
                    existing_dirs.add(os.path.normpath(os.path.join(root, d)))

                for f in files:
                    existing_files.add(os.path.normpath(os.path.join(root, f)))

        # Now we reconcile the state of the world against what we want.
        dest_files = set()

        # Install files.
        #
        # Creating/appending new files on Windows/NTFS is slow. So we use a
        # thread pool to speed it up significantly. The performance of this
        # loop is so critical to common build operations on Linux that the
        # overhead of the thread pool is worth avoiding, so we have 2 code
        # paths. We also employ a low water mark to prevent thread pool
        # creation if number of files is too small to benefit.
        copy_results = []
        if sys.platform == 'win32' and len(self) > 100:
            with futures.ThreadPoolExecutor(4) as executor:
                pending = []
                for p, f in self:
                    destfile = os.path.normpath(os.path.join(destination, p))
                    pending.append(
                        (destfile,
                         executor.submit(f.copy, destfile, skip_if_older)))

            # All futures are complete once the executor has shut down.
            # result() must actually be called: it yields the boolean
            # returned by copy() and re-raises any exception raised in the
            # worker thread. Storing the bound method instead would make
            # every file look updated and silently drop copy failures.
            copy_results = [(path, future.result())
                            for path, future in pending]
        else:
            for p, f in self:
                destfile = os.path.normpath(os.path.join(destination, p))
                copy_results.append((destfile, f.copy(destfile, skip_if_older)))

        for destfile, copy_result in copy_results:
            dest_files.add(destfile)
            if copy_result:
                result.updated_files.add(destfile)
            else:
                result.existing_files.add(destfile)

        # Remove files no longer accounted for.
        if remove_unaccounted:
            for f in existing_files - dest_files:
                # Windows requires write access to remove files.
                if os.name == 'nt' and not os.access(f, os.W_OK):
                    # It doesn't matter what we set permissions to since we
                    # will remove this file shortly.
                    os.chmod(f, 0o600)

                os.remove(f)
                result.removed_files.add(f)

        if not remove_empty_directories:
            return result

        # Figure out which directories can be removed. This is complicated
        # by the fact we optionally remove existing files. This would be easy
        # if we walked the directory tree after installing files. But, we're
        # trying to minimize system calls.

        # Start with the ideal set.
        remove_dirs = existing_dirs - required_dirs

        # Then don't remove directories if we didn't remove unaccounted files
        # and one of those files exists.
        if not remove_unaccounted:
            parents = set()
            pathsep = os.path.sep
            for f in existing_files:
                path = f
                while True:
                    # All the paths are normalized by this point, so
                    # os.path.dirname would only do extra work.
                    dirname = path.rpartition(pathsep)[0]
                    # Stop as soon as an already-seen parent is reached:
                    # all of its own parents were recorded at that time.
                    if dirname in parents:
                        break
                    parents.add(dirname)
                    path = dirname
            remove_dirs -= parents

        # Remove empty directories that aren't required. Longest paths go
        # first so that children are removed before their parents.
        for d in sorted(remove_dirs, key=len, reverse=True):
            try:
                try:
                    os.rmdir(d)
                except OSError as e:
                    if e.errno in (errno.EPERM, errno.EACCES):
                        # Permissions may not allow deletion. So ensure write
                        # access is in place before attempting to rmdir again.
                        os.chmod(d, 0o700)
                        os.rmdir(d)
                    else:
                        raise
            except OSError as e:
                # If remove_unaccounted is a FileRegistry, then we have a
                # list of directories that may not be empty, so ignore rmdir
                # ENOTEMPTY errors for them.
                if (isinstance(remove_unaccounted, FileRegistry) and
                        e.errno == errno.ENOTEMPTY):
                    continue
                raise
            result.removed_directories.add(d)

        return result
class Jarrer(FileRegistry, BaseFile):
    '''
    FileRegistry able to pack its registered files into a jar file. Being a
    BaseFile as well, a Jarrer can itself be registered in a FileCopier.
    '''

    def __init__(self, compress=True):
        '''
        Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
        for details on the compress argument.
        '''
        FileRegistry.__init__(self)
        self.compress = compress
        # Paths to hand over to JarWriter.preload when packing.
        self._preload = []
        # Map path to compress option, for files overriding self.compress.
        self._compress_options = {}

    def add(self, path, content, compress=None):
        '''
        Register content under path. A compress value other than None
        overrides the jar-wide compress setting for that path.
        '''
        FileRegistry.add(self, path, content)
        if compress is None:
            return
        self._compress_options[path] = compress

    def copy(self, dest, skip_if_older=True):
        '''
        Pack all registered files in the given destination jar. The given
        destination jar may be a path to jar file, or a Dest instance for
        a jar file.

        If the destination jar file exists, its (compressed) contents are used
        instead of the registered BaseFile instances when appropriate.
        '''
        class DeflaterDest(Dest):
            '''
            Dest-like class, reading from a file-like object initially, but
            switching to a Deflater object if written to.

                dest = DeflaterDest(original_file)
                dest.read()      # Reads original_file
                dest.write(data) # Creates a Deflater and write data there
                dest.read()      # Re-opens the Deflater and reads from it
            '''

            def __init__(self, orig=None, compress=True):
                self.mode = None
                self.deflater = orig
                self.compress = compress

            def read(self, length=-1):
                if self.mode != 'r':
                    assert self.mode is None
                    self.mode = 'r'
                return self.deflater.read(length)

            def write(self, data):
                if self.mode != 'w':
                    # First write: drop whatever was being read and start a
                    # fresh Deflater.
                    from mozpack.mozjar import Deflater
                    self.deflater = Deflater(self.compress)
                    self.mode = 'w'
                self.deflater.write(data)

            def exists(self):
                return self.deflater is not None

        if isinstance(dest, six.string_types):
            dest = Dest(dest)
        assert isinstance(dest, Dest)

        from mozpack.mozjar import JarWriter, JarReader, JAR_BROTLI

        # Best effort: when the destination can't be read as a jar, behave
        # as if there was no previous content.
        try:
            old_jar = JarReader(fileobj=dest)
        except Exception:
            old_jar = []

        old_contents = {entry.filename: entry for entry in old_jar}

        with JarWriter(fileobj=dest, compress=self.compress) as jar:
            for path, content in self:
                compress = self._compress_options.get(path, self.compress)

                # Temporary: Because l10n repacks can't handle brotli just yet,
                # but need to be able to decompress those files, per
                # UnpackFinder and formatters, we force deflate on them.
                if compress == JAR_BROTLI and (
                        isinstance(content, ManifestFile) or
                        mozpath.basename(path) == 'install.rdf'):
                    compress = True

                # If the added content already comes from a jar file, we just
                # add the raw data from the original jar file to the new one.
                if isinstance(content, DeflatedFile):
                    jar.add(path, content.file, mode=content.mode,
                            compress=content.file.compress)
                    continue

                # If the file is already in the old contents for this jar,
                # we avoid compressing when the contents match, which requires
                # decompressing the old content. But for e.g. l10n repacks,
                # which can't decompress brotli, we skip this.
                previous = old_contents.get(path)
                if previous is not None and previous.compress != JAR_BROTLI:
                    deflater = DeflaterDest(orig=previous, compress=compress)
                else:
                    deflater = DeflaterDest(compress=compress)

                content.copy(deflater, skip_if_older)
                jar.add(path, deflater.deflater, mode=content.mode,
                        compress=compress)

            if self._preload:
                jar.preload(self._preload)

    def open(self):
        # A Jarrer has no readable content of its own.
        raise RuntimeError('unsupported')

    def preload(self, paths):
        '''
        Add the given set of paths to the list of preloaded files. See
        mozpack.mozjar.JarWriter documentation for details on jar preloading.
        '''
        self._preload.extend(paths)
|