1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
|
#!/usr/bin/env python3
# --------------------( LICENSE )--------------------
# Copyright (c) 2014-2025 Beartype authors.
# See "LICENSE" for further details.
'''
Project-wide **path copiers** (i.e., low-level callables permanently copying
on-disk files and directories in various reasonably safe and portable ways).
This private submodule is *not* intended for importation by downstream callers.
'''
# ....................{ IMPORTS }....................
from beartype.roar._roarexc import _BeartypeUtilPathDirException
from beartype.typing import Optional
from beartype._data.typing.datatyping import (
CollectionStrs,
PathnameLike,
PathnameLikeTuple,
TypeException,
)
from collections.abc import Callable
from pathlib import Path
from enum import (
Enum,
auto as next_enum_member_value,
unique as die_unless_enum_member_values_unique,
)
from shutil import (
copytree,
ignore_patterns,
)
# ....................{ ENUMERATIONS }....................
@die_unless_enum_member_values_unique
class BeartypeDirCopyOverwritePolicy(Enum):
'''
Enumeration of all kinds of **directory-copying overwrite policies** (i.e.,
competing strategies for handling edge cases in which a target path already
exists when recursively copying directories, each with concomitant tradeoffs
with respect to safety).
Note that enumeration members are intentionally ordered from most safe
(:attr:`.HALT_WITH_EXCEPTION`) to least safe (:attr:`.OVERWRITE`).
Attributes
----------
HALT_WITH_EXCEPTION : EnumMemberType
Policy raising a fatal exception if any target path already exists. This
constitutes the strictest and thus safest such policy.
SKIP_WITH_WARNING : EnumMemberType
Policy ignoring (i.e., skipping) each existing target path with a
non-fatal warning. This policy strikes a comfortable balance between
strictness and laxness and is thus the recommended default.
OVERWRITE : EnumMemberType
Policy silently overwriting each existing target path. This constitutes
the laxest and thus riskiest such policy.
'''
HALT_WITH_EXCEPTION = next_enum_member_value()
SKIP_WITH_WARNING = next_enum_member_value()
OVERWRITE = next_enum_member_value()
# ....................{ COPIERS }....................
#FIXME: Unit test us up, please.
def copy_dir(
# Mandatory parameters.
src_dirname: PathnameLike,
trg_dirname: PathnameLike,
# Optional parameters.
overwrite_policy: BeartypeDirCopyOverwritePolicy = (
BeartypeDirCopyOverwritePolicy.HALT_WITH_EXCEPTION),
ignore_basename_globs: Optional[CollectionStrs] = None,
exception_cls: TypeException = _BeartypeUtilPathDirException,
exception_prefix: str = '',
) -> None:
'''
Recursively copy the source directory with the passed dirname to the
target directory with the passed dirname.
For generality:
* All nonexistent parents of the target directory will be recursively
created, mimicking the action of the ``mkdir -p`` shell command on
POSIX-compatible platforms in a platform-agnostic manner.
* All symbolic links in the source directory will be preserved (i.e.,
copied as is rather than their transitive targets copied instead).
Caveats
-------
**This function is subject to subtle race conditions if multiple threads
and/or processes concurrently attempt to mutate any relevant path on the
local filesystem.** Since *all* filesystem-centric logic suffers similar
issues, we leave this issue as an exercise for the caller.
Parameters
----------
src_dirname : PathnameLike
Absolute or relative dirname of the source directory to be copied from.
trg_dirname : PathnameLike
Absolute or relative dirname of the target directory to be copied to.
overwrite_policy : BeartypeDirCopyOverwritePolicy, default: BeartypeDirCopyOverwritePolicy.HALT_WITH_EXCEPTION
**Directory overwrite policy** (i.e., strategy for handling existing
paths to be overwritten by this copy). Defaults to
:attr:`BeartypeDirCopyOverwritePolicy.HALT_WITH_EXCEPTION`, raising an
exception if any target path already exists.
ignore_basename_globs : Collection[str] | None, default: None
Collection of shell-style globs (e.g., ``('*.tmp', '.keep')``) matching
the basenames of all paths transitively owned by this source directory
to be ignored during recursion and hence neither copied nor visited.
Defaults to ``None``, in which case *all* paths transitively owned by
this source directory are unconditionally copied and visited.
Note this parameter is incompatible with the
:attr:`BeartypeDirCopyOverwritePolicy.OVERWRITE` policy. If this
parameter is non-:data:`None` and the ``overwrite_policy`` parameter is
:attr:`BeartypeDirCopyOverwritePolicy.OVERWRITE`, an exception is
raised.
exception_cls : Type[Exception], default: _BeartypeUtilPathException
Type of exception to be raised in the event of a fatal error. Defaults
to :exc:`._BeartypeUtilPathException`.
exception_prefix : str, default: ''
Human-readable substring prefixed raised exceptions messages. Defaults
to the empty string.
Raises
------
exception_cls
If either:
* The source directory does *not* exist.
* The target directory is a subdirectory of the source directory.
Permitting this edge case induces non-trivial issues, including
infinite recursion from within the musty entrails of the
:mod:`distutils` package (e.g., due to relative symbolic links).
* The passed ``overwrite_policy`` parameter is
:attr:`BeartypeDirCopyOverwritePolicy.HALT_WITH_EXCEPTION` *and* one or more
subdirectories of the target directory already exist that are also
subdirectories of the source directory. For safety, this function
always preserves rather than overwrites existing target
subdirectories.
See Also
-----------
https://stackoverflow.com/a/22588775/2809027
StackOverflow answer strongly inspiring this function's
:attr:`BeartypeDirCopyOverwritePolicy.SKIP_WITH_WARNING` implementation.
'''
assert isinstance(src_dirname, PathnameLikeTuple), (
f'{repr(src_dirname)} neither string nor "Path" object.')
assert isinstance(trg_dirname, PathnameLikeTuple), (
f'{repr(trg_dirname)} neither string nor "Path" object.')
# ....................{ IMPORTS }....................
# Avoid circular import dependencies.
from beartype._util.path.utilpathtest import (
die_if_dir,
die_if_subpath,
die_unless_dir,
)
# ....................{ PREAMBLE }....................
# High-level "Path" objects encapsulating these dirnames.
src_dirname = Path(src_dirname)
trg_dirname = Path(trg_dirname)
# If the source directory does *NOT* exist, raise an exception.
die_unless_dir(
dirname=src_dirname,
exception_cls=exception_cls,
exception_prefix=exception_prefix,
)
# If the target directory is a subdirectory of the source directory, raise
# an exception. Permitting this edge case provokes issues, including
# infinite recursion from within the musty entrails of the "distutils"
# codebase (possibly due to relative symbolic links).
die_if_subpath(
parent_pathname=src_dirname,
child_pathname=trg_dirname,
exception_cls=exception_cls,
exception_prefix=exception_prefix,
)
# If passed an iterable of shell-style globs matching ignorable basenames,
# convert this iterable into a predicate function of the form required by
# the shutil.copytree() function. Specifically, this function accepts the
# absolute or relative pathname of an arbitrary directory and an iterable
# of the basenames of all subdirectories and files directly in this
# directory; this function returns an iterable of the basenames of all
# subdirectories and files in this directory to be ignored. This signature
# resembles:
#
# def ignore_basename_func(
# parent_dirname: str,
# child_basenames: IterableTypes) -> IterableTypes
ignore_basename_func: Optional[Callable] = None
if ignore_basename_globs is not None:
ignore_basename_func = ignore_patterns(*ignore_basename_globs)
# ....................{ POLICIES }....................
# If either:
# * Raising a fatal exception if any target path already exists *OR*...
# * Overwriting this target directory with this source directory...
#
# Then the standard shutil.copytree() function applies to this use case.
if overwrite_policy in _COPY_DIR_OVERWRITE_POLICIES_COPYTREE:
# Dictionary of all keyword arguments to pass to shutil.copytree(),
# preserving symbolic links as is.
copytree_kwargs: dict = {
'symlinks': True,
}
# If raising a fatal exception if any target path already exists, do so.
# While we could defer to the exception raised by the shutil.copytree()
# function for this case, this exception's message erroneously refers to
# this directory as a file and is hence best avoided as unreadable:
# [Errno 17] File exists: 'sample_sim' # <-- lolbro! useless.
if (
overwrite_policy is
BeartypeDirCopyOverwritePolicy.HALT_WITH_EXCEPTION
):
die_if_dir(
dirname=trg_dirname,
exception_cls=exception_cls,
exception_prefix=exception_prefix,
)
# Else, this target directory is being overwritten by this source
# directory. In this case, silently accept this target directory if this
# directory already exists.
else:
copytree_kwargs['dirs_exist_ok'] = True
# If ignoring basenames, inform shutil.copytree() of these basenames.
if ignore_basename_func is not None:
copytree_kwargs['ignore'] = ignore_basename_func
# Else, no basenames are being ignored.
# Recursively copy this source to target directory. To avoid silently
# overwriting all conflicting target paths, the shutil.copytree()
# rather than dir_util.copy_tree() function is called.
copytree(src=src_dirname, dst=trg_dirname, **copytree_kwargs)
#FIXME: Given how awesomely flexible the manual approach implemented below
#is, we should probably consider simply rewriting the above two approaches
#to reuse the exact same logic. It works. It's preferable. Let's reuse it.
#FIXME: Actually, this is increasingly critical. Third-party functions
#called above -- notably, the dir_util.copy_tree() function -- appear to
#suffer critical edge cases. This can be demonstrated via the BETSEE GUI by
#attempting to save an opened simulation configuration to a subdirectory of
#itself, which appears to provoke infinite recursion from within the musty
#depths of the "distutils" codebase. Of course, the implementation below
#could conceivably suffer similar issues. If this is the case, this
#function should explicitly detect attempts to recursively copy a source
#directory into a subdirectory of itself and raise an exception.
#FIXME: Uncomment this if and when we actually need it. So, probably never.
#Refactoring this out of the BETSE codebase is tedious beyond belief. *sigh*
# # Else if logging a warning for each target path that already exists, do so
# # by manually implementing recursive directory copying. Sadly, Python
# # provides no means of doing so "out of the box."
# elif overwrite_policy is BeartypeDirCopyOverwritePolicy.SKIP_WITH_WARNING:
# # Avoid circular import dependencies.
# from betse.util.path import files, paths, pathnames
# from betse.util.type.iterable import sequences
#
# # Passed parameters renamed for disambiguity.
# src_root_dirname = src_dirname
# trg_root_dirname = trg_dirname
#
# # Basename of the top-level target directory to be copied to.
# trg_root_basename = pathnames.get_basename(src_root_dirname)
#
# # For the absolute pathname of each recursively visited source
# # directory, an iterable of the basenames of all subdirectories of this
# # directory, and an iterable of the basenames of all files of this
# # directory...
# for src_parent_dirname, subdir_basenames, file_basenames in _walk(
# src_root_dirname):
# # Relative pathname of the currently visited source directory
# # relative to the absolute pathname of this directory.
# parent_dirname_relative = pathnames.relativize(
# src_dirname=src_root_dirname, trg_pathname=src_parent_dirname)
#
# # If ignoring basenames...
# if ignore_basename_func is not None:
# # Sets of the basenames of all ignorable subdirectories and
# # files of this source directory.
# subdir_basenames_ignored = ignore_basename_func(
# src_parent_dirname, subdir_basenames)
# file_basenames_ignored = ignore_basename_func(
# src_parent_dirname, file_basenames)
#
# # If ignoring one or more subdirectories...
# if subdir_basenames_ignored:
# # Log the basenames of these subdirectories.
# logs.log_debug(
# 'Ignoring source "%s/%s" subdirectories: %r',
# trg_root_basename,
# parent_dirname_relative,
# subdir_basenames_ignored)
#
# # Remove these subdirectories from the original iterable.
# # Since the os.walk() function supports in-place changes to
# # this iterable, this iterable is modified via this less
# # efficient function rather than efficient alternatives
# # (e.g., set subtraction).
# sequences.remove_items(
# sequence=subdir_basenames,
# items=subdir_basenames_ignored)
#
# # If ignoring one or more files...
# if file_basenames_ignored:
# # Log the basenames of these files.
# logs.log_debug(
# 'Ignoring source "%s/%s" files: %r',
# trg_root_basename,
# parent_dirname_relative,
# file_basenames_ignored)
#
# # Remove these files from the original iterable. Unlike
# # above, we could technically modify this iterable via
# # set subtraction: e.g.,
# #
# # subdir_basenames -= subdir_basenames_ignored
# #
# # For orthogonality, preserve the above approach instead.
# sequences.remove_items(
# sequence=file_basenames,
# items=file_basenames_ignored)
#
# # Absolute pathname of the corresponding target directory.
# trg_parent_dirname = pathnames.join(
# trg_root_dirname, parent_dirname_relative)
#
# # Create this target directory if needed.
# make_unless_dir(trg_parent_dirname)
#
# # For the basename of each non-ignorable file of this source
# # directory...
# for file_basename in file_basenames:
# # Absolute filenames of this source and target file.
# src_filename = pathnames.join(
# src_parent_dirname, file_basename)
# trg_filename = pathnames.join(
# trg_parent_dirname, file_basename)
#
# # If this target file already exists...
# if paths.is_path(trg_filename):
# # Relative filename of this file. The absolute filename of
# # this source or target file could be logged instead, but
# # this relative filename is significantly more terse.
# filename_relative = pathnames.join(
# trg_root_basename,
# parent_dirname_relative,
# file_basename)
#
# # Warn of this file being ignored.
# logs.log_warning(
# 'Ignoring existing target file: %s', filename_relative)
#
# # Ignore this file by continuing to the next.
# continue
#
# # Copy this source to target file.
# files.copy(
# src_filename=src_filename, trg_filename=trg_filename)
# Else, this overwrite policy is unrecognized. Raise an exception.
else:
raise exception_cls(
f'{exception_prefix}'
f'overwrite policy "{overwrite_policy}" unrecognized.'
)
# ....................{ PRIVATE ~ constants }....................
_COPY_DIR_OVERWRITE_POLICIES_COPYTREE = frozenset((
BeartypeDirCopyOverwritePolicy.HALT_WITH_EXCEPTION,
BeartypeDirCopyOverwritePolicy.OVERWRITE,
))
'''
Frozen set of all **copytree-friendly directory overwrite policies** (i.e.,
:class:`.BeartypeDirCopyOverwritePolicy` enumeration members suitable for
passing as the ``overwrite_policy`` parameter to the :func:`.copy_dir` function
such that the resulting implementation reduces to a trivial call of the standard
:func:`shutil.copytree` function).
'''
|