1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
|
"""Function for *mirroring* a filesystem.
Mirroring will create a copy of a source filesystem on a destination
filesystem. If there are no files on the destination, then mirroring
is simply a straight copy. If there are any files or directories on the
destination they may be deleted or modified to match the source.
In order to avoid redundant copying of files, `mirror` can compare
timestamps, and only copy files with a newer modified date. This
timestamp comparison is only done if the file sizes are the same;
files whose sizes differ are always copied.
This scheme will work if you have mirrored a directory previously, and
you would like to copy any changes. Otherwise you should set the
``copy_if_newer`` parameter to `False` to guarantee an exact copy, at
the expense of potentially copying extra files.
"""
from __future__ import print_function, unicode_literals
import typing
from ._bulk import Copier
from .copy import copy_file_internal
from .errors import ResourceNotFound
from .opener import manage_fs
from .tools import is_thread_safe
from .walk import Walker
if typing.TYPE_CHECKING:
from typing import Callable, Optional, Text, Union
from .base import FS
from .info import Info
def _compare(info1, info2):
    # type: (Info, Info) -> bool
    """Decide whether the resource behind ``info1`` should be copied.

    Arguments:
        info1 (Info): Info of the candidate (source) resource.
        info2 (Info): Info of the resource it would replace.

    Returns:
        bool: `True` if the sizes differ, if either modified time is
        unavailable, or if ``info1`` was modified after ``info2``;
        `False` otherwise.

    """
    # A size mismatch settles it without looking at timestamps.
    if info1.size != info2.size:
        return True

    src_mtime, dst_mtime = info1.modified, info2.modified
    if src_mtime is None or dst_mtime is None:
        # Without both timestamps there is nothing to compare, so copy.
        return True
    return src_mtime > dst_mtime
def mirror(
    src_fs,  # type: Union[FS, Text]
    dst_fs,  # type: Union[FS, Text]
    walker=None,  # type: Optional[Walker]
    copy_if_newer=True,  # type: bool
    workers=0,  # type: int
    preserve_time=False,  # type: bool
):
    # type: (...) -> None
    """Mirror files / directories from one filesystem to another.

    After mirroring, ``dst_fs`` matches ``src_fs``: resources on the
    destination that are not on the source are removed, and files from
    the source are copied across (subject to ``copy_if_newer``).

    Arguments:
        src_fs (FS or str): Source filesystem (URL or instance).
        dst_fs (FS or str): Destination filesystem (URL or instance).
        walker (~fs.walk.Walker, optional): An optional walker instance.
        copy_if_newer (bool): Only copy newer files (the default).
        workers (int): Number of worker threads used
            (0 for single threaded). Set to a relatively low number
            for network filesystems, 4 would be a good start.
        preserve_time (bool): If `True`, try to preserve mtime of the
            resources (defaults to `False`).

    """
    with manage_fs(src_fs, writeable=False) as _src_fs:
        with manage_fs(dst_fs, create=True) as _dst_fs:
            # Worker threads are only used when both filesystems are
            # reported thread safe; otherwise the copier gets 0 workers.
            if is_thread_safe(_src_fs, _dst_fs):
                num_workers = workers
            else:
                num_workers = 0

            copier_cm = Copier(num_workers=num_workers, preserve_time=preserve_time)
            with copier_cm as copier:
                # Hold both filesystem locks for the whole mirror pass.
                with _src_fs.lock(), _dst_fs.lock():
                    _mirror(
                        _src_fs,
                        _dst_fs,
                        walker=walker,
                        copy_if_newer=copy_if_newer,
                        copy_file=copier.copy,
                        preserve_time=preserve_time,
                    )
def _mirror(
    src_fs,  # type: FS
    dst_fs,  # type: FS
    walker=None,  # type: Optional[Walker]
    copy_if_newer=True,  # type: bool
    copy_file=copy_file_internal,  # type: Callable[[FS, str, FS, str, bool], None]
    preserve_time=False,  # type: bool
):
    # type: (...) -> None
    """Make ``dst_fs`` match ``src_fs``, one walked directory at a time.

    For every directory yielded by the walker, the matching destination
    directory is listed (and created if it does not exist). Source files
    are then copied, source sub-directories are created, and whatever is
    left on the destination listing is deleted.

    Arguments:
        src_fs (FS): Source filesystem.
        dst_fs (FS): Destination filesystem.
        walker (~fs.walk.Walker, optional): Walker used to traverse
            ``src_fs``; a default `Walker` is created when omitted.
        copy_if_newer (bool): If `True`, skip files for which
            `_compare` reports no difference against the destination.
        copy_file (callable): Called as
            ``copy_file(src_fs, path, dst_fs, path, preserve_time)``
            for each file that must be copied.
        preserve_time (bool): Passed through to ``copy_file``.

    """
    walker = walker or Walker()
    walk = walker.walk(src_fs, namespaces=["details"])
    for path, dirs, files in walk:
        # Index the destination directory by name; entries are popped as
        # they are matched so only stale resources remain at the end.
        try:
            dst = {
                info.name: info for info in dst_fs.scandir(path, namespaces=["details"])
            }
        except ResourceNotFound:
            dst_fs.makedir(path)
            dst = {}

        # Copy files
        for _file in files:
            _path = _file.make_path(path)
            dst_file = dst.pop(_file.name, None)
            if dst_file is not None:
                if dst_file.is_dir:
                    # Destination is a directory, remove it
                    dst_fs.removetree(_path)
                elif copy_if_newer and not _compare(_file, dst_file):
                    # Destination file is considered up to date
                    continue
            copy_file(src_fs, _path, dst_fs, _path, preserve_time)

        # Make directories
        for _dir in dirs:
            _path = _dir.make_path(path)
            dst_dir = dst.pop(_dir.name, None)
            if dst_dir is not None:
                if dst_dir.is_dir:
                    # Already a directory on dst, nothing to do
                    continue
                # A non-directory is in the way, so remove it
                dst_fs.remove(_path)
            # Create the directory now rather than relying on the walker
            # descending into it later: a walker may list a directory
            # without walking it, which would leave it missing on dst.
            dst_fs.makedir(_path, recreate=True)

        # Remove any remaining resources
        while dst:
            _, info = dst.popitem()
            _path = info.make_path(path)
            if info.is_dir:
                dst_fs.removetree(_path)
            else:
                dst_fs.remove(_path)
|