1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510
|
######################################################################
#
# File: b2sdk/_internal/utils/__init__.py
#
# Copyright 2022 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
from __future__ import annotations
import base64
import hashlib
import logging
import os
import pathlib
import platform
import re
import time
from dataclasses import dataclass, field
from decimal import Decimal
from itertools import chain
from typing import Any, Iterator, NewType, TypeVar
from urllib.parse import quote, unquote_plus
from logfury.v1 import (
DefaultTraceAbstractMeta,
DefaultTraceMeta,
limit_trace_arguments,
disable_trace,
trace_call,
)
logger = logging.getLogger(__name__)
Sha1HexDigest = NewType('Sha1HexDigest', str)
T = TypeVar('T')
# TODO: When we drop Python 3.7 support, this should be replaced
# with typing.Protocol that exposes read method.
ReadOnlyStream = Any
def b2_url_encode(s):
"""
URL-encode a unicode string to be sent to B2 in an HTTP header.
:param s: a unicode string to encode
:type s: str
:return: URL-encoded string
:rtype: str
"""
return quote(s.encode('utf-8'))
def b2_url_decode(s):
"""
Decode a Unicode string returned from B2 in an HTTP header.
:param s: a unicode string to decode
:type s: str
:return: a Python unicode string.
:rtype: str
"""
return unquote_plus(s)
def choose_part_ranges(content_length, minimum_part_size):
"""
Return a list of (offset, length) for the parts of a large file.
:param content_length: content length value
:type content_length: int
:param minimum_part_size: a minimum file part size
:type minimum_part_size: int
:rtype: list
"""
# If the file is at least twice the minimum part size, we are guaranteed
# to be able to break it into multiple parts that are all at least
# the minimum part size.
assert minimum_part_size * 2 <= content_length
# How many parts can we make?
part_count = min(content_length // minimum_part_size, 10000)
assert 2 <= part_count
# All of the parts, except the last, are the same size. The
# last one may be bigger.
part_size = content_length // part_count
last_part_size = content_length - (part_size * (part_count - 1))
assert minimum_part_size <= last_part_size
# Make all of the parts except the last
parts = [(i * part_size, part_size) for i in range(part_count - 1)]
# Add the last part
start_of_last = (part_count - 1) * part_size
last_part = (start_of_last, content_length - start_of_last)
parts.append(last_part)
return parts
def update_digest_from_stream(digest: T, input_stream: ReadOnlyStream, content_length: int) -> T:
"""
Update and return `digest` with data read from `input_stream`
:param digest: a digest object, which exposes an `update(bytes)` method
:param input_stream: stream object, which exposes a `read(int|None)` method
:param content_length: expected length of the stream
:type content_length: int
"""
remaining = content_length
block_size = 1024 * 1024
while remaining != 0:
to_read = min(remaining, block_size)
data = input_stream.read(to_read)
if len(data) != to_read:
raise ValueError(
'content_length(%s) is more than the size of the file' % content_length
)
digest.update(data)
remaining -= to_read
return digest
def hex_sha1_of_stream(input_stream: ReadOnlyStream, content_length: int) -> Sha1HexDigest:
"""
Return the 40-character hex SHA1 checksum of the first content_length
bytes in the input stream.
:param input_stream: stream object, which exposes read(int|None) method
:param content_length: expected length of the stream
:type content_length: int
:rtype: str
"""
return Sha1HexDigest(
update_digest_from_stream(
hashlib.sha1(),
input_stream,
content_length,
).hexdigest()
)
@dataclass
class IncrementalHexDigester:
"""
Calculates digest of a stream or parts of it.
"""
stream: ReadOnlyStream
digest: 'hashlib._Hash' = field( # noqa (_Hash is a dynamic object)
default_factory=hashlib.sha1
)
read_bytes: int = 0
block_size: int = 1024 * 1024
@property
def hex_digest(self) -> Sha1HexDigest:
return Sha1HexDigest(self.digest.hexdigest())
def update_from_stream(
self,
limit: int | None = None,
) -> Sha1HexDigest:
"""
:param limit: How many new bytes try to read from the stream. Default None – read until nothing left.
"""
offset = 0
while True:
if limit is not None:
to_read = min(limit - offset, self.block_size)
else:
to_read = self.block_size
data = self.stream.read(to_read)
data_len = len(data)
if data_len > 0:
self.digest.update(data)
self.read_bytes += data_len
offset += data_len
if data_len < to_read or to_read == 0:
break
return self.hex_digest
def hex_sha1_of_unlimited_stream(
input_stream: ReadOnlyStream,
limit: int | None = None,
) -> tuple[Sha1HexDigest, int]:
digester = IncrementalHexDigester(input_stream)
digester.update_from_stream(limit)
return digester.hex_digest, digester.read_bytes
def hex_sha1_of_file(path_) -> Sha1HexDigest:
with open(path_, 'rb') as file:
return hex_sha1_of_unlimited_stream(file)[0]
def hex_sha1_of_bytes(data: bytes) -> Sha1HexDigest:
"""
Return the 40-character hex SHA1 checksum of the data.
"""
return Sha1HexDigest(hashlib.sha1(data).hexdigest())
def hex_md5_of_bytes(data: bytes) -> str:
"""
Return the 32-character hex MD5 checksum of the data.
"""
return hashlib.md5(data).hexdigest()
def md5_of_bytes(data: bytes) -> bytes:
"""
Return the 16-byte MD5 checksum of the data.
"""
return hashlib.md5(data).digest()
def b64_of_bytes(data: bytes) -> str:
"""
Return the base64 encoded represtantion of the data.
"""
return base64.b64encode(data).decode()
def validate_b2_file_name(name):
"""
Raise a ValueError if the name is not a valid B2 file name.
:param name: a string to check
:type name: str
"""
if not isinstance(name, str):
raise ValueError('file name must be a string, not bytes')
try:
name_utf8 = name.encode('utf-8')
except UnicodeEncodeError:
raise ValueError('file name must be valid Unicode, check locale')
if len(name_utf8) < 1:
raise ValueError('file name too short (0 utf-8 bytes)')
if 1000 < len(name_utf8):
raise ValueError('file name too long (more than 1000 utf-8 bytes)')
if name[0] == '/':
raise ValueError("file names must not start with '/'")
if name[-1] == '/':
raise ValueError("file names must not end with '/'")
if '\\' in name:
raise ValueError("file names must not contain '\\'")
if '//' in name:
raise ValueError("file names must not contain '//'")
if chr(127) in name:
raise ValueError('file names must not contain DEL')
if any(250 < len(segment) for segment in name_utf8.split(b'/')):
raise ValueError("file names segments (between '/') can be at most 250 utf-8 bytes")
def get_file_mtime(local_path):
"""
Get modification time of a file in milliseconds.
:param local_path: a file path
:type local_path: str
:rtype: int
"""
mod_time = os.path.getmtime(local_path) * 1000
return int(mod_time)
def is_special_file(path: str | pathlib.Path) -> bool:
"""
Is the path a special file, such as /dev/null or stdout?
:param path: a "file" path
:return: True if the path is a special file
"""
path_str = str(path)
return (
path == os.devnull
or path_str.startswith('/dev/')
or platform.system() == 'Windows'
and path_str.upper() in ('CON', 'NUL')
)
def set_file_mtime(local_path: str | pathlib.Path, mod_time_millis: int) -> None:
"""
Set modification time of a file in milliseconds.
:param local_path: a file path
:param mod_time_millis: time to be set
"""
mod_time = mod_time_millis / 1000.0
# We have to convert it this way to avoid differences when mtime
# is read from the local file in the next iterations, and time is fetched
# without rounding.
# This is caused by floating point arithmetic as POSIX systems
# represents mtime as floats and B2 as integers.
# E.g. for 1093258377393, it would be converted to 1093258377.393
# which is actually represented by 1093258377.3929998874664306640625.
# When we save mtime and read it again, we will end up with 1093258377392.
# See #617 for details.
mod_time = float(Decimal('%.3f5' % mod_time))
try:
os.utime(local_path, (mod_time, mod_time))
except OSError:
if not is_special_file(local_path):
raise
def fix_windows_path_limit(path):
"""
Prefix paths when running on Windows to overcome 260 character path length limit.
See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
:param path: a path to prefix
:type path: str
:return: a prefixed path
:rtype: str
"""
if platform.system() == 'Windows':
if path.startswith('\\\\'):
# UNC network path
return '\\\\?\\UNC\\' + path[2:]
elif os.path.isabs(path):
# local absolute path
return '\\\\?\\' + path
else:
# relative path, don't alter
return path
else:
return path
def _pick_scale_and_suffix(x):
# suffixes for different scales
suffixes = ' kMGTP'
# We want to use the biggest suffix that makes sense.
ref_digits = str(int(x))
index = (len(ref_digits) - 1) // 3
suffix = suffixes[index]
if suffix == ' ':
suffix = ''
scale = 1000**index
return (scale, suffix)
def format_and_scale_number(x, unit):
"""
Pick a good scale for representing a number and format it.
:param x: a number
:type x: int
:param unit: an arbitrary unit name
:type unit: str
:return: scaled and formatted number
:rtype: str
"""
# simple case for small numbers
if x < 1000:
return '%d %s' % (x, unit)
# pick a scale
(scale, suffix) = _pick_scale_and_suffix(x)
# decide how many digits after the decimal to display
scaled = x / scale
if scaled < 10.0:
fmt = '%1.2f %s%s'
elif scaled < 100.0:
fmt = '%1.1f %s%s'
else:
fmt = '%1.0f %s%s'
# format it
return fmt % (scaled, suffix, unit)
def format_and_scale_fraction(numerator, denominator, unit):
"""
Pick a good scale for representing a fraction, and format it.
:param numerator: a numerator of a fraction
:type numerator: int
:param denominator: a denominator of a fraction
:type denominator: int
:param unit: an arbitrary unit name
:type unit: str
:return: scaled and formatted fraction
:rtype: str
"""
# simple case for small numbers
if denominator < 1000:
return '%d / %d %s' % (numerator, denominator, unit)
# pick a scale
(scale, suffix) = _pick_scale_and_suffix(denominator)
# decide how many digits after the decimal to display
scaled_denominator = denominator / scale
if scaled_denominator < 10.0:
fmt = '%1.2f / %1.2f %s%s'
elif scaled_denominator < 100.0:
fmt = '%1.1f / %1.1f %s%s'
else:
fmt = '%1.0f / %1.0f %s%s'
# format it
scaled_numerator = numerator / scale
return fmt % (scaled_numerator, scaled_denominator, suffix, unit)
_CAMELCASE_TO_UNDERSCORE_RE = re.compile('((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))')
def camelcase_to_underscore(input_):
"""
Convert a camel-cased string to a string with underscores.
:param input_: an input string
:type input_: str
:return: string with underscores
:rtype: str
"""
return _CAMELCASE_TO_UNDERSCORE_RE.sub(r'_\1', input_).lower()
class B2TraceMeta(DefaultTraceMeta):
"""
Trace all public method calls, except for ones with names that begin with `get_`.
"""
pass
class B2TraceMetaAbstract(DefaultTraceAbstractMeta):
"""
Default class for tracers, to be set as
a metaclass for abstract base classes.
"""
pass
class ConcurrentUsedAuthTokenGuard:
"""
Context manager preventing two tokens being used simultaneously.
Throws UploadTokenUsedConcurrently when unable to acquire a lock
Sample usage:
with ConcurrentUsedAuthTokenGuard(lock_for_token, token):
# code that uses the token exclusively
"""
def __init__(self, lock, token):
self.lock = lock
self.token = token
def __enter__(self):
if not self.lock.acquire(False):
from b2sdk._internal.exception import UploadTokenUsedConcurrently
raise UploadTokenUsedConcurrently(self.token)
def __exit__(self, exc_type, exc_val, exc_tb):
try:
self.lock.release()
except RuntimeError:
# guard against releasing a non-acquired lock
pass
def current_time_millis():
"""
File times are in integer milliseconds, to avoid roundoff errors.
"""
return int(round(time.time() * 1000))
def iterator_peek(iterator: Iterator[T], count: int) -> tuple[list[T], Iterator[T]]:
"""
Get up to the `count` first elements yielded by `iterator`.
The function will read `count` elements from `iterator` or less if the end is reached first. Returns a tuple
consisting of a list of retrieved elements and an iterator equivalent to the input iterator.
"""
ret = []
for _ in range(count):
try:
ret.append(next(iterator))
except StopIteration:
break
return ret, chain(ret, iterator)
assert disable_trace
assert limit_trace_arguments
assert trace_call
|