1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
|
#
# The internetarchive module is a Python/CLI interface to Archive.org.
#
# Copyright (C) 2012-2019 Internet Archive
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Delete files from Archive.org.
usage:
ia delete <identifier> <file>... [options]...
ia delete <identifier> [options]...
ia delete --help
options:
-h, --help
-q, --quiet  Turn off status output.
-c, --cascade Delete all files associated with the specified file,
including upstream derivatives and the original file.
-H, --header=<key:value>... S3 HTTP headers to send with your request.
-a, --all Delete all files in the given item (Note: Some files,
such as <identifier>_meta.xml and <identifier>_files.xml,
cannot be deleted)
-d, --dry-run Output files to be deleted to stdout, but don't actually
delete.
-g, --glob=<pattern> Only delete files matching the given pattern.
-f, --format=<format>...  Only delete files matching the specified format(s).
-R, --retries=<i> Number of times to retry if S3 returns a 503 SlowDown
error [default: 2].
--no-backup Turn off archive.org backups. Clobbered files
will not be saved to history/files/$key.~N~
[default: True].
"""
import sys
import requests.exceptions
from docopt import docopt, printable_usage
from schema import And, Or, Schema, SchemaError, Use # type: ignore[import]
from internetarchive import ArchiveSession
from internetarchive.cli.argparser import convert_str_list_to_unicode, get_args_dict
from internetarchive.utils import get_s3_xml_text
def main(argv, session: ArchiveSession) -> None:
    """Run ``ia delete``: delete files from an Archive.org item.

    The arguments are parsed with docopt against this module's usage
    string and validated, then the files to delete are selected in this
    order of precedence: ``--all``, ``--glob``, ``--format``, and finally
    the explicit ``<file>`` names (read from stdin when ``<file>`` is
    ``-``).  Each selected file is deleted individually.

    Args:
        argv: Command-line arguments handed to ``docopt``.
        session: Session used to look up the item named by
            ``<identifier>``.

    Exits with status 1 (via ``sys.exit``) when the arguments fail
    validation, the item does not exist, no files were selected, or any
    individual deletion failed.
    """
    args = docopt(__doc__, argv=argv)

    # Validate args.
    schema = Schema({
        str: Use(bool),
        '<file>': list,
        '--format': list,
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--glob': list,
        'delete': bool,
        # The value is indexed with [0], i.e. it arrives as a sequence --
        # presumably because ``[options]...`` makes docopt collect option
        # values into lists (TODO confirm).
        '--retries': Use(lambda i: int(i[0])),
        '<identifier>': str,
    })
    try:
        args = schema.validate(args)
    except SchemaError as exc:
        print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr)
        sys.exit(1)

    verbose = not args['--quiet']
    item = session.get_item(args['<identifier>'])
    if not item.exists:
        # Nothing can be deleted from a missing item, so stop here instead
        # of going on to announce a deletion that cannot happen.
        print(f'{item.identifier}: skipping, item doesn\'t exist.', file=sys.stderr)
        sys.exit(1)

    # Suffixes of files that cannot be deleted via S3.
    no_delete = ('_meta.xml', '_files.xml', '_meta.sqlite')

    # The schema allows --header to be None; normalise so the lookups below
    # are always performed on a dict.
    headers = args['--header'] if args['--header'] is not None else {}

    # Add keep-old-version by default.
    if not headers.get('x-archive-keep-old-version') and not args['--no-backup']:
        headers['x-archive-keep-old-version'] = '1'

    if verbose:
        print(f'Deleting files from {item.identifier}', file=sys.stderr)

    # Every branch materialises the selection into a list so that the
    # emptiness check below is meaningful for all of them (a lazy iterator
    # would always be truthy).
    if args['--all']:
        files = list(item.get_files())
        args['--cascade'] = True
    elif args['--glob']:
        files = list(item.get_files(glob_pattern=args['--glob']))
    elif args['--format']:
        files = list(item.get_files(formats=args['--format']))
    else:
        if args['<file>'] == ['-']:
            fnames = [line.strip() for line in sys.stdin]
        else:
            fnames = [name.strip() for name in args['<file>']]
        files = list(item.get_files(fnames))

    if not files:
        print(' warning: no files found, nothing deleted.', file=sys.stderr)
        sys.exit(1)

    errors = False

    for f in files:
        if not f:
            # A falsy entry (e.g. None) has nothing to delete: report it
            # without assuming it has a ``name`` and move on.
            if verbose:
                missing = getattr(f, 'name', f)
                print(f' error: "{missing}" does not exist', file=sys.stderr)
            errors = True
            continue
        # str.endswith accepts a tuple of suffixes.
        if f.name.endswith(no_delete):
            continue
        if args['--dry-run']:
            print(f' will delete: {item.identifier}/{f.name}', file=sys.stderr)
            continue
        try:
            resp = f.delete(verbose=verbose,
                            cascade_delete=args['--cascade'],
                            headers=headers,
                            retries=args['--retries'])
        except requests.exceptions.RetryError:
            print(f' error: max retries exceeded for {f.name}', file=sys.stderr)
            errors = True
            continue

        # Anything other than 204 is reported as a failed delete.
        if resp.status_code != 204:
            errors = True
            msg = get_s3_xml_text(resp.content)
            print(f' error: {msg} ({resp.status_code})', file=sys.stderr)

    if errors:
        sys.exit(1)
|