File: ia_copy.py

package info (click to toggle)
python-internetarchive 3.3.0-2~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,096 kB
  • sloc: python: 6,276; xml: 180; makefile: 180
file content (147 lines) | stat: -rw-r--r-- 6,307 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#
# The internetarchive module is a Python/CLI interface to Archive.org.
#
# Copyright (C) 2012-2021 Internet Archive
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Copy files in archive.org items.

usage:
    ia copy <src-identifier>/<src-file> <dest-identifier>/<dest-file> [options]...
    ia copy --help

options:
    -h, --help
    -m, --metadata=<key:value>...  Metadata to add to your new item, if you are moving
                                   the file to a new item.
    --replace-metadata             Only use metadata specified as argument,
                                   do not copy any from the source item.
    -H, --header=<key:value>...    S3 HTTP headers to send with your request.
    --ignore-file-metadata         Do not copy file metadata.
    -n, --no-derive                Do not derive uploaded files.
    --no-backup                    Turn off archive.org backups. Clobbered files
                                   will not be saved to history/files/$key.~N~
                                   [default: True].
"""
from __future__ import annotations

import sys
from urllib.parse import quote

from docopt import docopt, printable_usage
from requests import Response
from schema import And, Or, Schema, SchemaError, Use  # type: ignore[import]

import internetarchive as ia
from internetarchive.cli.argparser import get_args_dict
from internetarchive.utils import get_s3_xml_text, merge_dictionaries


def assert_src_file_exists(src_location: str) -> bool:
    assert SRC_ITEM.exists  # type: ignore
    global SRC_FILE
    src_filename = src_location.split('/', 1)[-1]
    SRC_FILE = SRC_ITEM.get_file(src_filename)  # type: ignore
    assert SRC_FILE.exists  # type: ignore
    return True


def main(
    argv: list[str] | None, session: ia.session.ArchiveSession, cmd: str = 'copy'
) -> tuple[Response, ia.files.File]:
    args = docopt(__doc__, argv=argv)
    src_path = args['<src-identifier>/<src-file>']
    dest_path = args['<dest-identifier>/<dest-file>']

    # If src == dest, file gets deleted!
    try:
        assert src_path != dest_path
    except AssertionError:
        print('error: The source and destination files cannot be the same!',
              file=sys.stderr)
        sys.exit(1)

    global SRC_ITEM
    SRC_ITEM = session.get_item(src_path.split('/')[0])  # type: ignore

    # Validate args.
    s = Schema({
        str: Use(bool),
        '<src-identifier>/<src-file>': And(str, And(And(str, lambda x: '/' in x,
            error='Destination not formatted correctly. See usage example.'),
            assert_src_file_exists, error=(
            f'https://{session.host}/download/{src_path} does not exist. '
            'Please check the identifier and filepath and retry.'))),
        '<dest-identifier>/<dest-file>': And(str, lambda x: '/' in x,
            error='Destination not formatted correctly. See usage example.'),
        '--metadata': Or(None, And(Use(get_args_dict), dict),
                         error='--metadata must be formatted as --metadata="key:value"'),
        '--replace-metadata': Use(bool),
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--ignore-file-metadata': Use(bool),
    })

    try:
        args = s.validate(args)
    except SchemaError as exc:
        # This module is sometimes called by other modules.
        # Replace references to 'ia copy' in ___doc__ to 'ia {cmd}' for clarity.
        usage = printable_usage(__doc__.replace('ia copy', f'ia {cmd}'))
        print(f'{exc}\n{usage}', file=sys.stderr)
        sys.exit(1)

    args['--header']['x-amz-copy-source'] = f'/{quote(src_path)}'
    # Copy the old metadata verbatim if no additional metadata is supplied,
    # else combine the old and the new metadata in a sensible manner.
    if args['--metadata'] or args['--replace-metadata']:
        args['--header']['x-amz-metadata-directive'] = 'REPLACE'
    else:
        args['--header']['x-amz-metadata-directive'] = 'COPY'

    # New metadata takes precedence over old metadata.
    if not args['--replace-metadata']:
        args['--metadata'] = merge_dictionaries(SRC_ITEM.metadata,  # type: ignore
                                                args['--metadata'])

    # File metadata is copied by default but can be dropped.
    file_metadata = None if args['--ignore-file-metadata'] else SRC_FILE.metadata  # type: ignore

    # Add keep-old-version by default.
    if not args['--header'].get('x-archive-keep-old-version') and not args['--no-backup']:
        args['--header']['x-archive-keep-old-version'] = '1'

    url = f'{session.protocol}//s3.us.archive.org/{quote(dest_path)}'
    queue_derive = True if args['--no-derive'] is False else False
    req = ia.iarequest.S3Request(url=url,
                                 method='PUT',
                                 metadata=args['--metadata'],
                                 file_metadata=file_metadata,
                                 headers=args['--header'],
                                 queue_derive=queue_derive,
                                 access_key=session.access_key,
                                 secret_key=session.secret_key)
    p = req.prepare()
    r = session.send(p)
    if r.status_code != 200:
        try:
            msg = get_s3_xml_text(r.text)
        except Exception as e:
            msg = r.text
        print(f'error: failed to {cmd} "{src_path}" to "{dest_path}" - {msg}', file=sys.stderr)
        sys.exit(1)
    elif cmd == 'copy':
        print(f'success: copied "{src_path}" to "{dest_path}".', file=sys.stderr)
    return (r, SRC_FILE)  # type: ignore