File: ia_list.py

package info (click to toggle)
python-internetarchive 3.3.0-2~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,096 kB
  • sloc: python: 6,276; xml: 180; makefile: 180
file content (84 lines) | stat: -rw-r--r-- 3,002 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#
# The internetarchive module is a Python/CLI interface to Archive.org.
#
# Copyright (C) 2012-2019 Internet Archive
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""List files in a given item.

usage:
    ia list [-v] [--glob=<pattern>] [--location] [--format=<format>...]
            [--columns <column1,column2> | --all] <identifier>

options:
    -h, --help
    -v, --verbose               Print column headers. [default: False]
    -a, --all                   List all information available for files.
    -l, --location              Print full URL for each file.
    -c, --columns=<name,size>   List specified file information. [default: name]
    -g, --glob=<pattern>        Only return files matching the given pattern.
    -f, --format=<format>       Return files matching <format>.
"""
import csv
import sys
from fnmatch import fnmatch
from itertools import chain

from docopt import docopt

from internetarchive import ArchiveSession


def main(argv, session: ArchiveSession) -> None:
    """Print a tab-separated listing of the files in an item.

    Args:
        argv: Command-line arguments, parsed by docopt against this module's
            usage docstring.
        session: Session used to fetch the item; its ``host`` attribute is
            used to build download URLs when ``--location`` is given.

    Raises:
        SystemExit: With status 1 when no output row contains any of the
            requested columns (which includes the case where no file is
            selected at all).
    """
    args = docopt(__doc__, argv=argv)
    item = session.get_item(args['<identifier>'])

    files = item.files
    if args.get('--all'):
        # Union of every key present on any file. Sorted so the column order
        # is reproducible: iterating a plain set of strings can differ from
        # one interpreter run to the next because of hash randomization.
        columns = sorted(set(chain.from_iterable(files)))
    else:
        columns = args['--columns'].split(',')

    # Make "name" the first column always.
    if 'name' in columns:
        columns.remove('name')
        columns.insert(0, 'name')

    # Set copy of the columns for cheap membership tests in the row loop.
    wanted = frozenset(columns)

    dict_writer = csv.DictWriter(sys.stdout, columns, delimiter='\t', lineterminator='\n')

    if args.get('--glob'):
        # Several glob patterns may be supplied at once, separated by "|".
        patterns = args['--glob'].split('|')
        files = [f for f in files if any(fnmatch(f['name'], p) for p in patterns)]
    elif args.get('--format'):
        # NOTE: --format is only consulted when --glob is not given.
        files = [f.__dict__ for f in item.get_files(formats=args['--format'])]

    show_location = args.get('--location')
    output = []
    for f in files:
        file_dict = {}
        for key, val in f.items():
            if key not in wanted:
                continue
            if isinstance(val, (list, tuple, set)):
                # Flatten multi-valued fields into a single cell; str() keeps
                # the join from failing on non-string elements.
                val = ';'.join(map(str, val))
            if key == 'name' and show_location:
                val = f'https://{session.host}/download/{item.identifier}/{val}'
            file_dict[key] = val
        output.append(file_dict)

    # The header row is written through the underlying csv writer, and it is
    # emitted before the empty-result check below.
    if args['--verbose']:
        dict_writer.writer.writerow(columns)
    # Nothing to print: every row is empty (or there are no rows at all).
    if not any(output):
        sys.exit(1)
    dict_writer.writerows(output)