1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
|
#
# The internetarchive module is a Python/CLI interface to Archive.org.
#
# Copyright (C) 2012-2019 Internet Archive
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""List files in a given item.
usage:
ia list [-v] [--glob=<pattern>] [--location] [--format=<format>...]
[--columns <column1,column2> | --all] <identifier>
options:
-h, --help
-v, --verbose Print column headers. [default: False]
-a, --all List all information available for files.
-l, --location Print full URL for each file.
-c, --columns=<name,size> List specified file information. [default: name]
-g, --glob=<pattern>  Only return files matching the given pattern.
-f, --format=<format> Return files matching <format>.
"""
import csv
import sys
from fnmatch import fnmatch
from itertools import chain
from docopt import docopt
from internetarchive import ArchiveSession
def _format_row(file_md, columns, url_prefix=None):
    """Return the entries of ``file_md`` named in ``columns``, ready for CSV output.

    List, tuple and set values are flattened into one ``;``-separated string.
    When ``url_prefix`` is not ``None`` it is prepended to the ``name`` value.
    Columns that are absent from ``file_md`` are left out of the result.
    """
    row = {}
    for key in columns:
        if key not in file_md:
            continue
        val = file_md[key]
        if isinstance(val, (list, tuple, set)):
            # str() each member so a non-string entry cannot make join() raise.
            val = ';'.join(map(str, val))
        if key == 'name' and url_prefix is not None:
            val = f'{url_prefix}{val}'
        row[key] = val
    return row


def main(argv, session: ArchiveSession) -> None:
    """Write a tab-separated listing of an item's files to stdout.

    ``argv`` is parsed with docopt against this module's docstring. The item
    named by ``<identifier>`` is fetched through ``session`` and one row is
    written per file, restricted to the requested columns. ``name`` is always
    the first column when it is requested.

    argv: command-line arguments handed to docopt.
    session: used to fetch the item; ``session.host`` is used to build the
        download URL when ``--location`` is given.

    Calls ``sys.exit(1)`` when no selected file has any of the requested
    columns (including when no file is selected at all).
    """
    args = docopt(__doc__, argv=argv)
    item = session.get_item(args['<identifier>'])
    files = item.files

    if args.get('--all'):
        # Sorted so the column order is identical on every run; iterating a
        # set of strings directly varies with hash randomization.
        columns = sorted(set(chain.from_iterable(files)))
    else:
        # Tolerate whitespace around the commas, e.g. "name, size".
        columns = [column.strip() for column in args['--columns'].split(',')]

    # Make "name" the first column always.
    if 'name' in columns:
        columns.remove('name')
        columns.insert(0, 'name')

    dict_writer = csv.DictWriter(sys.stdout, columns, delimiter='\t', lineterminator='\n')

    # Apply both filters when both are supplied, so --format is not silently
    # dropped in the presence of --glob.
    if args.get('--format'):
        files = [f.__dict__ for f in item.get_files(formats=args['--format'])]
    if args.get('--glob'):
        patterns = args['--glob'].split('|')
        files = [f for f in files
                 if any(fnmatch(f.get('name', ''), p) for p in patterns)]

    url_prefix = None
    if args.get('--location'):
        url_prefix = f'https://{session.host}/download/{item.identifier}/'

    output = [_format_row(f, columns, url_prefix) for f in files]

    if args['--verbose']:
        # Header goes through the underlying csv writer: it is a plain list,
        # not a dict keyed by column.
        dict_writer.writer.writerow(columns)

    # Every row empty (or no rows at all) means there is nothing to report.
    if not any(output):
        sys.exit(1)
    dict_writer.writerows(output)
|