File: test_ia_download.py

package info (click to toggle)
python-internetarchive 5.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,000 kB
  • sloc: python: 7,445; xml: 180; makefile: 180
file content (150 lines) | stat: -rw-r--r-- 5,248 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os
import sys
import time

import pytest

from tests.conftest import NASA_EXPECTED_FILES, call_cmd, files_downloaded


def test_no_args(tmpdir_ch):
    call_cmd('ia --insecure download nasa')
    assert files_downloaded(path='nasa') == NASA_EXPECTED_FILES


@pytest.mark.xfail("CI" in os.environ, reason="May timeout on continuous integration")
def test_https(tmpdir_ch):
    call_cmd('ia download nasa')
    assert files_downloaded(path='nasa') == NASA_EXPECTED_FILES


def test_dry_run():
    nasa_url = 'http://archive.org/download/nasa/'
    expected_urls = {nasa_url + f for f in NASA_EXPECTED_FILES}

    stdout, stderr = call_cmd('ia --insecure download --dry-run nasa')
    output_lines = stdout.split('\n')
    dry_run_urls = {x.strip() for x in output_lines if x and 'nasa:' not in x}

    assert expected_urls == dry_run_urls


def test_glob(tmpdir_ch):
    expected_files = {
        'globe_west_540.jpg',
        'globe_west_540_thumb.jpg',
        'nasa_itemimage.jpg',
        '__ia_thumb.jpg',
    }

    call_cmd('ia --insecure download --glob="*jpg" nasa')
    assert files_downloaded(path='nasa') == expected_files


def test_exclude(tmpdir_ch):
    expected_files = {
        'globe_west_540.jpg',
        'nasa_itemimage.jpg',
    }

    call_cmd('ia --insecure download --glob="*jpg" --exclude="*thumb*" nasa')
    assert files_downloaded(path='nasa') == expected_files


def test_format(tmpdir_ch):
    call_cmd('ia --insecure download --format="Archive BitTorrent" nasa')
    assert files_downloaded(path='nasa') == {'nasa_archive.torrent'}


def test_on_the_fly_format():
    i = 'wonderfulwizardo00baumiala'

    stdout, stderr = call_cmd(f'ia --insecure download --dry-run --format="DAISY" {i}')
    assert stdout == ''

    stdout, stderr = call_cmd(f'ia --insecure download --dry-run --format="DAISY" --on-the-fly {i}')
    assert stdout == f'http://archive.org/download/{i}/{i}_daisy.zip'


def test_clobber(tmpdir_ch):
    cmd = 'ia --insecure download nasa nasa_meta.xml'
    call_cmd(cmd)
    assert files_downloaded('nasa') == {'nasa_meta.xml'}

    stdout, stderr = call_cmd(cmd)
    assert files_downloaded('nasa') == {'nasa_meta.xml'}
    prefix = 'nasa:\n'.replace('\n', os.linesep)
    filepath = os.path.join('nasa', 'nasa_meta.xml')
    expected_stderr = f'{prefix} skipping {filepath}, file already exists based on length and date.'
    assert expected_stderr == stderr


def test_checksum(tmpdir_ch):
    call_cmd('ia --insecure download nasa nasa_meta.xml')
    assert files_downloaded('nasa') == {'nasa_meta.xml'}

    stdout, stderr = call_cmd('ia --insecure download --checksum nasa nasa_meta.xml')
    assert files_downloaded('nasa') == {'nasa_meta.xml'}
    prefix = 'nasa:\n'.replace('\n', os.linesep)
    filepath = os.path.join('nasa', 'nasa_meta.xml')
    assert f'{prefix} skipping {filepath}, file already exists based on checksum.' == stderr


def test_checksum_archive(tmpdir_ch):
    call_cmd('ia --insecure download nasa nasa_meta.xml')
    assert files_downloaded('nasa') == {'nasa_meta.xml'}

    stdout, stderr = call_cmd('ia --insecure download --checksum-archive nasa nasa_meta.xml')
    assert files_downloaded('nasa') == {'nasa_meta.xml'}
    prefix = 'nasa:\n'.replace('\n', os.linesep)
    filepath = os.path.join('nasa', 'nasa_meta.xml')
    assert f'{prefix} skipping {filepath}, file already exists based on checksum.' == stderr

    assert '_checksum_archive.txt' in files_downloaded('.')
    with open(os.path.join('.', '_checksum_archive.txt'), encoding='utf-8') as f:
        filepath = os.path.join('nasa', 'nasa_meta.xml')
        assert f.read() == f'{filepath}\n'

    stdout, stderr = call_cmd('ia --insecure download --checksum-archive nasa nasa_meta.xml')
    assert files_downloaded('nasa') == {'nasa_meta.xml'}
    prefix = 'nasa:\n'.replace('\n', os.linesep)
    filepath = os.path.join('nasa', 'nasa_meta.xml')
    assert f'{prefix} skipping {filepath}, file already exists based on checksum_archive.' == stderr


def test_no_directories(tmpdir_ch):
    call_cmd('ia --insecure download --no-directories nasa nasa_meta.xml')
    assert files_downloaded('.') == {'nasa_meta.xml'}


def test_destdir(tmpdir_ch):
    cmd = 'ia --insecure download --destdir=thisdirdoesnotexist/ nasa nasa_meta.xml'
    stdout, stderr = call_cmd(cmd, expected_exit_code=2)

    assert "--destdir: 'thisdirdoesnotexist/' is not a valid directory" in stderr

    tmpdir_ch.mkdir('thisdirdoesnotexist/')
    call_cmd(cmd)
    assert files_downloaded('thisdirdoesnotexist/nasa') == {'nasa_meta.xml'}

    tmpdir_ch.mkdir('dir2/')
    cmd = ('ia --insecure download --no-directories --destdir=dir2/ '
           'nasa nasa_meta.xml')
    call_cmd(cmd)
    assert files_downloaded('dir2') == {'nasa_meta.xml'}


def test_no_change_timestamp(tmpdir_ch):
    # TODO: Handle the case of daylight savings time

    now = time.time()
    call_cmd('ia --insecure download --no-change-timestamp nasa')

    for path, dirnames, filenames in os.walk(str(tmpdir_ch)):
        for d in dirnames:
            p = os.path.join(path, d)
            assert os.stat(p).st_mtime >= now

        for f in filenames:
            p = os.path.join(path, f)
            assert os.stat(p).st_mtime >= now