# Tests for the ``ia download`` command-line interface.
import os
import sys
import time
import pytest
from tests.conftest import NASA_EXPECTED_FILES, call_cmd, files_downloaded
def test_no_args(tmpdir_ch):
    """Run ``ia download nasa`` without extra download options.

    The set of files reported for the ``nasa`` directory must equal
    ``NASA_EXPECTED_FILES``.
    """
    call_cmd('ia --insecure download nasa')

    downloaded = files_downloaded(path='nasa')
    assert downloaded == NASA_EXPECTED_FILES
@pytest.mark.xfail("CI" in os.environ, reason="May timeout on continuous integration")
def test_https(tmpdir_ch):
    """Run ``ia download nasa`` without the ``--insecure`` flag.

    Marked as an expected failure when a ``CI`` environment variable is
    set, because the download may time out there.
    """
    call_cmd('ia download nasa')

    fetched = files_downloaded(path='nasa')
    assert fetched == NASA_EXPECTED_FILES
def test_dry_run():
    """Check that ``--dry-run`` prints one download URL per expected file.

    The printed URLs are compared, as a set, against the base download URL
    joined with every name in ``NASA_EXPECTED_FILES``.
    """
    base_url = 'http://archive.org/download/nasa/'
    wanted_urls = set()
    for name in NASA_EXPECTED_FILES:
        wanted_urls.add(base_url + name)

    stdout, _stderr = call_cmd('ia --insecure download --dry-run nasa')

    # Ignore blank lines and any line containing the "nasa:" item header.
    printed_urls = set()
    for line in stdout.split('\n'):
        if line and 'nasa:' not in line:
            printed_urls.add(line.strip())

    assert wanted_urls == printed_urls
def test_glob(tmpdir_ch):
    """Check that ``--glob="*jpg"`` leaves only the four JPEG files in ``nasa``."""
    call_cmd('ia --insecure download --glob="*jpg" nasa')

    jpg_names = {
        '__ia_thumb.jpg',
        'globe_west_540.jpg',
        'globe_west_540_thumb.jpg',
        'nasa_itemimage.jpg',
    }
    assert files_downloaded(path='nasa') == jpg_names
def test_exclude(tmpdir_ch):
    """Check that ``--exclude="*thumb*"`` removes thumbnails from a ``*jpg`` glob."""
    call_cmd('ia --insecure download --glob="*jpg" --exclude="*thumb*" nasa')

    non_thumb_jpgs = {
        'nasa_itemimage.jpg',
        'globe_west_540.jpg',
    }
    assert files_downloaded(path='nasa') == non_thumb_jpgs
def test_format(tmpdir_ch):
    """Check that ``--format="Archive BitTorrent"`` yields only the torrent file."""
    call_cmd('ia --insecure download --format="Archive BitTorrent" nasa')

    torrent_only = {'nasa_archive.torrent'}
    assert files_downloaded(path='nasa') == torrent_only
def test_on_the_fly_format():
    """Check that a DAISY URL is only listed when ``--on-the-fly`` is given.

    Both invocations are dry runs: without ``--on-the-fly`` stdout must be
    empty; with it, stdout must be exactly the item's ``_daisy.zip`` URL.
    """
    identifier = 'wonderfulwizardo00baumiala'
    dry_run_daisy = 'ia --insecure download --dry-run --format="DAISY"'

    # Without the flag nothing should be printed.
    plain_stdout, _stderr = call_cmd(f'{dry_run_daisy} {identifier}')
    assert plain_stdout == ''

    # With the flag the derived DAISY archive URL is printed.
    otf_stdout, _stderr = call_cmd(f'{dry_run_daisy} --on-the-fly {identifier}')
    daisy_url = f'http://archive.org/download/{identifier}/{identifier}_daisy.zip'
    assert otf_stdout == daisy_url
def test_clobber(tmpdir_ch):
    """Check that downloading the same file twice skips the second download.

    The second run must leave the directory contents unchanged and report
    on stderr that the file already exists based on length and date.
    """
    download_cmd = 'ia --insecure download nasa nasa_meta.xml'

    # First run fetches the file.
    call_cmd(download_cmd)
    assert files_downloaded('nasa') == {'nasa_meta.xml'}

    # Second run should detect the existing file and skip it.
    _stdout, stderr = call_cmd(download_cmd)
    assert files_downloaded('nasa') == {'nasa_meta.xml'}

    target = os.path.join('nasa', 'nasa_meta.xml')
    header = 'nasa:' + os.linesep
    skip_message = f'{header} skipping {target}, file already exists based on length and date.'
    assert skip_message == stderr
def test_checksum(tmpdir_ch):
    """Check that ``--checksum`` skips a file that was already downloaded.

    After a plain download, a second run with ``--checksum`` must report on
    stderr that the file already exists based on checksum.
    """
    only_meta = {'nasa_meta.xml'}

    call_cmd('ia --insecure download nasa nasa_meta.xml')
    assert files_downloaded('nasa') == only_meta

    _stdout, stderr = call_cmd('ia --insecure download --checksum nasa nasa_meta.xml')
    assert files_downloaded('nasa') == only_meta

    target = os.path.join('nasa', 'nasa_meta.xml')
    header = 'nasa:' + os.linesep
    assert f'{header} skipping {target}, file already exists based on checksum.' == stderr
def test_checksum_archive(tmpdir_ch):
    """Check the ``--checksum-archive`` skip messages and archive file.

    Steps:
      1. Plain download of ``nasa_meta.xml``.
      2. ``--checksum-archive`` run: skipped "based on checksum", and
         ``_checksum_archive.txt`` now holds the file's path.
      3. Second ``--checksum-archive`` run: skipped "based on
         checksum_archive".
    """
    archive_cmd = 'ia --insecure download --checksum-archive nasa nasa_meta.xml'
    target = os.path.join('nasa', 'nasa_meta.xml')
    header = 'nasa:' + os.linesep

    # Step 1: initial download.
    call_cmd('ia --insecure download nasa nasa_meta.xml')
    assert files_downloaded('nasa') == {'nasa_meta.xml'}

    # Step 2: first archive-aware run verifies by checksum.
    _stdout, stderr = call_cmd(archive_cmd)
    assert files_downloaded('nasa') == {'nasa_meta.xml'}
    assert f'{header} skipping {target}, file already exists based on checksum.' == stderr

    # The archive file must now exist and contain exactly the one path.
    assert '_checksum_archive.txt' in files_downloaded('.')
    with open(os.path.join('.', '_checksum_archive.txt'), encoding='utf-8') as archive_file:
        recorded = archive_file.read()
    assert recorded == f'{target}\n'

    # Step 3: second archive-aware run is skipped via the archive entry.
    _stdout, stderr = call_cmd(archive_cmd)
    assert files_downloaded('nasa') == {'nasa_meta.xml'}
    assert f'{header} skipping {target}, file already exists based on checksum_archive.' == stderr
def test_no_directories(tmpdir_ch):
    """Check that ``--no-directories`` puts the file directly in ``.``."""
    call_cmd('ia --insecure download --no-directories nasa nasa_meta.xml')

    in_cwd = files_downloaded('.')
    assert in_cwd == {'nasa_meta.xml'}
def test_destdir(tmpdir_ch):
    """Check ``--destdir`` validation and placement of downloaded files.

    A missing destination must exit with code 2 and an explanatory stderr
    message. Once the directory exists the file lands in ``<destdir>/nasa``,
    or directly in ``<destdir>`` when combined with ``--no-directories``.
    """
    missing_dir_cmd = 'ia --insecure download --destdir=thisdirdoesnotexist/ nasa nasa_meta.xml'

    # Destination does not exist yet: the command must fail with exit code 2.
    _stdout, stderr = call_cmd(missing_dir_cmd, expected_exit_code=2)
    assert "--destdir: 'thisdirdoesnotexist/' is not a valid directory" in stderr

    # Same command succeeds after the directory has been created.
    tmpdir_ch.mkdir('thisdirdoesnotexist/')
    call_cmd(missing_dir_cmd)
    assert files_downloaded('thisdirdoesnotexist/nasa') == {'nasa_meta.xml'}

    # With --no-directories the item sub-directory is omitted.
    tmpdir_ch.mkdir('dir2/')
    flat_cmd = 'ia --insecure download --no-directories --destdir=dir2/ nasa nasa_meta.xml'
    call_cmd(flat_cmd)
    assert files_downloaded('dir2') == {'nasa_meta.xml'}
def test_no_change_timestamp(tmpdir_ch):
    """Check mtimes after a ``--no-change-timestamp`` download.

    Every directory and file found under ``tmpdir_ch`` must have a
    modification time no earlier than the instant captured just before the
    command ran.
    """
    # TODO: Handle the case of daylight savings time
    started_at = time.time()
    call_cmd('ia --insecure download --no-change-timestamp nasa')

    for root, subdirs, files in os.walk(str(tmpdir_ch)):
        # Directories first, then files, as one combined pass.
        for entry in subdirs + files:
            entry_path = os.path.join(root, entry)
            assert os.stat(entry_path).st_mtime >= started_at
|