from functools import partial
import os
from os import path as op
import re
import shutil
import zipfile

import pooch
import pytest

from mne import datasets, read_labels_from_annot, write_labels_to_annot
from mne.datasets import (testing, fetch_infant_template, fetch_phantom,
                          fetch_dataset)
from mne.datasets._fsaverage.base import _set_montage_coreg_path
from mne.datasets._infant import base as infant_base
from mne.datasets._phantom import base as phantom_base
from mne.datasets.utils import _manifest_check_download
from mne.utils import (requires_good_network,
                       get_subjects_dir, ArgvSetter, _pl, use_log_level,
                       catch_logging, hashfunc)
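
# This points into the MNE testing dataset; with download=False nothing is
# fetched here, so the path only exists if the dataset is already on disk.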
subjects_dir = testing.data_path(download=False) / 'subjects'


def test_datasets_basic(tmp_path, monkeypatch):
    """Test simple dataset functions."""
    # XXX 'hf_sef' and 'misc' do not conform to these standards
    for dname in ('sample', 'somato', 'spm_face', 'testing', 'opm',
                  'bst_raw', 'bst_auditory', 'bst_resting', 'multimodal',
                  'bst_phantom_ctf', 'bst_phantom_elekta', 'kiloword',
                  'mtrf', 'phantom_4dbti',
                  'visual_92_categories', 'fieldtrip_cmc'):
        if dname.startswith('bst'):
            dataset = getattr(datasets.brainstorm, dname)
        else:
            dataset = getattr(datasets, dname)
        if str(dataset.data_path(download=False)) != '.':
            assert isinstance(dataset.get_version(), str)
            assert datasets.has_dataset(dname)
        else:
            assert dataset.get_version() is None
            assert not datasets.has_dataset(dname)
        print('%s: %s' % (dname, datasets.has_dataset(dname)))
    tempdir = str(tmp_path)
    # Explicitly test one that isn't preset (given the config)
    monkeypatch.setenv('MNE_DATASETS_SAMPLE_PATH', tempdir)
    dataset = datasets.sample
    assert str(dataset.data_path(download=False)) == '.'
    assert dataset.get_version() != ''
    assert dataset.get_version() is None
    # don't let it read from the config file to get the directory,
    # force it to look for the default
    monkeypatch.setenv('_MNE_FAKE_HOME_DIR', tempdir)
    monkeypatch.delenv('SUBJECTS_DIR', raising=False)
    assert (str(datasets.utils._get_path(None, 'foo', 'bar')) ==
            op.join(tempdir, 'mne_data'))
    assert get_subjects_dir(None) is None
    _set_montage_coreg_path()
    sd = get_subjects_dir()
    assert sd.endswith('MNE-fsaverage-data')
    monkeypatch.setenv('MNE_DATA', str(tmp_path / 'foo'))
    with pytest.raises(FileNotFoundError, match='as specified by MNE_DAT'):
        testing.data_path(download=False)


@requires_good_network
def test_downloads(tmp_path, monkeypatch, capsys):
    """Test dataset URL and version handling."""
    # Try actually downloading a dataset
    kwargs = dict(path=str(tmp_path), verbose=True)
    # XXX we shouldn't need to disable capsys here, but there's a pytest bug
    # that we're hitting (https://github.com/pytest-dev/pytest/issues/5997)
    # now that we use pooch
    with capsys.disabled():
        with pytest.raises(RuntimeError, match='Do not download .* in tests'):
            path = datasets._fake.data_path(update_path=False, **kwargs)
        monkeypatch.setattr(
            datasets.utils,
            '_MODULES_TO_ENSURE_DOWNLOAD_IS_FALSE_IN_TESTS', ())
        path = datasets._fake.data_path(update_path=False, **kwargs)
    assert op.isdir(path)
    assert op.isfile(op.join(path, 'bar'))
    assert not datasets.has_dataset('fake')  # not in the desired path
    assert datasets._fake.get_version() is None
    assert datasets.utils._get_version('fake') is None
    monkeypatch.setenv('_MNE_FAKE_HOME_DIR', str(tmp_path))
    with pytest.warns(RuntimeWarning, match='non-standard config'):
        new_path = datasets._fake.data_path(update_path=True, **kwargs)
    assert path == new_path
    out, _ = capsys.readouterr()
    assert 'Downloading' not in out
    # No version: shown as existing but unknown version
    assert datasets.has_dataset('fake')
    # XXX logic bug, should be "unknown"
    assert datasets._fake.get_version() == '0.0'
    # With a version but no required one: shown as existing and gives version
    fname = tmp_path / 'foo' / 'version.txt'
    with open(fname, 'w') as fid:
        fid.write('0.1')
    assert datasets.has_dataset('fake')
    assert datasets._fake.get_version() == '0.1'
    datasets._fake.data_path(download=False, **kwargs)
    out, _ = capsys.readouterr()
    assert 'out of date' not in out
    # With the required version: shown as existing with the required version
    monkeypatch.setattr(datasets._fetch, '_FAKE_VERSION', '0.1')
    assert datasets.has_dataset('fake')
    assert datasets._fake.get_version() == '0.1'
    datasets._fake.data_path(download=False, **kwargs)
    out, _ = capsys.readouterr()
    assert 'out of date' not in out
    monkeypatch.setattr(datasets._fetch, '_FAKE_VERSION', '0.2')
    # With an older version:
    # 1. Marked as not actually being present
    assert not datasets.has_dataset('fake')
    # 2. Will try to update when `data_path` gets called, with logged message
    want_msg = 'Correctly trying to download newer version'
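
    # Stand-in for pooch.Pooch.fetch: instead of downloading, check that the
    # expected archive URL and destination path are requested, then raise.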
    def _error_download(self, fname, downloader, processor):
        url = self.get_url(fname)
        full_path = self.abspath / fname
        assert 'foo.tgz' in url
        assert str(tmp_path) in str(full_path)
        raise RuntimeError(want_msg)

    monkeypatch.setattr(pooch.Pooch, 'fetch', _error_download)
    with pytest.raises(RuntimeError, match=want_msg):
        datasets._fake.data_path(**kwargs)
    out, _ = capsys.readouterr()
    assert re.match(r'.* 0\.1 .*out of date.* 0\.2.*', out, re.MULTILINE), out


@pytest.mark.slowtest
@testing.requires_testing_data
@requires_good_network
def test_fetch_parcellations(tmp_path):
    """Test fetching parcellations."""
    this_subjects_dir = str(tmp_path)
    os.mkdir(op.join(this_subjects_dir, 'fsaverage'))
    os.mkdir(op.join(this_subjects_dir, 'fsaverage', 'label'))
    os.mkdir(op.join(this_subjects_dir, 'fsaverage', 'surf'))
    for hemi in ('lh', 'rh'):
        shutil.copyfile(
            op.join(subjects_dir, 'fsaverage', 'surf', '%s.white' % hemi),
            op.join(this_subjects_dir, 'fsaverage', 'surf', '%s.white' % hemi))
    # speed up by pretending we have one of them
    with open(op.join(this_subjects_dir, 'fsaverage', 'label',
                      'lh.aparc_sub.annot'), 'wb'):
        pass
    datasets.fetch_aparc_sub_parcellation(subjects_dir=this_subjects_dir)
    with ArgvSetter(('--accept-hcpmmp-license',)):
        datasets.fetch_hcp_mmp_parcellation(subjects_dir=this_subjects_dir)
    for hemi in ('lh', 'rh'):
        assert op.isfile(op.join(this_subjects_dir, 'fsaverage', 'label',
                                 '%s.aparc_sub.annot' % hemi))
    # test our annot round-trips here
    kwargs = dict(subject='fsaverage', hemi='both', sort=False,
                  subjects_dir=this_subjects_dir)
    labels = read_labels_from_annot(parc='HCPMMP1', **kwargs)
    write_labels_to_annot(
        labels, parc='HCPMMP1_round',
        table_name='./left.fsaverage164.label.gii', **kwargs)
    orig = op.join(this_subjects_dir, 'fsaverage', 'label', 'lh.HCPMMP1.annot')
    first = hashfunc(orig)
    new = orig[:-6] + '_round.annot'
    second = hashfunc(new)
    assert first == second
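

# Helpers for test_manifest_check_download: _fake_zip_fetch stands in for
# pooch.retrieve and writes a local zip archive containing _zip_fnames, so no
# network access is needed.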
_zip_fnames = ['foo/foo.txt', 'foo/bar.txt', 'foo/baz.txt']


def _fake_zip_fetch(url, path, fname, known_hash):
    fname = op.join(path, fname)
    with zipfile.ZipFile(fname, 'w') as zipf:
        with zipf.open('foo/', 'w'):
            pass
        for fname in _zip_fnames:
            with zipf.open(fname, 'w'):
                pass
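

# n_have controls how many of the manifest files already exist in the
# destination; only the missing ones should be reported and extracted.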
@pytest.mark.parametrize('n_have', range(len(_zip_fnames) + 1))
def test_manifest_check_download(tmp_path, n_have, monkeypatch):
    """Test our manifest downloader."""
    monkeypatch.setattr(pooch, 'retrieve', _fake_zip_fetch)
    destination = op.join(str(tmp_path), 'empty')
    manifest_path = op.join(str(tmp_path), 'manifest.txt')
    with open(manifest_path, 'w') as fid:
        for fname in _zip_fnames:
            fid.write('%s\n' % fname)
    assert n_have in range(len(_zip_fnames) + 1)
    assert not op.isdir(destination)
    if n_have > 0:
        os.makedirs(op.join(destination, 'foo'))
        assert op.isdir(op.join(destination, 'foo'))
    for fname in _zip_fnames:
        assert not op.isfile(op.join(destination, fname))
    for fname in _zip_fnames[:n_have]:
        with open(op.join(destination, fname), 'w'):
            pass
    with catch_logging() as log:
        with use_log_level(True):
            # we mock the pooch.retrieve so these are not used
            url = hash_ = ''
            _manifest_check_download(manifest_path, destination, url, hash_)
    log = log.getvalue()
    n_missing = 3 - n_have
    assert ('%d file%s missing from' % (n_missing, _pl(n_missing))) in log
    for want in ('Extracting missing', 'Successfully '):
        if n_missing > 0:
            assert want in log
        else:
            assert want not in log
    assert op.isdir(destination)
    for fname in _zip_fnames:
        assert op.isfile(op.join(destination, fname))
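

# Stand-in for _manifest_check_download shared by the infant and phantom
# tests below: it sanity-checks its arguments and, when fake_files=True,
# creates an empty placeholder file for every entry in the manifest.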
def _fake_mcd(manifest_path, destination, url, hash_, name=None,
              fake_files=False):
    if name is None:
        name = url.split('/')[-1].split('.')[0]
    assert name in url
    assert name in destination
    assert name in manifest_path
    assert len(hash_) == 32
    if fake_files:
        with open(manifest_path) as fid:
            for path in fid:
                path = path.strip()
                if not path:
                    continue
                fname = op.join(destination, path)
                os.makedirs(op.dirname(fname), exist_ok=True)
                with open(fname, 'wb'):
                    pass
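

# fetch_infant_template should accept a valid age ('12mo') and reject an
# unavailable one ('0mo'); the faked _manifest_check_download keeps it offline.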
def test_infant(tmp_path, monkeypatch):
    """Test fetch_infant_template."""
    monkeypatch.setattr(infant_base, '_manifest_check_download', _fake_mcd)
    fetch_infant_template('12mo', subjects_dir=tmp_path)
    with pytest.raises(ValueError, match='Invalid value for'):
        fetch_infant_template('0mo', subjects_dir=tmp_path)


def test_phantom(tmp_path, monkeypatch):
    """Test phantom data downloading."""
    # The Otaniemi file is only ~6MB, so in principle maybe we could test
    # an actual download here. But it doesn't seem worth it given that
    # CircleCI will at least test the VectorView one, and this file should
    # not change often.
    monkeypatch.setattr(phantom_base, '_manifest_check_download',
                        partial(_fake_mcd, name='phantom_otaniemi',
                                fake_files=True))
    fetch_phantom('otaniemi', subjects_dir=tmp_path)
    assert op.isfile(tmp_path / 'phantom_otaniemi' / 'mri' / 'T1.mgz')
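

# NB: hash=None below, so no checksum is enforced for the downloaded file.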
def test_fetch_uncompressed_file(tmp_path):
    """Test downloading an uncompressed file with our fetch function."""
    dataset_dict = dict(
        dataset_name='license',
        url=('https://raw.githubusercontent.com/mne-tools/mne-python/main/'
             'LICENSE.txt'),
        archive_name='LICENSE.foo',
        folder_name=op.join(tmp_path, 'foo'),
        hash=None)
    fetch_dataset(dataset_dict, path=None, force_update=True)
    assert (tmp_path / 'foo' / 'LICENSE.foo').is_file()