File: test_datasets.py

package info (click to toggle)
python-mne 1.3.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 100,172 kB
  • sloc: python: 166,349; pascal: 3,602; javascript: 1,472; sh: 334; makefile: 236
file content (273 lines) | stat: -rw-r--r-- 11,434 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
from functools import partial
import os
from os import path as op
import re
import shutil
import zipfile

import pooch
import pytest

from mne import datasets, read_labels_from_annot, write_labels_to_annot
from mne.datasets import (testing, fetch_infant_template, fetch_phantom,
                          fetch_dataset)
from mne.datasets._fsaverage.base import _set_montage_coreg_path
from mne.datasets._infant import base as infant_base
from mne.datasets._phantom import base as phantom_base
from mne.datasets.utils import _manifest_check_download

from mne.utils import (requires_good_network,
                       get_subjects_dir, ArgvSetter, _pl, use_log_level,
                       catch_logging, hashfunc)


# FreeSurfer subjects dir inside the testing dataset (a Path); with
# download=False, data_path() resolves to '.' when the data are absent.
subjects_dir = testing.data_path(download=False) / 'subjects'


def test_datasets_basic(tmp_path, monkeypatch):
    """Test simple dataset functions."""
    # XXX 'hf_sef' and 'misc' do not conform to these standards
    dnames = ('sample', 'somato', 'spm_face', 'testing', 'opm',
              'bst_raw', 'bst_auditory', 'bst_resting', 'multimodal',
              'bst_phantom_ctf', 'bst_phantom_elekta', 'kiloword',
              'mtrf', 'phantom_4dbti',
              'visual_92_categories', 'fieldtrip_cmc')
    for dname in dnames:
        # brainstorm datasets live in their own sub-namespace
        module = datasets.brainstorm if dname.startswith('bst') else datasets
        dataset = getattr(module, dname)
        if str(dataset.data_path(download=False)) == '.':
            # absent on disk: no version and not reported as available
            assert dataset.get_version() is None
            assert not datasets.has_dataset(dname)
        else:
            assert isinstance(dataset.get_version(), str)
            assert datasets.has_dataset(dname)
        print('%s: %s' % (dname, datasets.has_dataset(dname)))
    tempdir = str(tmp_path)
    # Explicitly test one that isn't preset (given the config)
    monkeypatch.setenv('MNE_DATASETS_SAMPLE_PATH', tempdir)
    sample = datasets.sample
    assert str(sample.data_path(download=False)) == '.'
    assert sample.get_version() != ''
    assert sample.get_version() is None
    # don't let it read from the config file to get the directory,
    # force it to look for the default
    monkeypatch.setenv('_MNE_FAKE_HOME_DIR', tempdir)
    monkeypatch.delenv('SUBJECTS_DIR', raising=False)
    assert (str(datasets.utils._get_path(None, 'foo', 'bar')) ==
            op.join(tempdir, 'mne_data'))
    assert get_subjects_dir(None) is None
    _set_montage_coreg_path()
    assert get_subjects_dir().endswith('MNE-fsaverage-data')
    # a bogus MNE_DATA directory must raise, not silently fall back
    monkeypatch.setenv('MNE_DATA', str(tmp_path / 'foo'))
    with pytest.raises(FileNotFoundError, match='as specified by MNE_DAT'):
        testing.data_path(download=False)


@requires_good_network
def test_downloads(tmp_path, monkeypatch, capsys):
    """Test dataset URL and version handling."""
    # Try actually downloading a dataset
    kwargs = dict(path=str(tmp_path), verbose=True)
    # XXX we shouldn't need to disable capsys here, but there's a pytest bug
    # that we're hitting (https://github.com/pytest-dev/pytest/issues/5997)
    # now that we use pooch
    with capsys.disabled():
        # the test-suite guard forbids real downloads by default
        with pytest.raises(RuntimeError, match='Do not download .* in tests'):
            path = datasets._fake.data_path(update_path=False, **kwargs)
        # disable the guard so the (tiny) fake dataset can actually be fetched
        monkeypatch.setattr(
            datasets.utils,
            '_MODULES_TO_ENSURE_DOWNLOAD_IS_FALSE_IN_TESTS', ())
        path = datasets._fake.data_path(update_path=False, **kwargs)
    assert op.isdir(path)
    assert op.isfile(op.join(path, 'bar'))
    assert not datasets.has_dataset('fake')  # not in the desired path
    assert datasets._fake.get_version() is None
    assert datasets.utils._get_version('fake') is None
    # point the fake config home at tmp_path so the data are "in place" now
    monkeypatch.setenv('_MNE_FAKE_HOME_DIR', str(tmp_path))
    with pytest.warns(RuntimeWarning, match='non-standard config'):
        new_path = datasets._fake.data_path(update_path=True, **kwargs)
    assert path == new_path
    out, _ = capsys.readouterr()
    # data already present, so no re-download message expected
    assert 'Downloading' not in out
    # No version: shown as existing but unknown version
    assert datasets.has_dataset('fake')
    # XXX logic bug, should be "unknown"
    assert datasets._fake.get_version() == '0.0'
    # With a version but no required one: shown as existing and gives version
    fname = tmp_path / 'foo' / 'version.txt'
    with open(fname, 'w') as fid:
        fid.write('0.1')
    assert datasets.has_dataset('fake')
    assert datasets._fake.get_version() == '0.1'
    datasets._fake.data_path(download=False, **kwargs)
    out, _ = capsys.readouterr()
    assert 'out of date' not in out
    # With the required version: shown as existing with the required version
    monkeypatch.setattr(datasets._fetch, '_FAKE_VERSION', '0.1')
    assert datasets.has_dataset('fake')
    assert datasets._fake.get_version() == '0.1'
    datasets._fake.data_path(download=False, **kwargs)
    out, _ = capsys.readouterr()
    assert 'out of date' not in out
    monkeypatch.setattr(datasets._fetch, '_FAKE_VERSION', '0.2')
    # With an older version:
    # 1. Marked as not actually being present
    assert not datasets.has_dataset('fake')
    # 2. Will try to update when `data_path` gets called, with logged message
    want_msg = 'Correctly trying to download newer version'

    def _error_download(self, fname, downloader, processor):
        # stand-in for pooch.Pooch.fetch: verify what would be requested,
        # then abort with a sentinel error so no network traffic happens
        url = self.get_url(fname)
        full_path = self.abspath / fname
        assert 'foo.tgz' in url
        assert str(tmp_path) in str(full_path)
        raise RuntimeError(want_msg)

    monkeypatch.setattr(pooch.Pooch, 'fetch', _error_download)
    with pytest.raises(RuntimeError, match=want_msg):
        datasets._fake.data_path(**kwargs)
    out, _ = capsys.readouterr()
    # output should mention both the on-disk (0.1) and required (0.2) versions
    assert re.match(r'.* 0\.1 .*out of date.* 0\.2.*', out, re.MULTILINE), out


@pytest.mark.slowtest
@testing.requires_testing_data
@requires_good_network
def test_fetch_parcellations(tmp_path):
    """Test fetching parcellations."""
    this_subjects_dir = str(tmp_path)
    fsavg = op.join(this_subjects_dir, 'fsaverage')
    for sub in ('', 'label', 'surf'):
        os.mkdir(op.join(fsavg, sub))
    for hemi in ('lh', 'rh'):
        white = '%s.white' % hemi
        shutil.copyfile(op.join(subjects_dir, 'fsaverage', 'surf', white),
                        op.join(fsavg, 'surf', white))
    # speed things up by pretending one annot file is already downloaded
    with open(op.join(fsavg, 'label', 'lh.aparc_sub.annot'), 'wb'):
        pass
    datasets.fetch_aparc_sub_parcellation(subjects_dir=this_subjects_dir)
    with ArgvSetter(('--accept-hcpmmp-license',)):
        datasets.fetch_hcp_mmp_parcellation(subjects_dir=this_subjects_dir)
    for hemi in ('lh', 'rh'):
        annot = op.join(fsavg, 'label', '%s.aparc_sub.annot' % hemi)
        assert op.isfile(annot)
    # round-trip the HCPMMP1 annotation through read/write and compare hashes
    kwargs = dict(subject='fsaverage', hemi='both', sort=False,
                  subjects_dir=this_subjects_dir)
    labels = read_labels_from_annot(parc='HCPMMP1', **kwargs)
    write_labels_to_annot(
        labels, parc='HCPMMP1_round',
        table_name='./left.fsaverage164.label.gii', **kwargs)
    orig = op.join(fsavg, 'label', 'lh.HCPMMP1.annot')
    new = orig[:-6] + '_round.annot'
    # identical hashes -> the annot files are byte-identical
    assert hashfunc(orig) == hashfunc(new)


_zip_fnames = ['foo/foo.txt', 'foo/bar.txt', 'foo/baz.txt']


def _fake_zip_fetch(url, path, fname, known_hash):
    fname = op.join(path, fname)
    with zipfile.ZipFile(fname, 'w') as zipf:
        with zipf.open('foo/', 'w'):
            pass
        for fname in _zip_fnames:
            with zipf.open(fname, 'w'):
                pass


@pytest.mark.parametrize('n_have', range(len(_zip_fnames)))
def test_manifest_check_download(tmp_path, n_have, monkeypatch):
    """Test our manifest downloader."""
    monkeypatch.setattr(pooch, 'retrieve', _fake_zip_fetch)
    destination = op.join(str(tmp_path), 'empty')
    manifest_path = op.join(str(tmp_path), 'manifest.txt')
    # write a manifest listing every file the fake archive provides
    with open(manifest_path, 'w') as fid:
        fid.write(''.join('%s\n' % fname for fname in _zip_fnames))
    assert n_have in range(len(_zip_fnames) + 1)
    assert not op.isdir(destination)
    if n_have > 0:
        os.makedirs(op.join(destination, 'foo'))
        assert op.isdir(op.join(destination, 'foo'))
    for fname in _zip_fnames:
        assert not op.isfile(op.join(destination, fname))
    # pretend the first n_have files are already on disk
    for fname in _zip_fnames[:n_have]:
        with open(op.join(destination, fname), 'w'):
            pass
    with catch_logging() as log:
        with use_log_level(True):
            # we mock the pooch.retrieve so these are not used
            url = hash_ = ''
            _manifest_check_download(manifest_path, destination, url, hash_)
    log = log.getvalue()
    n_missing = 3 - n_have
    assert ('%d file%s missing from' % (n_missing, _pl(n_missing))) in log
    # extraction messages appear exactly when something was missing
    for want in ('Extracting missing', 'Successfully '):
        assert (want in log) == (n_missing > 0)
    assert op.isdir(destination)
    for fname in _zip_fnames:
        assert op.isfile(op.join(destination, fname))


def _fake_mcd(manifest_path, destination, url, hash_, name=None,
              fake_files=False):
    if name is None:
        name = url.split('/')[-1].split('.')[0]
        assert name in url
        assert name in destination
    assert name in manifest_path
    assert len(hash_) == 32
    if fake_files:
        with open(manifest_path) as fid:
            for path in fid:
                path = path.strip()
                if not path:
                    continue
                fname = op.join(destination, path)
                os.makedirs(op.dirname(fname), exist_ok=True)
                with open(fname, 'wb'):
                    pass


def test_infant(tmp_path, monkeypatch):
    """Test fetch_infant_template."""
    # swap in the fake manifest checker so nothing is actually downloaded
    monkeypatch.setattr(infant_base, '_manifest_check_download', _fake_mcd)
    # a supported age succeeds against the faked download machinery
    fetch_infant_template('12mo', subjects_dir=tmp_path)
    # an unsupported age must be rejected with a clear error
    with pytest.raises(ValueError, match='Invalid value for'):
        fetch_infant_template('0mo', subjects_dir=tmp_path)


def test_phantom(tmp_path, monkeypatch):
    """Test phantom data downloading."""
    # The Otaniemi file is only ~6MB, so in principle maybe we could test
    # an actual download here. But it doesn't seem worth it given that
    # CircleCI will at least test the VectorView one, and this file should
    # not change often.
    fake_mcd = partial(_fake_mcd, name='phantom_otaniemi', fake_files=True)
    monkeypatch.setattr(phantom_base, '_manifest_check_download', fake_mcd)
    fetch_phantom('otaniemi', subjects_dir=tmp_path)
    t1_fname = tmp_path / 'phantom_otaniemi' / 'mri' / 'T1.mgz'
    assert op.isfile(t1_fname)


def test_fetch_uncompressed_file(tmp_path):
    """Test downloading an uncompressed file with our fetch function."""
    url = ('https://raw.githubusercontent.com/mne-tools/mne-python/main/'
           'LICENSE.txt')
    dataset_dict = dict(
        dataset_name='license',
        url=url,
        archive_name='LICENSE.foo',
        folder_name=op.join(tmp_path, 'foo'),
        hash=None)
    fetch_dataset(dataset_dict, path=None, force_update=True)
    # the file lands under the requested folder/archive name, undecompressed
    fetched = tmp_path / 'foo' / 'LICENSE.foo'
    assert fetched.is_file()